diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 605b67e77fc..ab7d8389c88 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -155,6 +155,34 @@ add_library(cudf
   src/ast/transform.cu
   src/binaryop/binaryop.cpp
   src/binaryop/compiled/binary_ops.cu
+  src/binaryop/compiled/Add.cu
+  src/binaryop/compiled/ATan2.cu
+  src/binaryop/compiled/BitwiseAnd.cu
+  src/binaryop/compiled/BitwiseOr.cu
+  src/binaryop/compiled/BitwiseXor.cu
+  src/binaryop/compiled/Less.cu
+  src/binaryop/compiled/Greater.cu
+  src/binaryop/compiled/LessEqual.cu
+  src/binaryop/compiled/GreaterEqual.cu
+  src/binaryop/compiled/Div.cu
+  src/binaryop/compiled/equality_ops.cu
+  src/binaryop/compiled/FloorDiv.cu
+  src/binaryop/compiled/LogBase.cu
+  src/binaryop/compiled/LogicalAnd.cu
+  src/binaryop/compiled/LogicalOr.cu
+  src/binaryop/compiled/Mod.cu
+  src/binaryop/compiled/Mul.cu
+  src/binaryop/compiled/NullMax.cu
+  src/binaryop/compiled/NullMin.cu
+  src/binaryop/compiled/PMod.cu
+  src/binaryop/compiled/Pow.cu
+  src/binaryop/compiled/PyMod.cu
+  src/binaryop/compiled/ShiftLeft.cu
+  src/binaryop/compiled/ShiftRight.cu
+  src/binaryop/compiled/ShiftRightUnsigned.cu
+  src/binaryop/compiled/Sub.cu
+  src/binaryop/compiled/TrueDiv.cu
+  src/binaryop/compiled/util.cpp
   src/labeling/label_bins.cu
   src/bitmask/null_mask.cu
   src/bitmask/is_element_valid.cpp
diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index cf681a96cbd..6a2b71ae1d9 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -203,6 +203,7 @@ ConfigureBench(AST_BENCH ast/transform_benchmark.cpp)
 # - binaryop benchmark ----------------------------------------------------------------------------
 ConfigureBench(BINARYOP_BENCH
   binaryop/binaryop_benchmark.cpp
+  binaryop/compiled_binaryop_benchmark.cpp
   binaryop/jit_binaryop_benchmark.cpp)
 
 ###################################################################################################
diff --git a/cpp/benchmarks/binaryop/compiled_binaryop_benchmark.cpp b/cpp/benchmarks/binaryop/compiled_binaryop_benchmark.cpp
new file mode 100644
index 00000000000..aa86f3bedf8
--- /dev/null
+++ b/cpp/benchmarks/binaryop/compiled_binaryop_benchmark.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
+#include <cudf_test/column_wrapper.hpp>
+
+#include <cudf/binaryop.hpp>
+
+#include <thrust/iterator/counting_iterator.h>
+
+template <typename TypeLhs, typename TypeRhs, typename TypeOut, cudf::binary_operator>
+class COMPILED_BINARYOP : public cudf::benchmark {
+};
+
+template <typename TypeLhs, typename TypeRhs, typename TypeOut>
+void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop)
+{
+  const cudf::size_type column_size{(cudf::size_type)state.range(0)};
+
+  auto data_it = thrust::make_counting_iterator(0);
+  cudf::test::fixed_width_column_wrapper<TypeLhs> input1(data_it, data_it + column_size);
+  cudf::test::fixed_width_column_wrapper<TypeRhs> input2(data_it, data_it + column_size);
+
+  auto lhs          = cudf::column_view(input1);
+  auto rhs          = cudf::column_view(input2);
+  auto output_dtype = cudf::data_type(cudf::type_to_id<TypeOut>());
+
+  // Call once for hot cache.
+  cudf::experimental::binary_operation(lhs, rhs, binop, output_dtype);
+
+  for (auto _ : state) {
+    cuda_event_timer timer(state, true);
+    cudf::experimental::binary_operation(lhs, rhs, binop, output_dtype);
+  }
+}
+
+// TODO tparam boolean for null.
+#define BINARYOP_BENCHMARK_DEFINE(TypeLhs, TypeRhs, binop, TypeOut)                      \
+  BENCHMARK_TEMPLATE_DEFINE_F(                                                           \
+    COMPILED_BINARYOP, binop, TypeLhs, TypeRhs, TypeOut, cudf::binary_operator::binop)   \
+  (::benchmark::State & st)                                                              \
+  {                                                                                      \
+    BM_compiled_binaryop<TypeLhs, TypeRhs, TypeOut>(st, cudf::binary_operator::binop);   \
+  }                                                                                      \
+  BENCHMARK_REGISTER_F(COMPILED_BINARYOP, binop)                                         \
+    ->Unit(benchmark::kMicrosecond)                                                      \
+    ->UseManualTime()                                                                    \
+    ->Arg(10000)      /* 10k */                                                          \
+    ->Arg(100000)     /* 100k */                                                         \
+    ->Arg(1000000)    /* 1M */                                                           \
+    ->Arg(10000000)   /* 10M */                                                          \
+    ->Arg(100000000); /* 100M */
+
+using namespace cudf;
+using namespace numeric;
+
+// clang-format off
+BINARYOP_BENCHMARK_DEFINE(float, int64_t, ADD, int32_t);
+BINARYOP_BENCHMARK_DEFINE(duration_s, duration_D, SUB, duration_ms);
+BINARYOP_BENCHMARK_DEFINE(float, float, MUL, int64_t);
+BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, DIV, int64_t);
+BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, TRUE_DIV, int64_t);
+BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, FLOOR_DIV, int64_t);
+BINARYOP_BENCHMARK_DEFINE(double, double, MOD, double);
+BINARYOP_BENCHMARK_DEFINE(int32_t, int64_t, PMOD, double);
+BINARYOP_BENCHMARK_DEFINE(int32_t, uint8_t, PYMOD, int64_t);
+BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, POW, double);
+BINARYOP_BENCHMARK_DEFINE(float, double, LOG_BASE, double);
+BINARYOP_BENCHMARK_DEFINE(float, double, ATAN2, double);
+BINARYOP_BENCHMARK_DEFINE(int, int, SHIFT_LEFT, int);
+BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, SHIFT_RIGHT, int);
+BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, SHIFT_RIGHT_UNSIGNED, int64_t);
+BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, BITWISE_AND, int16_t);
+BINARYOP_BENCHMARK_DEFINE(int16_t, int32_t, BITWISE_OR, int64_t);
+BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, BITWISE_XOR, int32_t);
+BINARYOP_BENCHMARK_DEFINE(double, int8_t, LOGICAL_AND, bool);
+BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, LOGICAL_OR, bool);
+BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, EQUAL, bool);
+BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NOT_EQUAL, bool);
+BINARYOP_BENCHMARK_DEFINE(timestamp_s, timestamp_s, LESS, bool);
+BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s, GREATER, bool);
+BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_EQUALS, bool);
+BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NULL_MAX, decimal32);
+BINARYOP_BENCHMARK_DEFINE(timestamp_D, timestamp_s, NULL_MIN, timestamp_s);
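For context, the new file above times the precompiled entry point while the jit benchmark updated below times the runtime-compiled one. A minimal sketch of the two calls being compared, assuming lhs and rhs are existing INT32 cudf::column_views:

  // JIT path (existing API): kernel is compiled at first use and cached
  auto jit_out = cudf::binary_operation(
    lhs, rhs, cudf::binary_operator::ADD, cudf::data_type{cudf::type_id::INT32});

  // Compiled path (added by this PR): dispatches to a precompiled kernel
  auto compiled_out = cudf::experimental::binary_operation(
    lhs, rhs, cudf::binary_operator::ADD, cudf::data_type{cudf::type_id::INT32});

Both files register the same operator/type combinations so the two paths can be compared size-for-size.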
diff --git a/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp b/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp
index 29ca02a843d..3c02f47eeb7 100644
--- a/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp
+++ b/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp
@@ -23,7 +23,7 @@
 #include <thrust/iterator/counting_iterator.h>
 
-template <typename TypeLhs, typename TypeRhs, typename TypeOut>
+template <typename TypeLhs, typename TypeRhs, typename TypeOut, cudf::binary_operator>
 class JIT_BINARYOP : public cudf::benchmark {
 };
 
@@ -50,22 +50,24 @@ void BM_binaryop(benchmark::State& state, cudf::binary_operator binop)
 }
 
 // TODO tparam boolean for null.
-#define BINARYOP_BENCHMARK_DEFINE(TypeLhs, TypeRhs, binop, TypeOut)            \
-  BENCHMARK_TEMPLATE_DEFINE_F(JIT_BINARYOP, binop, TypeLhs, TypeRhs, TypeOut)  \
-  (::benchmark::State & st)                                                    \
-  {                                                                            \
-    BM_binaryop<TypeLhs, TypeRhs, TypeOut>(st, cudf::binary_operator::binop);  \
-  }                                                                            \
-  BENCHMARK_REGISTER_F(JIT_BINARYOP, binop)                                    \
-    ->Unit(benchmark::kMillisecond)                                            \
-    ->UseManualTime()                                                          \
-    ->Arg(10000)     /* 10k */                                                 \
-    ->Arg(100000)    /* 100k */                                                \
-    ->Arg(1000000)   /* 1M */                                                  \
-    ->Arg(10000000)  /* 10M */                                                 \
+#define BINARYOP_BENCHMARK_DEFINE(TypeLhs, TypeRhs, binop, TypeOut)                    \
+  BENCHMARK_TEMPLATE_DEFINE_F(                                                         \
+    JIT_BINARYOP, binop, TypeLhs, TypeRhs, TypeOut, cudf::binary_operator::binop)      \
+  (::benchmark::State & st)                                                            \
+  {                                                                                    \
+    BM_binaryop<TypeLhs, TypeRhs, TypeOut>(st, cudf::binary_operator::binop);          \
+  }                                                                                    \
+  BENCHMARK_REGISTER_F(JIT_BINARYOP, binop)                                            \
+    ->Unit(benchmark::kMicrosecond)                                                    \
+    ->UseManualTime()                                                                  \
+    ->Arg(10000)      /* 10k */                                                        \
+    ->Arg(100000)     /* 100k */                                                       \
+    ->Arg(1000000)    /* 1M */                                                         \
+    ->Arg(10000000)   /* 10M */                                                        \
     ->Arg(100000000); /* 100M */
 
 using namespace cudf;
+using namespace numeric;
 
 // clang-format off
 BINARYOP_BENCHMARK_DEFINE(float, int64_t, ADD, int32_t);
@@ -75,16 +77,23 @@ BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, DIV, int64_t);
 BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, TRUE_DIV, int64_t);
 BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, FLOOR_DIV, int64_t);
 BINARYOP_BENCHMARK_DEFINE(double, double, MOD, double);
+BINARYOP_BENCHMARK_DEFINE(int32_t, int64_t, PMOD, double);
+BINARYOP_BENCHMARK_DEFINE(int32_t, uint8_t, PYMOD, int64_t);
 BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, POW, double);
+BINARYOP_BENCHMARK_DEFINE(float, double, LOG_BASE, double);
+BINARYOP_BENCHMARK_DEFINE(float, double, ATAN2, double);
+BINARYOP_BENCHMARK_DEFINE(int, int, SHIFT_LEFT, int);
+BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, SHIFT_RIGHT, int);
+BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, SHIFT_RIGHT_UNSIGNED, int64_t);
 BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, BITWISE_AND, int16_t);
 BINARYOP_BENCHMARK_DEFINE(int16_t, int32_t, BITWISE_OR, int64_t);
 BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, BITWISE_XOR, int32_t);
-BINARYOP_BENCHMARK_DEFINE(double, int8_t, LOGICAL_AND, int16_t);
+BINARYOP_BENCHMARK_DEFINE(double, int8_t, LOGICAL_AND, bool);
 BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, LOGICAL_OR, bool);
+BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, EQUAL, bool);
+BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NOT_EQUAL, bool);
 BINARYOP_BENCHMARK_DEFINE(timestamp_s, timestamp_s, LESS, bool);
 BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s, GREATER, bool);
-BINARYOP_BENCHMARK_DEFINE(int, int, SHIFT_LEFT, int);
-BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, SHIFT_RIGHT, int);
-BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, SHIFT_RIGHT_UNSIGNED, int64_t);
-BINARYOP_BENCHMARK_DEFINE(int32_t, int64_t, PMOD, double);
-BINARYOP_BENCHMARK_DEFINE(float, double, ATAN2, double);
+BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_EQUALS, bool);
+BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NULL_MAX, decimal32);
+BINARYOP_BENCHMARK_DEFINE(timestamp_D, timestamp_s, NULL_MIN, timestamp_s);
diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp
index 7099c29b9df..e6ff6b0eadc 100644
--- a/cpp/include/cudf/binaryop.hpp
+++ b/cpp/include/cudf/binaryop.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -42,37 +42,36 @@ enum class binary_operator : int32_t {
   FLOOR_DIV,     ///< operator / after promoting to 64 bit floating point and then
                  ///< flooring the result
   MOD,           ///< operator %
+  PMOD,          ///< positive modulo operator
+                 ///< If remainder is negative, this returns (remainder + divisor) % divisor
+                 ///< else, it returns (dividend % divisor)
   PYMOD,         ///< operator % but following python's sign rules for negatives
   POW,           ///< lhs ^ rhs
+  LOG_BASE,      ///< logarithm to the base
+  ATAN2,         ///< 2-argument arctangent
+  SHIFT_LEFT,    ///< operator <<
+  SHIFT_RIGHT,   ///< operator >>
+  SHIFT_RIGHT_UNSIGNED,  ///< operator >>> (from Java)
+                         ///< Logical right shift. Casts to an unsigned value before shifting.
+  BITWISE_AND,   ///< operator &
+  BITWISE_OR,    ///< operator |
+  BITWISE_XOR,   ///< operator ^
+  LOGICAL_AND,   ///< operator &&
+  LOGICAL_OR,    ///< operator ||
   EQUAL,         ///< operator ==
   NOT_EQUAL,     ///< operator !=
   LESS,          ///< operator <
   GREATER,       ///< operator >
   LESS_EQUAL,    ///< operator <=
   GREATER_EQUAL, ///< operator >=
-  BITWISE_AND,   ///< operator &
-  BITWISE_OR,    ///< operator |
-  BITWISE_XOR,   ///< operator ^
-  LOGICAL_AND,   ///< operator &&
-  LOGICAL_OR,    ///< operator ||
-  COALESCE,      ///< operator x,y  x is null ? y : x
-  GENERIC_BINARY,        ///< generic binary operator to be generated with input
-                         ///< ptx code
-  SHIFT_LEFT,            ///< operator <<
-  SHIFT_RIGHT,           ///< operator >>
-  SHIFT_RIGHT_UNSIGNED,  ///< operator >>> (from Java)
-                         ///< Logical right shift. Casts to an unsigned value before shifting.
-  LOG_BASE,              ///< logarithm to the base
-  ATAN2,                 ///< 2-argument arctangent
-  PMOD,                  ///< positive modulo operator
-                         ///< If remainder is negative, this returns (remainder + divisor) % divisor
-                         ///< else, it returns (dividend % divisor)
   NULL_EQUALS,   ///< Returns true when both operands are null; false when one is null; the
                  ///< result of equality when both are non-null
   NULL_MAX,      ///< Returns max of operands when both are non-null; returns the non-null
                  ///< operand when one is null; or invalid when both are null
   NULL_MIN,      ///< Returns min of operands when both are non-null; returns the non-null
                  ///< operand when one is null; or invalid when both are null
+  GENERIC_BINARY,  ///< generic binary operator to be generated with input
+                   ///< ptx code
   INVALID_BINARY ///< invalid operation
 };
 /**
@@ -87,6 +86,7 @@ enum class binary_operator : int32_t {
  *
  * @param lhs The left operand scalar
  * @param rhs The right operand column
+ * @param op The binary operator
  * @param output_type The desired data type of the output column
  * @param mr Device memory resource used to allocate the returned column's device memory
  * @return Output column of `output_type` type containing the result of
@@ -112,6 +112,7 @@
  *
  * @param lhs The left operand column
  * @param rhs The right operand scalar
+ * @param op The binary operator
  * @param output_type The desired data type of the output column
  * @param mr Device memory resource used to allocate the returned column's device memory
  * @return Output column of `output_type` type containing the result of
@@ -135,6 +136,7 @@
  *
  * @param lhs The left operand column
  * @param rhs The right operand column
+ * @param op The binary operator
  * @param output_type The desired data type of the output column
  * @param mr Device memory resource used to allocate the returned column's device memory
  * @return Output column of `output_type` type containing the result of
@@ -202,5 +204,89 @@ cudf::data_type binary_operation_fixed_point_output_type(binary_operator
   op,
   cudf::data_type const& lhs,
   cudf::data_type const& rhs);
 
+namespace experimental {
+/**
+ * @brief Performs a binary operation between a scalar and a column.
+ *
+ * The output contains the result of `op(lhs, rhs[i])` for all `0 <= i < rhs.size()`.
+ * The scalar is the left operand and the column elements are the right operand.
+ * This distinction is significant for non-commutative binary operations.
+ *
+ * Regardless of the operator, the validity of the output value is the logical
+ * AND of the validity of the two operands, except for NULL_MIN and NULL_MAX (logical OR).
+ *
+ * @param lhs The left operand scalar
+ * @param rhs The right operand column
+ * @param op The binary operator
+ * @param output_type The desired data type of the output column
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return Output column of `output_type` type containing the result of
+ *         the binary operation
+ * @throw cudf::logic_error if @p output_type dtype isn't fixed-width
+ * @throw cudf::logic_error if @p output_type dtype isn't boolean for comparison and logical
+ * operations.
+ */
+std::unique_ptr<column> binary_operation(
+  scalar const& lhs,
+  column_view const& rhs,
+  binary_operator op,
+  data_type output_type,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Performs a binary operation between a column and a scalar.
+ *
+ * The output contains the result of `op(lhs[i], rhs)` for all `0 <= i < lhs.size()`.
+ * The column elements are the left operand and the scalar is the right operand.
+ * This distinction is significant for non-commutative binary operations.
+ *
+ * Regardless of the operator, the validity of the output value is the logical
+ * AND of the validity of the two operands, except for NULL_MIN and NULL_MAX (logical OR).
+ *
+ * @param lhs The left operand column
+ * @param rhs The right operand scalar
+ * @param op The binary operator
+ * @param output_type The desired data type of the output column
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return Output column of `output_type` type containing the result of
+ *         the binary operation
+ * @throw cudf::logic_error if @p output_type dtype isn't fixed-width
+ * @throw cudf::logic_error if @p output_type dtype isn't boolean for comparison and logical
+ * operations.
+ */
+std::unique_ptr<column> binary_operation(
+  column_view const& lhs,
+  scalar const& rhs,
+  binary_operator op,
+  data_type output_type,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Performs a binary operation between two columns.
+ *
+ * The output contains the result of `op(lhs[i], rhs[i])` for all `0 <= i < lhs.size()`.
+ *
+ * Regardless of the operator, the validity of the output value is the logical
+ * AND of the validity of the two operands, except for NULL_MIN and NULL_MAX (logical OR).
+ *
+ * @param lhs The left operand column
+ * @param rhs The right operand column
+ * @param op The binary operator
+ * @param output_type The desired data type of the output column
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return Output column of `output_type` type containing the result of
+ *         the binary operation
+ * @throw cudf::logic_error if @p lhs and @p rhs are different sizes
+ * @throw cudf::logic_error if @p output_type dtype isn't boolean for comparison and logical
+ * operations.
+ * @throw cudf::logic_error if @p output_type dtype isn't fixed-width
+ */
+std::unique_ptr<column> binary_operation(
+  column_view const& lhs,
+  column_view const& rhs,
+  binary_operator op,
+  data_type output_type,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+}  // namespace experimental
 /** @} */  // end of group
 }  // namespace cudf
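A usage sketch of the experimental overloads declared above (the column here is hypothetical):

  // col is an existing INT32 cudf::column_view; computes five + col[i] for every row.
  // Output validity is the AND of the scalar's validity and each row's validity.
  cudf::numeric_scalar<int32_t> five{5};
  auto result = cudf::experimental::binary_operation(
    five, col, cudf::binary_operator::ADD, cudf::data_type{cudf::type_id::INT32});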
diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp
index 717cf8ea7b0..0e14b0c6bf5 100644
--- a/cpp/include/cudf/scalar/scalar.hpp
+++ b/cpp/include/cudf/scalar/scalar.hpp
@@ -154,7 +154,7 @@ class fixed_width_scalar : public scalar {
   void set_value(T value, rmm::cuda_stream_view stream = rmm::cuda_stream_default);
 
   /**
-   * @brief Implicit conversion operator to get the value of the scalar on the host.
+   * @brief Explicit conversion operator to get the value of the scalar on the host.
    */
   explicit operator value_type() const;
 
@@ -365,6 +365,11 @@ class fixed_point_scalar : public scalar {
    */
   T fixed_point_value(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const;
 
+  /**
+   * @brief Explicit conversion operator to get the value of the scalar on the host.
+   */
+  explicit operator value_type() const;
+
   /**
    * @brief Returns a raw pointer to the value in device memory.
    */
@@ -465,7 +470,7 @@ class string_scalar : public scalar {
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
-   * @brief Implicit conversion operator to get the value of the scalar in a host std::string.
+   * @brief Explicit conversion operator to get the value of the scalar in a host std::string.
    */
   explicit operator std::string() const;
diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp
index e2f5f6db624..2cdc455e05c 100644
--- a/cpp/include/cudf/utilities/traits.hpp
+++ b/cpp/include/cudf/utilities/traits.hpp
@@ -47,16 +47,20 @@ using void_t = void;
  */
 #define CUDF_ENABLE_IF(...) std::enable_if_t<(__VA_ARGS__)>* = nullptr
 
-template <typename L, typename R, typename = void>
-struct is_relationally_comparable_impl : std::false_type {
-};
-
 template <typename L, typename R>
 using less_comparable = decltype(std::declval<L>() < std::declval<R>());
 
 template <typename L, typename R>
 using greater_comparable = decltype(std::declval<L>() > std::declval<R>());
 
+template <typename L, typename R>
+using equality_comparable = decltype(std::declval<L>() == std::declval<R>());
+
+namespace detail {
+template <typename L, typename R, typename = void>
+struct is_relationally_comparable_impl : std::false_type {
+};
+
 template <typename L, typename R>
 struct is_relationally_comparable_impl<
   L,
   R,
   void_t<less_comparable<L, R>, greater_comparable<L, R>>> : std::true_type {
 };
 
+template <typename L, typename R, typename = void>
 struct is_equality_comparable_impl : std::false_type {
 };
 
-template <typename L, typename R>
-using equality_comparable = decltype(std::declval<L>() == std::declval<R>());
-
 template <typename L, typename R>
 struct is_equality_comparable_impl<L, R, void_t<equality_comparable<L, R>>> : std::true_type {
 };
 
+// has common type
+template <typename AlwaysVoid, typename... Ts>
+struct has_common_type_impl : std::false_type {
+};
+
+template <typename... Ts>
+struct has_common_type_impl<void_t<std::common_type_t<Ts...>>, Ts...> : std::true_type {
+};
+}  // namespace detail
+
+template <typename... Ts>
+using has_common_type = typename detail::has_common_type_impl<void, Ts...>::type;
+
+template <typename... Ts>
+constexpr inline bool has_common_type_v = detail::has_common_type_impl<void, Ts...>::value;
+
 template <typename T>
 using is_timestamp_t = cuda::std::disjunction<std::is_same<cudf::timestamp_D, T>,
                                               std::is_same<cudf::timestamp_s, T>,
@@ -104,7 +121,7 @@ using is_duration_t = cuda::std::disjunction<std::is_same<cudf::duration_D, T>,
 template <typename L, typename R>
 constexpr inline bool is_relationally_comparable()
 {
-  return is_relationally_comparable_impl<L, R>::value;
+  return detail::is_relationally_comparable_impl<L, R>::value;
 }
 
 /**
@@ -122,7 +139,7 @@ constexpr inline bool is_relationally_comparable()
 template <typename L, typename R>
 constexpr inline bool is_equality_comparable()
 {
-  return is_equality_comparable_impl<L, R>::value;
+  return detail::is_equality_comparable_impl<L, R>::value;
 }
 
 /**
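A small illustration of the new trait; the assertions follow from std::common_type (the two integer types promote to int64_t, while a timestamp and a plain integer share no common type):

  static_assert(cudf::has_common_type_v<int32_t, int64_t>);
  static_assert(not cudf::has_common_type_v<cudf::timestamp_s, int32_t>);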
diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp
index 8502d5832e6..cd088d81531 100644
--- a/cpp/include/cudf_test/base_fixture.hpp
+++ b/cpp/include/cudf_test/base_fixture.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -56,24 +56,24 @@ template <typename T, typename Enable = void>
 struct uniform_distribution_impl {
 };
 
 template <typename T>
-struct uniform_distribution_impl<
-  T,
-  std::enable_if_t<std::is_integral<T>::value && not cudf::is_boolean<T>()>> {
+struct uniform_distribution_impl<T, std::enable_if_t<std::is_integral<T>::value>> {
   using type = std::uniform_int_distribution<T>;
 };
 
-template <typename T>
-struct uniform_distribution_impl<T, std::enable_if_t<std::is_floating_point<T>::value>> {
-  using type = std::uniform_real_distribution<T>;
+template <>
+struct uniform_distribution_impl<bool> {
+  using type = std::bernoulli_distribution;
 };
 
 template <typename T>
-struct uniform_distribution_impl<T, std::enable_if_t<cudf::is_boolean<T>()>> {
-  using type = std::bernoulli_distribution;
+struct uniform_distribution_impl<T, std::enable_if_t<std::is_floating_point<T>::value>> {
+  using type = std::uniform_real_distribution<T>;
 };
 
 template <typename T>
-struct uniform_distribution_impl<T, std::enable_if_t<cudf::is_chrono<T>()>> {
+struct uniform_distribution_impl<
+  T,
+  std::enable_if_t<cudf::is_chrono<T>() or cudf::is_fixed_point<T>()>> {
   using type = std::uniform_int_distribution<typename T::rep>;
 };
 
@@ -131,7 +131,8 @@ class UniformRandomGenerator {
    * @param lower Lower bound of the range
    * @param upper Upper bound of the desired range
    */
-  template <typename TL = T, std::enable_if_t<cudf::is_numeric<TL>()>* = nullptr>
+  template <typename TL = T,
+            std::enable_if_t<cudf::is_numeric<TL>() && !cudf::is_boolean<TL>()>* = nullptr>
   UniformRandomGenerator(T lower,
                          T upper,
                          uint64_t seed = detail::random_generator_incrementing_seed())
@@ -139,6 +140,14 @@ class UniformRandomGenerator {
   {
   }
 
+  template <typename TL = T, std::enable_if_t<cudf::is_boolean<TL>()>* = nullptr>
+  UniformRandomGenerator(T lower,
+                         T upper,
+                         uint64_t seed = detail::random_generator_incrementing_seed())
+    : dist{0.5}, rng{std::mt19937_64{seed}()}
+  {
+  }
+
   /**
    * @brief Construct a new Uniform Random Generator to generate uniformly
    * random numbers in the range `[upper,lower]`
@@ -146,7 +155,8 @@ class UniformRandomGenerator {
    * @param lower Lower bound of the range
    * @param upper Upper bound of the desired range
    */
-  template <typename TL = T, std::enable_if_t<cudf::is_chrono<TL>()>* = nullptr>
+  template <typename TL = T,
+            std::enable_if_t<cudf::is_chrono<TL>() or cudf::is_fixed_point<TL>()>* = nullptr>
   UniformRandomGenerator(typename TL::rep lower,
                          typename TL::rep upper,
                          uint64_t seed = detail::random_generator_incrementing_seed())
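With the bool path above, the generator draws from std::bernoulli_distribution instead of an integer distribution (which is undefined for bool). A minimal sketch of what a test would do, assuming the usual generate() accessor on this fixture class:

  cudf::test::UniformRandomGenerator<bool> coin(false, true);
  bool b = coin.generate();  // fair coin: the dist{0.5} initializer ignores the bounds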
diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp
index 11a3383ee87..aaf193ff5cf 100644
--- a/cpp/src/binaryop/binaryop.cpp
+++ b/cpp/src/binaryop/binaryop.cpp
@@ -588,7 +588,7 @@ std::unique_ptr<column> binary_operation(scalar const& lhs,
                                          rmm::mr::device_memory_resource* mr)
 {
   if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING)
-    return binops::compiled::binary_operation(lhs, rhs, op, output_type, stream, mr);
+    return experimental::binary_operation(lhs, rhs, op, output_type, mr);
 
   if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type()))
     return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr);
@@ -615,7 +615,7 @@ std::unique_ptr<column> binary_operation(column_view const& lhs,
                                          rmm::mr::device_memory_resource* mr)
 {
   if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING)
-    return binops::compiled::binary_operation(lhs, rhs, op, output_type, stream, mr);
+    return experimental::binary_operation(lhs, rhs, op, output_type, mr);
 
   if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type()))
     return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr);
@@ -644,7 +644,7 @@ std::unique_ptr<column> binary_operation(column_view const& lhs,
   CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes don't match");
 
   if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING)
-    return binops::compiled::binary_operation(lhs, rhs, op, output_type, stream, mr);
+    return experimental::binary_operation(lhs, rhs, op, output_type, mr);
 
   if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type()))
     return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr);
@@ -757,4 +757,78 @@ std::unique_ptr<column> binary_operation(column_view const& lhs,
   return detail::binary_operation(lhs, rhs, ptx, output_type, rmm::cuda_stream_default, mr);
 }
 
+// Experimental Compiled Binary operation
+namespace experimental {
+namespace detail {
+/**
+ * @copydoc cudf::experimental::binary_operation(column_view const&, column_view const&,
+ * binary_operator, data_type, rmm::mr::device_memory_resource*)
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+template <typename LhsType, typename RhsType>
+std::unique_ptr<column> binary_operation(LhsType const& lhs,
+                                         RhsType const& rhs,
+                                         binary_operator op,
+                                         data_type output_type,
+                                         rmm::cuda_stream_view stream,
+                                         rmm::mr::device_memory_resource* mr)
+{
+  if constexpr (std::is_same_v<LhsType, column_view> and std::is_same_v<RhsType, column_view>)
+    CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes don't match");
+
+  if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING and
+      output_type.id() == type_id::STRING and
+      (op == binary_operator::NULL_MAX or op == binary_operator::NULL_MIN))
+    return binops::compiled::string_null_min_max(lhs, rhs, op, output_type, stream, mr);
+
+  if (not binops::compiled::is_supported_operation(output_type, lhs.type(), rhs.type(), op))
+    CUDF_FAIL("Unsupported operator for these types");
+
+  // TODO check if scale conversion required?
+  // if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type()))
+  //   CUDF_FAIL("Not yet supported fixed_point");
+  //   return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr);
+
+  auto out = make_fixed_width_column_for_output(lhs, rhs, op, output_type, stream, mr);
+
+  if constexpr (std::is_same_v<LhsType, column_view>)
+    if (lhs.is_empty()) return out;
+  if constexpr (std::is_same_v<RhsType, column_view>)
+    if (rhs.is_empty()) return out;
+
+  auto out_view = out->mutable_view();
+  cudf::binops::compiled::binary_operation(out_view, lhs, rhs, op, stream);
+  return out;
+}
+}  // namespace detail
+
+std::unique_ptr<column> binary_operation(scalar const& lhs,
+                                         column_view const& rhs,
+                                         binary_operator op,
+                                         data_type output_type,
+                                         rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::binary_operation(lhs, rhs, op, output_type, rmm::cuda_stream_default, mr);
+}
+std::unique_ptr<column> binary_operation(column_view const& lhs,
+                                         scalar const& rhs,
+                                         binary_operator op,
+                                         data_type output_type,
+                                         rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::binary_operation(lhs, rhs, op, output_type, rmm::cuda_stream_default, mr);
+}
+std::unique_ptr<column> binary_operation(column_view const& lhs,
+                                         column_view const& rhs,
+                                         binary_operator op,
+                                         data_type output_type,
+                                         rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::binary_operation(lhs, rhs, op, output_type, rmm::cuda_stream_default, mr);
+}
+}  // namespace experimental
 }  // namespace cudf
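Each operator then gets its own small translation unit that explicitly instantiates apply_binary_op for one functor, so the heavy per-type dispatch kernels compile in parallel across files instead of in a single monolithic binary_ops.cu. Every file below follows the same pattern, sketched here under the assumption that the op functors (ops::Add, ops::ATan2, ...) are defined in the new operation.cuh pulled in through binary_ops.cuh:

  #include "binary_ops.cuh"

  namespace cudf::binops::compiled {
  template void apply_binary_op<ops::Add>(mutable_column_device_view&,
                                          column_device_view const&,
                                          column_device_view const&,
                                          bool is_lhs_scalar,
                                          bool is_rhs_scalar,
                                          rmm::cuda_stream_view);
  }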
diff --git a/cpp/src/binaryop/compiled/ATan2.cu b/cpp/src/binaryop/compiled/ATan2.cu
new file mode 100644
index 00000000000..8e5cbf57f55
--- /dev/null
+++ b/cpp/src/binaryop/compiled/ATan2.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::ATan2>(mutable_column_device_view&,
+                                          column_device_view const&,
+                                          column_device_view const&,
+                                          bool is_lhs_scalar,
+                                          bool is_rhs_scalar,
+                                          rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/Add.cu b/cpp/src/binaryop/compiled/Add.cu
new file mode 100644
index 00000000000..4cd2ced66f4
--- /dev/null
+++ b/cpp/src/binaryop/compiled/Add.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::Add>(mutable_column_device_view&,
+                                        column_device_view const&,
+                                        column_device_view const&,
+                                        bool is_lhs_scalar,
+                                        bool is_rhs_scalar,
+                                        rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/BitwiseAnd.cu b/cpp/src/binaryop/compiled/BitwiseAnd.cu
new file mode 100644
index 00000000000..6abac2bd197
--- /dev/null
+++ b/cpp/src/binaryop/compiled/BitwiseAnd.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::BitwiseAnd>(mutable_column_device_view&,
+                                               column_device_view const&,
+                                               column_device_view const&,
+                                               bool is_lhs_scalar,
+                                               bool is_rhs_scalar,
+                                               rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/BitwiseOr.cu b/cpp/src/binaryop/compiled/BitwiseOr.cu
new file mode 100644
index 00000000000..6d523cbf1d1
--- /dev/null
+++ b/cpp/src/binaryop/compiled/BitwiseOr.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::BitwiseOr>(mutable_column_device_view&,
+                                              column_device_view const&,
+                                              column_device_view const&,
+                                              bool is_lhs_scalar,
+                                              bool is_rhs_scalar,
+                                              rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/BitwiseXor.cu b/cpp/src/binaryop/compiled/BitwiseXor.cu
new file mode 100644
index 00000000000..45175681574
--- /dev/null
+++ b/cpp/src/binaryop/compiled/BitwiseXor.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::BitwiseXor>(mutable_column_device_view&,
+                                               column_device_view const&,
+                                               column_device_view const&,
+                                               bool is_lhs_scalar,
+                                               bool is_rhs_scalar,
+                                               rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/Div.cu b/cpp/src/binaryop/compiled/Div.cu
new file mode 100644
index 00000000000..7cc895ecd06
--- /dev/null
+++ b/cpp/src/binaryop/compiled/Div.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::Div>(mutable_column_device_view&,
+                                        column_device_view const&,
+                                        column_device_view const&,
+                                        bool is_lhs_scalar,
+                                        bool is_rhs_scalar,
+                                        rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/FloorDiv.cu b/cpp/src/binaryop/compiled/FloorDiv.cu
new file mode 100644
index 00000000000..99ea2706b86
--- /dev/null
+++ b/cpp/src/binaryop/compiled/FloorDiv.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::FloorDiv>(mutable_column_device_view&,
+                                             column_device_view const&,
+                                             column_device_view const&,
+                                             bool is_lhs_scalar,
+                                             bool is_rhs_scalar,
+                                             rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/Greater.cu b/cpp/src/binaryop/compiled/Greater.cu
new file mode 100644
index 00000000000..679e029b5fc
--- /dev/null
+++ b/cpp/src/binaryop/compiled/Greater.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::Greater>(mutable_column_device_view&,
+                                            column_device_view const&,
+                                            column_device_view const&,
+                                            bool is_lhs_scalar,
+                                            bool is_rhs_scalar,
+                                            rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/GreaterEqual.cu b/cpp/src/binaryop/compiled/GreaterEqual.cu
new file mode 100644
index 00000000000..23b0c6aaa0d
--- /dev/null
+++ b/cpp/src/binaryop/compiled/GreaterEqual.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::GreaterEqual>(mutable_column_device_view&,
+                                                 column_device_view const&,
+                                                 column_device_view const&,
+                                                 bool is_lhs_scalar,
+                                                 bool is_rhs_scalar,
+                                                 rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/Less.cu b/cpp/src/binaryop/compiled/Less.cu
new file mode 100644
index 00000000000..7ab5dfe3478
--- /dev/null
+++ b/cpp/src/binaryop/compiled/Less.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::Less>(mutable_column_device_view&,
+                                         column_device_view const&,
+                                         column_device_view const&,
+                                         bool is_lhs_scalar,
+                                         bool is_rhs_scalar,
+                                         rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/LessEqual.cu b/cpp/src/binaryop/compiled/LessEqual.cu
new file mode 100644
index 00000000000..983c50c9575
--- /dev/null
+++ b/cpp/src/binaryop/compiled/LessEqual.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::LessEqual>(mutable_column_device_view&,
+                                              column_device_view const&,
+                                              column_device_view const&,
+                                              bool is_lhs_scalar,
+                                              bool is_rhs_scalar,
+                                              rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/LogBase.cu b/cpp/src/binaryop/compiled/LogBase.cu
new file mode 100644
index 00000000000..bdc709b86bf
--- /dev/null
+++ b/cpp/src/binaryop/compiled/LogBase.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::LogBase>(mutable_column_device_view&,
+                                            column_device_view const&,
+                                            column_device_view const&,
+                                            bool is_lhs_scalar,
+                                            bool is_rhs_scalar,
+                                            rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/LogicalAnd.cu b/cpp/src/binaryop/compiled/LogicalAnd.cu
new file mode 100644
index 00000000000..08112fadfff
--- /dev/null
+++ b/cpp/src/binaryop/compiled/LogicalAnd.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::LogicalAnd>(mutable_column_device_view&,
+                                               column_device_view const&,
+                                               column_device_view const&,
+                                               bool is_lhs_scalar,
+                                               bool is_rhs_scalar,
+                                               rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/LogicalOr.cu b/cpp/src/binaryop/compiled/LogicalOr.cu
new file mode 100644
index 00000000000..bc400afd4cd
--- /dev/null
+++ b/cpp/src/binaryop/compiled/LogicalOr.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::LogicalOr>(mutable_column_device_view&,
+                                              column_device_view const&,
+                                              column_device_view const&,
+                                              bool is_lhs_scalar,
+                                              bool is_rhs_scalar,
+                                              rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/Mod.cu b/cpp/src/binaryop/compiled/Mod.cu
new file mode 100644
index 00000000000..0b82c09c8a6
--- /dev/null
+++ b/cpp/src/binaryop/compiled/Mod.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::Mod>(mutable_column_device_view&,
+                                        column_device_view const&,
+                                        column_device_view const&,
+                                        bool is_lhs_scalar,
+                                        bool is_rhs_scalar,
+                                        rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/Mul.cu b/cpp/src/binaryop/compiled/Mul.cu
new file mode 100644
index 00000000000..15394245259
--- /dev/null
+++ b/cpp/src/binaryop/compiled/Mul.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::Mul>(mutable_column_device_view&,
+                                        column_device_view const&,
+                                        column_device_view const&,
+                                        bool is_lhs_scalar,
+                                        bool is_rhs_scalar,
+                                        rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/NullMax.cu b/cpp/src/binaryop/compiled/NullMax.cu
new file mode 100644
index 00000000000..78a44041cba
--- /dev/null
+++ b/cpp/src/binaryop/compiled/NullMax.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::NullMax>(mutable_column_device_view&,
+                                            column_device_view const&,
+                                            column_device_view const&,
+                                            bool is_lhs_scalar,
+                                            bool is_rhs_scalar,
+                                            rmm::cuda_stream_view);
+}  // namespace cudf::binops::compiled
diff --git a/cpp/src/binaryop/compiled/NullMin.cu b/cpp/src/binaryop/compiled/NullMin.cu
new file mode 100644
index 00000000000..629ab600fd7
--- /dev/null
+++ b/cpp/src/binaryop/compiled/NullMin.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::NullMin>(mutable_column_device_view&,
+                                            column_device_view const&,
+                                            column_device_view const&,
+                                            bool is_lhs_scalar,
+                                            bool is_rhs_scalar,
+                                            rmm::cuda_stream_view);
+}  // namespace cudf::binops::compiled
diff --git a/cpp/src/binaryop/compiled/PMod.cu b/cpp/src/binaryop/compiled/PMod.cu
new file mode 100644
index 00000000000..36902c0ed10
--- /dev/null
+++ b/cpp/src/binaryop/compiled/PMod.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::PMod>(mutable_column_device_view&,
+                                         column_device_view const&,
+                                         column_device_view const&,
+                                         bool is_lhs_scalar,
+                                         bool is_rhs_scalar,
+                                         rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/Pow.cu b/cpp/src/binaryop/compiled/Pow.cu
new file mode 100644
index 00000000000..c6f897ee18d
--- /dev/null
+++ b/cpp/src/binaryop/compiled/Pow.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::Pow>(mutable_column_device_view&,
+                                        column_device_view const&,
+                                        column_device_view const&,
+                                        bool is_lhs_scalar,
+                                        bool is_rhs_scalar,
+                                        rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/PyMod.cu b/cpp/src/binaryop/compiled/PyMod.cu
new file mode 100644
index 00000000000..b05dcd8e7bc
--- /dev/null
+++ b/cpp/src/binaryop/compiled/PyMod.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::PyMod>(mutable_column_device_view&,
+                                          column_device_view const&,
+                                          column_device_view const&,
+                                          bool is_lhs_scalar,
+                                          bool is_rhs_scalar,
+                                          rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/ShiftLeft.cu b/cpp/src/binaryop/compiled/ShiftLeft.cu
new file mode 100644
index 00000000000..6cc950b2d50
--- /dev/null
+++ b/cpp/src/binaryop/compiled/ShiftLeft.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::ShiftLeft>(mutable_column_device_view&,
+                                              column_device_view const&,
+                                              column_device_view const&,
+                                              bool is_lhs_scalar,
+                                              bool is_rhs_scalar,
+                                              rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/ShiftRight.cu b/cpp/src/binaryop/compiled/ShiftRight.cu
new file mode 100644
index 00000000000..1ddd7100a73
--- /dev/null
+++ b/cpp/src/binaryop/compiled/ShiftRight.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::ShiftRight>(mutable_column_device_view&,
+                                               column_device_view const&,
+                                               column_device_view const&,
+                                               bool is_lhs_scalar,
+                                               bool is_rhs_scalar,
+                                               rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/ShiftRightUnsigned.cu b/cpp/src/binaryop/compiled/ShiftRightUnsigned.cu
new file mode 100644
index 00000000000..a87b4b9f9ac
--- /dev/null
+++ b/cpp/src/binaryop/compiled/ShiftRightUnsigned.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::ShiftRightUnsigned>(mutable_column_device_view&,
+                                                       column_device_view const&,
+                                                       column_device_view const&,
+                                                       bool is_lhs_scalar,
+                                                       bool is_rhs_scalar,
+                                                       rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/Sub.cu b/cpp/src/binaryop/compiled/Sub.cu
new file mode 100644
index 00000000000..e0cf47c1310
--- /dev/null
+++ b/cpp/src/binaryop/compiled/Sub.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::Sub>(mutable_column_device_view&,
+                                        column_device_view const&,
+                                        column_device_view const&,
+                                        bool is_lhs_scalar,
+                                        bool is_rhs_scalar,
+                                        rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/TrueDiv.cu b/cpp/src/binaryop/compiled/TrueDiv.cu
new file mode 100644
index 00000000000..d8f1d956340
--- /dev/null
+++ b/cpp/src/binaryop/compiled/TrueDiv.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+template void apply_binary_op<ops::TrueDiv>(mutable_column_device_view&,
+                                            column_device_view const&,
+                                            column_device_view const&,
+                                            bool is_lhs_scalar,
+                                            bool is_rhs_scalar,
+                                            rmm::cuda_stream_view);
+}
diff --git a/cpp/src/binaryop/compiled/binary_ops.cu b/cpp/src/binaryop/compiled/binary_ops.cu
index 2b24e0cfa3d..1dd00c4b981 100644
--- a/cpp/src/binaryop/compiled/binary_ops.cu
+++ b/cpp/src/binaryop/compiled/binary_ops.cu
@@ -15,13 +15,12 @@
  */
 
 #include "binary_ops.hpp"
+#include "operation.cuh"
 
+#include
 #include
-#include
-#include
 #include
-#include
-#include
+#include
 #include
 #include
 
@@ -32,204 +31,76 @@ namespace binops {
 namespace compiled {
 
 namespace {
-
-template <typename Lhs, typename Rhs, typename Out>
-struct apply_binop {
-  binary_operator op;
-  apply_binop(binary_operator op) : op(op) {}
-  CUDA_DEVICE_CALLABLE Out operator()(Lhs const& x, Rhs const& y) const
-  {
-    switch (op) {
-      case binary_operator::EQUAL: return this->equal(x, y);
-      case binary_operator::NOT_EQUAL: return this->not_equal(x, y);
-      case binary_operator::LESS: return this->less(x, y);
-      case binary_operator::GREATER: return this->greater(x, y);
-      case binary_operator::LESS_EQUAL: return this->less_equal(x, y);
-      case binary_operator::GREATER_EQUAL: return this->greater_equal(x, y);
-      default: return Out{};
-    }
-  }
-  CUDA_DEVICE_CALLABLE Out equal(Lhs const& x, Rhs const& y) const
-  {
-    return static_cast<Out>(x == y);
-  }
-  CUDA_DEVICE_CALLABLE Out not_equal(Lhs const& x, Rhs const& y) const
-  {
-    return static_cast<Out>(x != y);
-  }
-  CUDA_DEVICE_CALLABLE Out less(Lhs const& x, Rhs const& y) const
-  {
-    return static_cast<Out>(x < y);
-  }
-  CUDA_DEVICE_CALLABLE Out greater(Lhs const& x, Rhs const& y) const
-  {
-    return static_cast<Out>(x > y);
-  }
-  CUDA_DEVICE_CALLABLE Out less_equal(Lhs const& x, Rhs const& y) const
-  {
-    return static_cast<Out>(x <= y);
-  }
-  CUDA_DEVICE_CALLABLE Out greater_equal(Lhs const& x, Rhs const& y) const
-  {
-    return static_cast<Out>(x >= y);
-  }
-};
-
-template <typename Lhs, typename Rhs, typename Out>
-struct apply_binop_scalar_lhs_rhs : apply_binop<Lhs, Rhs, Out> {
-  cudf::scalar_device_type_t<Rhs> scalar;
-  apply_binop_scalar_lhs_rhs(binary_operator op, cudf::scalar_device_type_t<Rhs> scalar)
-    : apply_binop<Lhs, Rhs, Out>(op), scalar(scalar)
-  {
-  }
-  CUDA_DEVICE_CALLABLE Out operator()(Lhs const& x) const
-  {
-    return apply_binop<Lhs, Rhs, Out>::operator()(x, scalar.value());
-  }
-};
-
-template <typename Lhs, typename Rhs, typename Out>
-struct apply_binop_scalar_rhs_lhs : apply_binop<Rhs, Lhs, Out> {
-  cudf::scalar_device_type_t<Rhs> scalar;
apply_binop_scalar_rhs_lhs(binary_operator op, cudf::scalar_device_type_t scalar) - : apply_binop(op), scalar(scalar) +/** + * @brief Converts scalar to column_device_view with single element. + * + * @return pair with column_device_view and column containing any auxilary data to create + * column_view from scalar + */ +struct scalar_as_column_device_view { + using return_type = typename std::pair>; + template ())>* = nullptr> + return_type operator()(scalar const& s, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { + auto h_scalar_type_view = static_cast&>(const_cast(s)); + auto col_v = + column_view(s.type(), 1, h_scalar_type_view.data(), (bitmask_type const*)s.validity_data()); + return std::pair{column_device_view::create(col_v, stream), std::unique_ptr(nullptr)}; } - CUDA_DEVICE_CALLABLE Out operator()(Lhs const& x) const + template ())>* = nullptr> + return_type operator()(scalar const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) { - return apply_binop::operator()(scalar.value(), x); + CUDF_FAIL("Unsupported type"); } }; +// specialization for cudf::string_view +template <> +scalar_as_column_device_view::return_type +scalar_as_column_device_view::operator()(scalar const& s, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + using T = cudf::string_view; + auto h_scalar_type_view = static_cast&>(const_cast(s)); + + // build offsets column from the string size + auto offsets_transformer_itr = + thrust::make_constant_iterator(h_scalar_type_view.size()); + auto offsets_column = strings::detail::make_offsets_child_column( + offsets_transformer_itr, offsets_transformer_itr + 1, stream, mr); + + auto chars_column_v = + column_view(data_type{type_id::INT8}, h_scalar_type_view.size(), h_scalar_type_view.data()); + // Construct string column_view + auto col_v = column_view(s.type(), + 1, + nullptr, + (bitmask_type const*)s.validity_data(), + cudf::UNKNOWN_NULL_COUNT, + 0, + {offsets_column->view(), chars_column_v}); + return std::pair{column_device_view::create(col_v, stream), std::move(offsets_column)}; +} -template -struct binary_op { - std::unique_ptr operator()(column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type out_type, - bool const reversed, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - { - auto new_mask = binops::detail::scalar_col_valid_mask_and(lhs, rhs, stream, mr); - auto out = make_fixed_width_column(out_type, - lhs.size(), - std::move(new_mask), - rhs.is_valid(stream) ? cudf::UNKNOWN_NULL_COUNT : lhs.size(), - stream, - mr); - - if (lhs.size() > 0 && rhs.is_valid(stream)) { - auto out_view = out->mutable_view(); - auto out_itr = out_view.begin(); - auto lhs_device_view = column_device_view::create(lhs, stream); - using rhs_type = cudf::scalar_type_t; - auto rhs_scalar = rhs_type(static_cast(rhs), stream); - auto rhs_scalar_view = get_scalar_device_view(rhs_scalar); - if (lhs.has_nulls()) { - auto lhs_itr = cudf::detail::make_null_replacement_iterator(*lhs_device_view, Lhs{}); - reversed - ? 
thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - out_itr, - apply_binop_scalar_rhs_lhs{op, rhs_scalar_view}) - : thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - out_itr, - apply_binop_scalar_lhs_rhs{op, rhs_scalar_view}); - } else { - auto lhs_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(size_type{0}), - [col = *lhs_device_view] __device__(size_type i) { return col.element(i); }); - reversed - ? thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - out_itr, - apply_binop_scalar_rhs_lhs{op, rhs_scalar_view}) - : thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - out_itr, - apply_binop_scalar_lhs_rhs{op, rhs_scalar_view}); - } - } - - CHECK_CUDA(stream.value()); - - return out; - } - - std::unique_ptr operator()(column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type out_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - { - auto new_mask = cudf::detail::bitmask_and(table_view({lhs, rhs}), stream, mr); - auto out = make_fixed_width_column( - out_type, lhs.size(), std::move(new_mask), cudf::UNKNOWN_NULL_COUNT, stream, mr); - - if (lhs.size() > 0) { - auto out_view = out->mutable_view(); - auto out_itr = out_view.begin(); - auto lhs_device_view = column_device_view::create(lhs, stream); - auto rhs_device_view = column_device_view::create(rhs, stream); - if (lhs.has_nulls() && rhs.has_nulls()) { - auto lhs_itr = cudf::detail::make_null_replacement_iterator(*lhs_device_view, Lhs{}); - auto rhs_itr = cudf::detail::make_null_replacement_iterator(*rhs_device_view, Rhs{}); - thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - rhs_itr, - out_itr, - apply_binop{op}); - } else if (lhs.has_nulls()) { - auto lhs_itr = cudf::detail::make_null_replacement_iterator(*lhs_device_view, Lhs{}); - auto rhs_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(size_type{0}), - [col = *rhs_device_view] __device__(size_type i) { return col.element(i); }); - thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - rhs_itr, - out_itr, - apply_binop{op}); - } else if (rhs.has_nulls()) { - auto lhs_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(size_type{0}), - [col = *lhs_device_view] __device__(size_type i) { return col.element(i); }); - auto rhs_itr = cudf::detail::make_null_replacement_iterator(*rhs_device_view, Rhs{}); - thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - rhs_itr, - out_itr, - apply_binop{op}); - } else { - auto lhs_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(size_type{0}), - [col = *lhs_device_view] __device__(size_type i) { return col.element(i); }); - auto rhs_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(size_type{0}), - [col = *rhs_device_view] __device__(size_type i) { return col.element(i); }); - thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - rhs_itr, - out_itr, - apply_binop{op}); - } - } - - CHECK_CUDA(stream.value()); - - return out; - } -}; +/** + * @brief Converts scalar to column_device_view with single element. + * + * @param scal scalar to convert + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @param mr Device memory resource used to allocate the returned column's device memory + * @return pair with column_device_view and column containing any auxilary data to create + * column_view from scalar + */ +auto scalar_to_column_device_view( + scalar const& scal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + return type_dispatcher(scal.type(), scalar_as_column_device_view{}, scal, stream, mr); +} // This functor does the actual comparison between string column value and a scalar string // or between two string column values using a comparator @@ -337,152 +208,181 @@ struct null_considering_binop { // Create device views for inputs auto const lhs_dev_view = get_device_view(lhs); auto const rhs_dev_view = get_device_view(rhs); - - switch (op) { - case binary_operator::NULL_EQUALS: { - // Validate input - CUDF_EXPECTS(output_type.id() == type_id::BOOL8, "Output column type has to be bool"); - - // Make a bool8 numeric output column - out = make_numeric_column( - data_type{type_id::BOOL8}, col_size, mask_state::ALL_VALID, stream, mr); - - // Create a compare function lambda - auto equal_func = [] __device__(bool lhs_valid, - bool rhs_valid, - cudf::string_view lhs_value, - cudf::string_view rhs_value) { - if (!lhs_valid && !rhs_valid) return true; - if (lhs_valid && rhs_valid) return (lhs_value == rhs_value); - return false; - }; - - // Populate output column - populate_out_col(lhs_dev_view, - rhs_dev_view, - col_size, - stream, - equal_func, - mutable_column_view{*out}.begin()); - - break; - } - - case binary_operator::NULL_MAX: - case binary_operator::NULL_MIN: { - // Validate input - CUDF_EXPECTS(output_type.id() == lhs.type().id(), - "Output column type should match input column type"); - - // Shallow copy of the resultant strings - rmm::device_uvector out_col_strings(col_size, stream); - - // Invalid output column strings - null rows - cudf::string_view const invalid_str{nullptr, 0}; - - // Create a compare function lambda - auto minmax_func = [op, invalid_str] __device__(bool lhs_valid, - bool rhs_valid, - cudf::string_view lhs_value, - cudf::string_view rhs_value) { - if (!lhs_valid && !rhs_valid) - return invalid_str; - else if (lhs_valid && rhs_valid) { - return (op == binary_operator::NULL_MAX) - ? thrust::maximum()(lhs_value, rhs_value) - : thrust::minimum()(lhs_value, rhs_value); - } else if (lhs_valid) - return lhs_value; - else - return rhs_value; - }; - - // Populate output column - populate_out_col( - lhs_dev_view, rhs_dev_view, col_size, stream, minmax_func, out_col_strings.data()); - - // Create an output column with the resultant strings - out = cudf::make_strings_column(out_col_strings, invalid_str, stream, mr); - - break; - } - - default: { - CUDF_FAIL("Null aware binop not supported"); - } - } - - return out; + // Validate input + CUDF_EXPECTS(output_type.id() == lhs.type().id(), + "Output column type should match input column type"); + + // Shallow copy of the resultant strings + rmm::device_uvector out_col_strings(col_size, stream); + + // Invalid output column strings - null rows + cudf::string_view const invalid_str{nullptr, 0}; + + // Create a compare function lambda + auto minmax_func = + [op, invalid_str] __device__( + bool lhs_valid, bool rhs_valid, cudf::string_view lhs_value, cudf::string_view rhs_value) { + if (!lhs_valid && !rhs_valid) + return invalid_str; + else if (lhs_valid && rhs_valid) { + return (op == binary_operator::NULL_MAX) + ? 
thrust::maximum()(lhs_value, rhs_value) + : thrust::minimum()(lhs_value, rhs_value); + } else if (lhs_valid) + return lhs_value; + else + return rhs_value; + }; + + // Populate output column + populate_out_col( + lhs_dev_view, rhs_dev_view, col_size, stream, minmax_func, out_col_strings.data()); + + // Create an output column with the resultant strings + return cudf::make_strings_column(out_col_strings, invalid_str, stream, mr); } }; } // namespace -std::unique_ptr binary_operation(scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr string_null_min_max(scalar const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // hard-coded to only work with cudf::string_view so we don't explode compile times CUDF_EXPECTS(lhs.type().id() == cudf::type_id::STRING, "Invalid/Unsupported lhs datatype"); CUDF_EXPECTS(rhs.type().id() == cudf::type_id::STRING, "Invalid/Unsupported rhs datatype"); - if (is_null_dependent(op)) { - if (rhs.is_empty()) return cudf::make_empty_column(output_type); - auto rhs_device_view = cudf::column_device_view::create(rhs, stream); - return null_considering_binop{}(lhs, *rhs_device_view, op, output_type, rhs.size(), stream, mr); - } else { - CUDF_EXPECTS(is_boolean(output_type), "Invalid/Unsupported output datatype"); - // Should pass the right type of scalar and column_view when specializing binary_op - return binary_op{}( - rhs, lhs, op, output_type, true, stream, mr); - } + CUDF_EXPECTS(op == binary_operator::NULL_MAX or op == binary_operator::NULL_MIN, + "Unsupported binary operation"); + if (rhs.is_empty()) return cudf::make_empty_column(output_type); + auto rhs_device_view = cudf::column_device_view::create(rhs, stream); + return null_considering_binop{}(lhs, *rhs_device_view, op, output_type, rhs.size(), stream, mr); } -std::unique_ptr binary_operation(column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr string_null_min_max(column_view const& lhs, + scalar const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // hard-coded to only work with cudf::string_view so we don't explode compile times CUDF_EXPECTS(lhs.type().id() == cudf::type_id::STRING, "Invalid/Unsupported lhs datatype"); CUDF_EXPECTS(rhs.type().id() == cudf::type_id::STRING, "Invalid/Unsupported rhs datatype"); - if (is_null_dependent(op)) { - if (lhs.is_empty()) return cudf::make_empty_column(output_type); - auto lhs_device_view = cudf::column_device_view::create(lhs, stream); - return null_considering_binop{}(*lhs_device_view, rhs, op, output_type, lhs.size(), stream, mr); - } else { - CUDF_EXPECTS(is_boolean(output_type), "Invalid/Unsupported output datatype"); - return binary_op{}( - lhs, rhs, op, output_type, false, stream, mr); - } + CUDF_EXPECTS(op == binary_operator::NULL_MAX or op == binary_operator::NULL_MIN, + "Unsupported binary operation"); + if (lhs.is_empty()) return cudf::make_empty_column(output_type); + auto lhs_device_view = cudf::column_device_view::create(lhs, stream); + return null_considering_binop{}(*lhs_device_view, rhs, op, output_type, lhs.size(), stream, mr); } -std::unique_ptr binary_operation(column_view const& lhs, - column_view const& rhs, - 
binary_operator op,
-                                         data_type output_type,
-                                         rmm::cuda_stream_view stream,
-                                         rmm::mr::device_memory_resource* mr)
+std::unique_ptr<column> string_null_min_max(column_view const& lhs,
+                                            column_view const& rhs,
+                                            binary_operator op,
+                                            data_type output_type,
+                                            rmm::cuda_stream_view stream,
+                                            rmm::mr::device_memory_resource* mr)
 {
   // hard-coded to only work with cudf::string_view so we don't explode compile times
   CUDF_EXPECTS(lhs.type().id() == cudf::type_id::STRING, "Invalid/Unsupported lhs datatype");
   CUDF_EXPECTS(rhs.type().id() == cudf::type_id::STRING, "Invalid/Unsupported rhs datatype");
-  if (is_null_dependent(op)) {
-    CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes do not match");
-    if (lhs.is_empty()) return cudf::make_empty_column(output_type);
-    auto lhs_device_view = cudf::column_device_view::create(lhs, stream);
-    auto rhs_device_view = cudf::column_device_view::create(rhs, stream);
-    return null_considering_binop{}(
-      *lhs_device_view, *rhs_device_view, op, output_type, lhs.size(), stream, mr);
-  } else {
-    CUDF_EXPECTS(is_boolean(output_type), "Invalid/Unsupported output datatype");
-    return binary_op<cudf::string_view, cudf::string_view, bool>{}(
-      lhs, rhs, op, output_type, stream, mr);
-  }
+  CUDF_EXPECTS(op == binary_operator::NULL_MAX or op == binary_operator::NULL_MIN,
+               "Unsupported binary operation");
+  CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes do not match");
+  if (lhs.is_empty()) return cudf::make_empty_column(output_type);
+  auto lhs_device_view = cudf::column_device_view::create(lhs, stream);
+  auto rhs_device_view = cudf::column_device_view::create(rhs, stream);
+  return null_considering_binop{}(
+    *lhs_device_view, *rhs_device_view, op, output_type, lhs.size(), stream, mr);
+}
+
+void operator_dispatcher(mutable_column_device_view& out,
+                         column_device_view const& lhs,
+                         column_device_view const& rhs,
+                         bool is_lhs_scalar,
+                         bool is_rhs_scalar,
+                         binary_operator op,
+                         rmm::cuda_stream_view stream)
+{
+  // clang-format off
+switch (op) {
+case binary_operator::ADD:                  apply_binary_op<ops::Add>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::SUB:                  apply_binary_op<ops::Sub>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::MUL:                  apply_binary_op<ops::Mul>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::DIV:                  apply_binary_op<ops::Div>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::TRUE_DIV:             apply_binary_op<ops::TrueDiv>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::FLOOR_DIV:            apply_binary_op<ops::FloorDiv>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::MOD:                  apply_binary_op<ops::Mod>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::PYMOD:                apply_binary_op<ops::PyMod>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::POW:                  apply_binary_op<ops::Pow>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::EQUAL:
+case binary_operator::NOT_EQUAL:
+case binary_operator::NULL_EQUALS:
+if(out.type().id() != type_id::BOOL8) CUDF_FAIL("Output type of Comparison operator should be bool type");
+dispatch_equality_op(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, op, stream); break;
+case binary_operator::LESS:                 apply_binary_op<ops::Less>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::GREATER:              apply_binary_op<ops::Greater>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::LESS_EQUAL:           apply_binary_op<ops::LessEqual>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::GREATER_EQUAL:        apply_binary_op<ops::GreaterEqual>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::BITWISE_AND:          apply_binary_op<ops::BitwiseAnd>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::BITWISE_OR:           apply_binary_op<ops::BitwiseOr>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::BITWISE_XOR:          apply_binary_op<ops::BitwiseXor>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::LOGICAL_AND:          apply_binary_op<ops::LogicalAnd>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::LOGICAL_OR:           apply_binary_op<ops::LogicalOr>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+/*
+case binary_operator::GENERIC_BINARY:       // Cannot be compiled, should be called by jit::binary_operation
+*/
+case binary_operator::SHIFT_LEFT:           apply_binary_op<ops::ShiftLeft>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::SHIFT_RIGHT:          apply_binary_op<ops::ShiftRight>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::SHIFT_RIGHT_UNSIGNED: apply_binary_op<ops::ShiftRightUnsigned>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::LOG_BASE:             apply_binary_op<ops::LogBase>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::ATAN2:                apply_binary_op<ops::ATan2>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::PMOD:                 apply_binary_op<ops::PMod>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::NULL_MAX:             apply_binary_op<ops::NullMax>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::NULL_MIN:             apply_binary_op<ops::NullMin>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+default:;
+}
+  // clang-format on
+}
+
+// vector_vector
+void binary_operation(mutable_column_view& out,
+                      column_view const& lhs,
+                      column_view const& rhs,
+                      binary_operator op,
+                      rmm::cuda_stream_view stream)
+{
+  auto lhsd = column_device_view::create(lhs, stream);
+  auto rhsd = column_device_view::create(rhs, stream);
+  auto outd = mutable_column_device_view::create(out, stream);
+  operator_dispatcher(*outd, *lhsd, *rhsd, false, false, op, stream);
+}
+// scalar_vector
+void binary_operation(mutable_column_view& out,
+                      scalar const& lhs,
+                      column_view const& rhs,
+                      binary_operator op,
+                      rmm::cuda_stream_view stream)
+{
+  auto [lhsd, aux] = scalar_to_column_device_view(lhs, stream);
+  auto rhsd        = column_device_view::create(rhs, stream);
+  auto outd        = mutable_column_device_view::create(out, stream);
+  operator_dispatcher(*outd, *lhsd, *rhsd, true, false, op, stream);
+}
+// vector_scalar
+void binary_operation(mutable_column_view& out,
+                      column_view const& lhs,
+                      scalar const& rhs,
+                      binary_operator op,
+                      rmm::cuda_stream_view stream)
+{
+  auto lhsd        = column_device_view::create(lhs, stream);
+  auto [rhsd, aux] = scalar_to_column_device_view(rhs, stream);
+  auto outd        = mutable_column_device_view::create(out, stream);
+  operator_dispatcher(*outd, *lhsd, *rhsd, false, true, op, stream);
+}
 } // namespace compiled
diff --git a/cpp/src/binaryop/compiled/binary_ops.cuh b/cpp/src/binaryop/compiled/binary_ops.cuh
new file mode 100644
index 00000000000..b17f3eddc5d
--- /dev/null
+++ b/cpp/src/binaryop/compiled/binary_ops.cuh
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
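These three thin overloads are the entire compiled entry surface: each materializes device views (the scalar forms via scalar_to_column_device_view, which wraps the scalar as a one-row column) and forwards to operator_dispatcher. A minimal caller sketch, assuming out was preallocated with the desired type, size, and null mask (run_compiled_add is a hypothetical helper, not part of this change):

    void run_compiled_add(cudf::mutable_column_view& out,
                          cudf::column_view const& lhs,
                          cudf::column_view const& rhs,
                          rmm::cuda_stream_view stream)
    {
      // Dispatches on op, then on element types, then launches for_each_kernel.
      cudf::binops::compiled::binary_operation(out, lhs, rhs, cudf::binary_operator::ADD, stream);
    }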
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "binary_ops.hpp" +#include "operation.cuh" + +#include +#include + +#include +#include + +namespace cudf { +namespace binops { +namespace compiled { + +template +constexpr bool is_bool_result() +{ + using ReturnType = std::invoke_result_t; + return std::is_same_v; +} + +/** + * @brief Type casts each element of the column to `CastType` + * + */ +template +struct type_casted_accessor { + template + CUDA_DEVICE_CALLABLE CastType operator()(cudf::size_type i, + column_device_view const& col, + bool is_scalar) const + { + if constexpr (column_device_view::has_element_accessor() and + std::is_convertible_v) + return static_cast(col.element(is_scalar ? 0 : i)); + return {}; + } +}; + +/** + * @brief Type casts value to column type and stores in `i`th row of the column + * + */ +template +struct typed_casted_writer { + template + CUDA_DEVICE_CALLABLE void operator()(cudf::size_type i, + mutable_column_device_view const& col, + FromType val) const + { + if constexpr (mutable_column_device_view::has_element_accessor() and + std::is_constructible_v) { + col.element(i) = static_cast(val); + } else if constexpr (is_fixed_point() and std::is_constructible_v) { + if constexpr (is_fixed_point()) + col.data()[i] = val.rescaled(numeric::scale_type{col.type().scale()}).value(); + else + col.data()[i] = Element{val, numeric::scale_type{col.type().scale()}}.value(); + } + } +}; + +// Functors to launch only defined operations. + +/** + * @brief Functor to launch only defined operations with common type. + * + * @tparam BinaryOperator binary operator functor + */ +template +struct ops_wrapper { + mutable_column_device_view& out; + column_device_view const& lhs; + column_device_view const& rhs; + bool const& is_lhs_scalar; + bool const& is_rhs_scalar; + template + __device__ void operator()(size_type i) + { + if constexpr (std::is_invocable_v) { + TypeCommon x = + type_dispatcher(lhs.type(), type_casted_accessor{}, i, lhs, is_lhs_scalar); + TypeCommon y = + type_dispatcher(rhs.type(), type_casted_accessor{}, i, rhs, is_rhs_scalar); + auto result = [&]() { + if constexpr (std::is_same_v or + std::is_same_v or + std::is_same_v) { + bool output_valid = false; + auto result = BinaryOperator{}.template operator()( + x, + y, + lhs.is_valid(is_lhs_scalar ? 0 : i), + rhs.is_valid(is_rhs_scalar ? 0 : i), + output_valid); + if (out.nullable() && !output_valid) out.set_null(i); + return result; + } else { + return BinaryOperator{}.template operator()(x, y); + } + // To supress nvcc warning + return std::invoke_result_t{}; + }(); + if constexpr (is_bool_result()) + out.element(i) = result; + else + type_dispatcher(out.type(), typed_casted_writer{}, i, out, result); + } + (void)i; + } +}; + +/** + * @brief Functor to launch only defined operations without common type. 
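+ *
+ * Unlike ops_wrapper above, this path reads lhs and rhs at their own element
+ * types instead of casting through a common type; device_type_dispatcher
+ * selects it only when get_common_type() finds no common data_type.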
+ * + * @tparam BinaryOperator binary operator functor + */ +template +struct ops2_wrapper { + mutable_column_device_view& out; + column_device_view const& lhs; + column_device_view const& rhs; + bool const& is_lhs_scalar; + bool const& is_rhs_scalar; + template + __device__ void operator()(size_type i) + { + if constexpr (!has_common_type_v and + std::is_invocable_v) { + TypeLhs x = lhs.element(is_lhs_scalar ? 0 : i); + TypeRhs y = rhs.element(is_rhs_scalar ? 0 : i); + auto result = [&]() { + if constexpr (std::is_same_v or + std::is_same_v or + std::is_same_v) { + bool output_valid = false; + auto result = BinaryOperator{}.template operator()( + x, + y, + lhs.is_valid(is_lhs_scalar ? 0 : i), + rhs.is_valid(is_rhs_scalar ? 0 : i), + output_valid); + if (out.nullable() && !output_valid) out.set_null(i); + return result; + } else { + return BinaryOperator{}.template operator()(x, y); + } + // To supress nvcc warning + return std::invoke_result_t{}; + }(); + if constexpr (is_bool_result()) + out.element(i) = result; + else + type_dispatcher(out.type(), typed_casted_writer{}, i, out, result); + } + (void)i; + } +}; + +/** + * @brief Functor which does single, and double type dispatcher in device code + * + * single type dispatcher for lhs and rhs with common types. + * double type dispatcher for lhs and rhs without common types. + * + * @tparam BinaryOperator binary operator functor + */ +template +struct device_type_dispatcher { + mutable_column_device_view out; + column_device_view lhs; + column_device_view rhs; + bool is_lhs_scalar; + bool is_rhs_scalar; + std::optional common_data_type; + + __device__ void operator()(size_type i) + { + if (common_data_type) { + type_dispatcher(*common_data_type, + ops_wrapper{out, lhs, rhs, is_lhs_scalar, is_rhs_scalar}, + i); + } else { + double_type_dispatcher( + lhs.type(), + rhs.type(), + ops2_wrapper{out, lhs, rhs, is_lhs_scalar, is_rhs_scalar}, + i); + } + } +}; + +/** + * @brief Simplified for_each kernel + * + * @param size number of elements to process. + * @param f Functor object to call for each element. + */ +template +__global__ void for_each_kernel(cudf::size_type size, Functor f) +{ + int tid = threadIdx.x; + int blkid = blockIdx.x; + int blksz = blockDim.x; + int gridsz = gridDim.x; + + int start = tid + blkid * blksz; + int step = blksz * gridsz; + +#pragma unroll + for (cudf::size_type i = start; i < size; i += step) { + f(i); + } +} + +/** + * @brief Launches Simplified for_each kernel with maximum occupancy grid dimensions. + * + * @tparam Functor + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param size number of elements to process. + * @param f Functor object to call for each element. + */ +template +void for_each(rmm::cuda_stream_view stream, cudf::size_type size, Functor f) +{ + int block_size; + int min_grid_size; + CUDA_TRY( + cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, for_each_kernel)); + // 2 elements per thread. 
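+  // (for_each_kernel uses a grid-stride loop, so sizing the grid for roughly
+  //  two elements per thread halves the block count while the occupancy-tuned
+  //  block_size keeps the device saturated.)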
+ const int grid_size = util::div_rounding_up_safe(size, 2 * block_size); + for_each_kernel<<>>(size, std::forward(f)); +} + +template +void apply_binary_op(mutable_column_device_view& outd, + column_device_view const& lhsd, + column_device_view const& rhsd, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view stream) +{ + auto common_dtype = get_common_type(outd.type(), lhsd.type(), rhsd.type()); + + // Create binop functor instance + auto binop_func = device_type_dispatcher{ + outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar, common_dtype}; + // Execute it on every element + for_each(stream, outd.size(), binop_func); +} + +} // namespace compiled +} // namespace binops +} // namespace cudf diff --git a/cpp/src/binaryop/compiled/binary_ops.hpp b/cpp/src/binaryop/compiled/binary_ops.hpp index a3f62f5018e..2a814c16d57 100644 --- a/cpp/src/binaryop/compiled/binary_ops.hpp +++ b/cpp/src/binaryop/compiled/binary_ops.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, NVIDIA CORPORATION. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,13 @@ #include +#include + namespace cudf { +// Forward declarations +class column_device_view; +class mutable_column_device_view; + namespace binops { namespace detail { /** @@ -45,6 +51,30 @@ inline bool is_null_dependent(binary_operator op) namespace compiled { +std::unique_ptr string_null_min_max( + scalar const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +std::unique_ptr string_null_min_max( + column_view const& lhs, + scalar const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +std::unique_ptr string_null_min_max( + column_view const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Performs a binary operation between a string scalar and a string * column. @@ -123,6 +153,89 @@ std::unique_ptr binary_operation( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +void binary_operation(mutable_column_view& out, + scalar const& lhs, + column_view const& rhs, + binary_operator op, + rmm::cuda_stream_view stream); +void binary_operation(mutable_column_view& out, + column_view const& lhs, + scalar const& rhs, + binary_operator op, + rmm::cuda_stream_view stream); +void binary_operation(mutable_column_view& out, + column_view const& lhs, + column_view const& rhs, + binary_operator op, + rmm::cuda_stream_view stream); + +// Defined in util.cpp +/** + * @brief Get the common type among all input types. + * + * @param out type 1 + * @param lhs type 2 + * @param rhs type 3 + * @return common type among @p out, @p lhs, @p rhs. + */ +std::optional get_common_type(data_type out, data_type lhs, data_type rhs); +/** + * @brief Check if input binary operation is supported for the given input and output types. 
+ * + * @param out output type of the binary operation + * @param lhs first operand type of the binary operation + * @param rhs second operand type of the binary operation + * @param op binary operator enum. + * @return true if given binary operator supports given input and output types. + */ +bool is_supported_operation(data_type out, data_type lhs, data_type rhs, binary_operator op); + +// Defined in individual .cu files. +/** + * @brief Deploys single type or double type dispatcher that runs binary operation on each element + * of @p lhsd and @p rhsd columns. + * + * This template is instantiated for each binary operator. + * + * @tparam BinaryOperator Binary operator functor + * @param outd mutable device view of output column + * @param lhsd device view of left operand column + * @param rhsd device view of right operand column + * @param is_lhs_scalar true if @p lhsd is a single element column representing a scalar + * @param is_rhs_scalar true if @p rhsd is a single element column representing a scalar + * @param stream CUDA stream used for device memory operations + */ +template +void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view stream); +/** + * @brief Deploys single type or double type dispatcher that runs equality operation on each element + * of @p lhsd and @p rhsd columns. + * + * Comparison operators are EQUAL, NOT_EQUAL, NULL_EQUALS. + * @p outd type is boolean. + * + * This template is instantiated for each binary operator. + * + * @param outd mutable device view of output column + * @param lhsd device view of left operand column + * @param rhsd device view of right operand column + * @param is_lhs_scalar true if @p lhsd is a single element column representing a scalar + * @param is_rhs_scalar true if @p rhsd is a single element column representing a scalar + * @param op comparison binary operator + * @param stream CUDA stream used for device memory operations + */ +void dispatch_equality_op(mutable_column_device_view& outd, + column_device_view const& lhsd, + column_device_view const& rhsd, + bool is_lhs_scalar, + bool is_rhs_scalar, + binary_operator op, + rmm::cuda_stream_view stream); } // namespace compiled } // namespace binops } // namespace cudf diff --git a/cpp/src/binaryop/compiled/equality_ops.cu b/cpp/src/binaryop/compiled/equality_ops.cu new file mode 100644 index 00000000000..feee310716a --- /dev/null +++ b/cpp/src/binaryop/compiled/equality_ops.cu @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
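Together, the two util.cpp helpers declared above gate and steer the dispatch: is_supported_operation rejects impossible (out, lhs, rhs, op) combinations, and get_common_type picks between the single-type and two-type element paths. A hedged sketch of composing them (validate_compiled_binop is a hypothetical wrapper written only for illustration):

    void validate_compiled_binop(cudf::data_type out,
                                 cudf::data_type lhs,
                                 cudf::data_type rhs,
                                 cudf::binary_operator op)
    {
      using namespace cudf::binops::compiled;
      // Compile-time support table, evaluated for the runtime type combination.
      CUDF_EXPECTS(is_supported_operation(out, lhs, rhs, op),
                   "Unsupported type combination for compiled binary operation");
      // An empty optional means no common type: the dispatcher takes the two-type path.
      std::optional<cudf::data_type> common = get_common_type(out, lhs, rhs);
      (void)common;
    }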
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+void dispatch_equality_op(mutable_column_device_view& outd,
+                          column_device_view const& lhsd,
+                          column_device_view const& rhsd,
+                          bool is_lhs_scalar,
+                          bool is_rhs_scalar,
+                          binary_operator op,
+                          rmm::cuda_stream_view stream)
+{
+  auto common_dtype = get_common_type(outd.type(), lhsd.type(), rhsd.type());
+
+  // Execute it on every element
+  for_each(
+    stream,
+    outd.size(),
+    [op, outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar, common_dtype] __device__(size_type i) {
+      // clang-format off
+      // Cases that enable the same template types are grouped together (better compile-time performance)
+      switch (op) {
+        case binary_operator::EQUAL:       device_type_dispatcher<ops::Equal>{outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar, common_dtype}(i); break;
+        case binary_operator::NOT_EQUAL:   device_type_dispatcher<ops::NotEqual>{outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar, common_dtype}(i); break;
+        case binary_operator::NULL_EQUALS: device_type_dispatcher<ops::NullEquals>{outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar, common_dtype}(i); break;
+        default:;
+      }
+      // clang-format on
+    });
+}
+}  // namespace cudf::binops::compiled
diff --git a/cpp/src/binaryop/compiled/operation.cuh b/cpp/src/binaryop/compiled/operation.cuh
new file mode 100644
index 00000000000..86645e2cb8a
--- /dev/null
+++ b/cpp/src/binaryop/compiled/operation.cuh
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#pragma once + +#include + +#include + +namespace cudf { +namespace binops { +namespace compiled { + +// All binary operations +namespace ops { + +struct Add { + template + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) -> decltype(lhs + rhs) + { + return lhs + rhs; + } +}; + +struct Sub { + template + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) -> decltype(lhs - rhs) + { + return lhs - rhs; + } +}; + +struct Mul { + template + static constexpr inline bool is_supported() + { + return has_common_type_v or + // FIXME: without the following line, compilation error + // _deps/libcudacxx-src/include/cuda/std/detail/libcxx/include/chrono(917): error: + // identifier "cuda::std::__3::ratio<(long)86400000000l, (long)1l> ::num" is undefined in + // device code + (is_duration() and std::is_integral()) or + (std::is_integral() and is_duration()) or + (is_fixed_point() and is_numeric()) or + (is_numeric() and is_fixed_point()); + } + template ()>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) -> decltype(lhs * rhs) + { + return lhs * rhs; + } +}; + +struct Div { + template + static constexpr inline bool is_supported() + { + return has_common_type_v or + // FIXME: without this, compilation error on chrono:917 + (is_duration() and (std::is_integral() or is_duration())) or + (is_fixed_point() and is_numeric()) or + (is_numeric() and is_fixed_point()); + } + template ()>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) -> decltype(lhs / rhs) + { + return lhs / rhs; + } +}; + +struct TrueDiv { + template + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) + -> decltype((static_cast(lhs) / static_cast(rhs))) + { + return (static_cast(lhs) / static_cast(rhs)); + } +}; + +struct FloorDiv { + template + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) + -> decltype(floor(static_cast(lhs) / static_cast(rhs))) + { + return floor(static_cast(lhs) / static_cast(rhs)); + } +}; + +struct Mod { + template + static constexpr inline bool is_supported() + { + return has_common_type_v or + // FIXME: without this, compilation error + //_deps/libcudacxx-src/include/cuda/std/detail/libcxx/include/chrono(1337): + // error : expression must have integral or unscoped enum type + (is_duration() and (std::is_integral() or is_duration())); + } + template ()>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) -> decltype(lhs % rhs) + { + return lhs % rhs; + } + template >)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) -> float + { + return fmodf(static_cast(lhs), static_cast(rhs)); + } + template >)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) -> double + { + return fmod(static_cast(lhs), static_cast(rhs)); + } +}; + +struct PMod { + // Ideally, these two specializations - one for integral types and one for non integral + // types shouldn't be required, as std::fmod should promote integral types automatically + // to double and call the std::fmod overload for doubles. 
Sadly, doing this in jitified + // code does not work - it is having trouble deciding between float/double overloads + template >)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) + { + using common_t = std::common_type_t; + common_t xconv = static_cast(x); + common_t yconv = static_cast(y); + auto rem = xconv % yconv; + if constexpr (std::is_signed_v) + if (rem < 0) rem = (rem + yconv) % yconv; + return rem; + } + + template < + typename TypeLhs, + typename TypeRhs, + std::enable_if_t<(std::is_floating_point_v>)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) + { + using common_t = std::common_type_t; + common_t xconv = static_cast(x); + common_t yconv = static_cast(y); + auto rem = std::fmod(xconv, yconv); + if (rem < 0) rem = std::fmod(rem + yconv, yconv); + return rem; + } +}; + +struct PyMod { + template >)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(((x % y) + y) % y) + { + return ((x % y) + y) % y; + } + + template < + typename TypeLhs, + typename TypeRhs, + std::enable_if_t<(std::is_floating_point_v>)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> double + { + double x1 = static_cast(x); + double y1 = static_cast(y); + return fmod(fmod(x1, y1) + y1, y1); + } + + template ())>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(((x % y) + y) % y) + { + return ((x % y) + y) % y; + } +}; + +struct Pow { + template and + std::is_convertible_v)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> double + { + return pow(static_cast(x), static_cast(y)); + } +}; + +struct LogBase { + template and + std::is_convertible_v)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> double + { + return (std::log(static_cast(x)) / std::log(static_cast(y))); + } +}; + +struct ATan2 { + template and + std::is_convertible_v)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> double + { + return std::atan2(static_cast(x), static_cast(y)); + } +}; + +struct ShiftLeft { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x << y) + { + return (x << y); + } +}; + +struct ShiftRight { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x >> y) + { + return (x >> y); + } +}; + +struct ShiftRightUnsigned { + template < + typename TypeLhs, + typename TypeRhs, + std::enable_if_t<(std::is_integral_v and not is_boolean())>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) + -> decltype(static_cast>(x) >> y) + { + return (static_cast>(x) >> y); + } +}; + +struct BitwiseAnd { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x & y) + { + return (x & y); + } +}; + +struct BitwiseOr { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x | y) + { + return (x | y); + } +}; + +struct BitwiseXor { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x ^ y) + { + return (x ^ y); + } +}; + +struct LogicalAnd { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x && y) + { + return (x && y); + } +}; + +struct LogicalOr { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x || y) + { + return (x || y); + } +}; + +struct Equal { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x == y) + { + return (x == y); + } +}; + 
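All of the functors above rely on expression SFINAE: a trailing return type such as -> decltype(x ^ y) drops the overload for any type pair that cannot form the expression, which is exactly the property util.cpp probes with std::is_invocable_v. A self-contained, host-only illustration (HypotheticalXor is a stand-in, not part of this change):

    #include <type_traits>

    struct HypotheticalXor {
      template <typename TypeLhs, typename TypeRhs>
      auto operator()(TypeLhs x, TypeRhs y) -> decltype(x ^ y)  // invalid pairs drop out
      {
        return x ^ y;
      }
    };

    static_assert(std::is_invocable_v<HypotheticalXor, int, unsigned>);    // int ^ unsigned is valid
    static_assert(not std::is_invocable_v<HypotheticalXor, double, int>);  // no operator^ for double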
+struct NotEqual { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x != y) + { + return (x != y); + } +}; + +struct Less { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x < y) + { + return (x < y); + } +}; + +struct Greater { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x > y) + { + return (x > y); + } +}; + +struct LessEqual { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x <= y) + { + return (x <= y); + } +}; + +struct GreaterEqual { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x >= y) + { + return (x >= y); + } +}; + +struct NullEquals { + template + CUDA_DEVICE_CALLABLE auto operator()( + TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) -> decltype(x == y) + { + output_valid = true; + if (!lhs_valid && !rhs_valid) return true; + if (lhs_valid && rhs_valid) return x == y; + return false; + } + // To allow std::is_invocable_v = true + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x == y); +}; + +struct NullMax { + template > + CUDA_DEVICE_CALLABLE auto operator()( + TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) + -> decltype(static_cast(static_cast(x) > static_cast(y) ? x : y)) + { + output_valid = true; + auto const x_conv = static_cast(x); + auto const y_conv = static_cast(y); + if (!lhs_valid && !rhs_valid) { + output_valid = false; + return common_t{}; + } else if (lhs_valid && rhs_valid) { + return (x_conv > y_conv) ? x_conv : y_conv; + } else if (lhs_valid) + return x_conv; + else + return y_conv; + } + // To allow std::is_invocable_v = true + template > + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) + -> decltype(static_cast(static_cast(x) > static_cast(y) ? x : y)); +}; + +struct NullMin { + template > + CUDA_DEVICE_CALLABLE auto operator()( + TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) + -> decltype(static_cast(static_cast(x) < static_cast(y) ? x : y)) + { + output_valid = true; + auto const x_conv = static_cast(x); + auto const y_conv = static_cast(y); + if (!lhs_valid && !rhs_valid) { + output_valid = false; + return common_t{}; + } else if (lhs_valid && rhs_valid) { + return (x_conv < y_conv) ? x_conv : y_conv; + } else if (lhs_valid) + return x_conv; + else + return y_conv; + } + // To allow std::is_invocable_v = true + template > + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) + -> decltype(static_cast(static_cast(x) < static_cast(y) ? x : y)); +}; + +} // namespace ops +} // namespace compiled +} // namespace binops +} // namespace cudf diff --git a/cpp/src/binaryop/compiled/util.cpp b/cpp/src/binaryop/compiled/util.cpp new file mode 100644 index 00000000000..89320256aec --- /dev/null +++ b/cpp/src/binaryop/compiled/util.cpp @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "operation.cuh" + +#include +#include +#include +#include + +namespace cudf::binops::compiled { + +namespace { +/** + * @brief Functor that returns optional common type of 2 or 3 given types. + * + */ +struct common_type_functor { + template + struct nested_common_type_functor { + template + std::optional operator()() + { + // If common_type exists + if constexpr (cudf::has_common_type_v) { + using TypeCommon = typename std::common_type::type; + return data_type{type_to_id()}; + } else if constexpr (cudf::has_common_type_v) { + using TypeCommon = typename std::common_type::type; + // Eg. d=t-t + return data_type{type_to_id()}; + } + return {}; + } + }; + template + std::optional operator()(data_type out) + { + return type_dispatcher(out, nested_common_type_functor{}); + } +}; + +/** + * @brief Functor that return true if BinaryOperator supports given input and output types. + * + * @tparam BinaryOperator binary operator functor + */ +template +struct is_binary_operation_supported { + // For types where Out type is fixed. (eg. comparison types) + template + inline constexpr bool operator()(void) + { + if constexpr (column_device_view::has_element_accessor() and + column_device_view::has_element_accessor()) { + if constexpr (has_common_type_v) { + using common_t = std::common_type_t; + return std::is_invocable_v; + } else + return std::is_invocable_v; + } else { + return false; + } + } + + template + inline constexpr bool operator()(void) + { + if constexpr (column_device_view::has_element_accessor() and + column_device_view::has_element_accessor() and + (mutable_column_device_view::has_element_accessor() or + is_fixed_point())) { + if constexpr (has_common_type_v) { + using common_t = std::common_type_t; + if constexpr (std::is_invocable_v) { + using ReturnType = std::invoke_result_t; + return std::is_constructible_v; + } + } else { + if constexpr (std::is_invocable_v) { + using ReturnType = std::invoke_result_t; + return std::is_constructible_v; + } + } + } + return false; + } +}; + +struct is_supported_operation_functor { + template + struct nested_support_functor { + template + inline constexpr bool call() + { + return is_binary_operation_supported{} + .template operator()(); + } + template + inline constexpr bool operator()(binary_operator op) + { + switch (op) { + // clang-format off + case binary_operator::ADD: return call(); + case binary_operator::SUB: return call(); + case binary_operator::MUL: return call(); + case binary_operator::DIV: return call(); + case binary_operator::TRUE_DIV: return call(); + case binary_operator::FLOOR_DIV: return call(); + case binary_operator::MOD: return call(); + case binary_operator::PYMOD: return call(); + case binary_operator::POW: return call(); + case binary_operator::BITWISE_AND: return call(); + case binary_operator::BITWISE_OR: return call(); + case binary_operator::BITWISE_XOR: return call(); + case binary_operator::SHIFT_LEFT: return call(); + case binary_operator::SHIFT_RIGHT: return call(); + case binary_operator::SHIFT_RIGHT_UNSIGNED: return call(); + case binary_operator::LOG_BASE: return call(); + case binary_operator::ATAN2: return call(); + case binary_operator::PMOD: return call(); + case binary_operator::NULL_MAX: return call(); + case binary_operator::NULL_MIN: return call(); + /* + case binary_operator::GENERIC_BINARY: // defined in jit only. 
+ */ + default: return false; + // clang-format on + } + } + }; + + template + inline constexpr bool bool_op(data_type out) + { + return out.id() == type_id::BOOL8 and + is_binary_operation_supported{}.template operator()(); + } + template + inline constexpr bool operator()(data_type out, binary_operator op) + { + switch (op) { + // output type should be bool type. + case binary_operator::LOGICAL_AND: return bool_op(out); + case binary_operator::LOGICAL_OR: return bool_op(out); + case binary_operator::EQUAL: return bool_op(out); + case binary_operator::NOT_EQUAL: return bool_op(out); + case binary_operator::LESS: return bool_op(out); + case binary_operator::GREATER: return bool_op(out); + case binary_operator::LESS_EQUAL: return bool_op(out); + case binary_operator::GREATER_EQUAL: return bool_op(out); + case binary_operator::NULL_EQUALS: return bool_op(out); + default: return type_dispatcher(out, nested_support_functor{}, op); + } + return false; + } +}; + +} // namespace + +std::optional get_common_type(data_type out, data_type lhs, data_type rhs) +{ + return double_type_dispatcher(lhs, rhs, common_type_functor{}, out); +} + +bool is_supported_operation(data_type out, data_type lhs, data_type rhs, binary_operator op) +{ + return double_type_dispatcher(lhs, rhs, is_supported_operation_functor{}, out, op); +} +} // namespace cudf::binops::compiled diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 546eb050a60..045bfbe0327 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -181,6 +181,12 @@ T fixed_point_scalar::fixed_point_value(rmm::cuda_stream_view stream) const numeric::scaled_integer{_data.value(stream), numeric::scale_type{type().scale()}}}; } +template +fixed_point_scalar::operator value_type() const +{ + return this->fixed_point_value(rmm::cuda_stream_default); +} + template typename fixed_point_scalar::rep_type* fixed_point_scalar::data() { diff --git a/cpp/src/table/table_device_view.cu b/cpp/src/table/table_device_view.cu index 62daeed6d79..859a6be3bb0 100644 --- a/cpp/src/table/table_device_view.cu +++ b/cpp/src/table/table_device_view.cu @@ -55,7 +55,7 @@ template class table_device_view_base; template class table_device_view_base; namespace { -struct is_relationally_comparable_impl { +struct is_relationally_comparable_functor { template constexpr bool operator()() { @@ -74,7 +74,7 @@ bool is_relationally_comparable(TableView const& lhs, TableView const& rhs) // TODO: possible to implement without double type dispatcher. return lhs.column(i).type() == rhs.column(i).type() and type_dispatcher(lhs.column(i).type(), - is_relationally_comparable_impl{}); + is_relationally_comparable_functor{}); }); } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 48c96316795..34fceb9015e 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -163,6 +163,8 @@ ConfigureTest(BINARY_TEST binaryop/binop-verify-input-test.cpp binaryop/binop-null-test.cpp binaryop/binop-integration-test.cpp + binaryop/binop-compiled-test.cpp + binaryop/binop-compiled-fixed_point-test.cpp binaryop/binop-generic-ptx-test.cpp ) diff --git a/cpp/tests/binaryop/assert-binops.h b/cpp/tests/binaryop/assert-binops.h index 9e762a1c987..65859251e42 100644 --- a/cpp/tests/binaryop/assert-binops.h +++ b/cpp/tests/binaryop/assert-binops.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Copyright 2018-2019 BlazingDB, Inc. 
* Copyright 2018 Christian Noboa Mardini @@ -36,28 +36,21 @@ namespace binop { // result returned by the binop operation into string, which is then used for display purposes // when the values do not match. struct stringify_out_values { - template ()>* = nullptr> - std::string operator()(TypeOut lhs, TypeOut rhs) const + template + std::string operator()(size_type i, TypeOut lhs, TypeOut rhs) const { std::stringstream out_str; - out_str << "lhs: " << lhs << "\nrhs: " << rhs; - return out_str.str(); - } - - template ()>* = nullptr> - std::string operator()(TypeOut lhs, TypeOut rhs) const - { - std::stringstream out_str; - out_str << "lhs: " << lhs.time_since_epoch().count() - << "\nrhs: " << rhs.time_since_epoch().count(); - return out_str.str(); - } - - template ()>* = nullptr> - std::string operator()(TypeOut lhs, TypeOut rhs) const - { - std::stringstream out_str; - out_str << "lhs: " << lhs.count() << "\nrhs: " << rhs.count(); + out_str << "[" << i << "]:\n"; + if constexpr (is_fixed_point()) { + out_str << "lhs: " << std::string(lhs) << "\nrhs: " << std::string(rhs); + } else if constexpr (is_timestamp()) { + out_str << "lhs: " << lhs.time_since_epoch().count() + << "\nrhs: " << rhs.time_since_epoch().count(); + } else if constexpr (is_duration()) { + out_str << "lhs: " << lhs.count() << "\nrhs: " << rhs.count(); + } else { + out_str << "lhs: " << lhs << "\nrhs: " << rhs; + } return out_str.str(); } }; @@ -101,7 +94,7 @@ void ASSERT_BINOP(column_view const& out, for (size_t i = 0; i < out_data.size(); ++i) { auto lhs = out_data[i]; auto rhs = (TypeOut)(op(lhs_h, rhs_data[i])); - ASSERT_TRUE(value_comparator(lhs, rhs)) << stringify_out_values{}(lhs, rhs); + ASSERT_TRUE(value_comparator(lhs, rhs)) << stringify_out_values{}(i, lhs, rhs); } if (rhs.nullable()) { @@ -148,7 +141,7 @@ void ASSERT_BINOP(column_view const& out, for (size_t i = 0; i < out_data.size(); ++i) { auto lhs = out_data[i]; auto rhs = (TypeOut)(op(lhs_data[i], rhs_h)); - ASSERT_TRUE(value_comparator(lhs, rhs)) << stringify_out_values{}(lhs, rhs); + ASSERT_TRUE(value_comparator(lhs, rhs)) << stringify_out_values{}(i, lhs, rhs); } if (lhs.nullable()) { @@ -196,7 +189,7 @@ void ASSERT_BINOP(column_view const& out, for (size_t i = 0; i < out_data.size(); ++i) { auto lhs = out_data[i]; auto rhs = (TypeOut)(op(lhs_data[i], rhs_data[i])); - ASSERT_TRUE(value_comparator(lhs, rhs)) << stringify_out_values{}(lhs, rhs); + ASSERT_TRUE(value_comparator(lhs, rhs)) << stringify_out_values{}(i, lhs, rhs); } if (lhs.nullable() and rhs.nullable()) { diff --git a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp new file mode 100644 index 00000000000..feb75cc3f09 --- /dev/null +++ b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp @@ -0,0 +1,721 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
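The assert-binops.h hunk above collapses three SFINAE-selected overloads of stringify_out_values into one if constexpr chain and threads the row index through, so failures now report which row diverged. Because discarded constexpr branches are never instantiated, each TypeOut still compiles only the printing code valid for it; a reduced stand-alone sketch of the idiom (stringify and its branch condition are illustrative stand-ins):

    #include <sstream>
    #include <string>
    #include <type_traits>

    template <typename T>
    std::string stringify(int i, T value)
    {
      std::stringstream out;
      out << "[" << i << "]: ";
      if constexpr (std::is_arithmetic_v<T>) {
        out << value;          // numeric path
      } else {
        out << value.count();  // chrono-like path; never instantiated for arithmetic T
      }
      return out.str();
    }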
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include "cudf/utilities/error.hpp" + +namespace cudf::test::binop { + +template +struct FixedPointCompiledTestBothReps : public cudf::test::BaseFixture { +}; + +template +using wrapper = cudf::test::fixed_width_column_wrapper; +TYPED_TEST_CASE(FixedPointCompiledTestBothReps, cudf::test::FixedPointTypes); + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpAdd) +{ + using namespace numeric; + using decimalXX = TypeParam; + + auto const sz = std::size_t{1000}; + + auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto i) { + return decimalXX{i, scale_type{0}}; + }); + auto const vec1 = std::vector(begin, begin + sz); + auto const vec2 = std::vector(sz, decimalXX{2, scale_type{0}}); + auto expected = std::vector(sz); + + std::transform(std::cbegin(vec1), + std::cend(vec1), + std::cbegin(vec2), + std::begin(expected), + std::plus()); + + auto const lhs = wrapper(vec1.begin(), vec1.end()); + auto const rhs = wrapper(vec2.begin(), vec2.end()); + auto const expected_col = wrapper(expected.begin(), expected.end()); + + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = + cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpMultiply) +{ + using namespace numeric; + using decimalXX = TypeParam; + + auto const sz = std::size_t{1000}; + + auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto i) { + return decimalXX{i, scale_type{0}}; + }); + auto const vec1 = std::vector(begin, begin + sz); + auto const vec2 = std::vector(sz, decimalXX{2, scale_type{0}}); + auto expected = std::vector(sz); + + std::transform(std::cbegin(vec1), + std::cend(vec1), + std::cbegin(vec2), + std::begin(expected), + std::multiplies()); + + auto const lhs = wrapper(vec1.begin(), vec1.end()); + auto const rhs = wrapper(vec2.begin(), vec2.end()); + auto const expected_col = wrapper(expected.begin(), expected.end()); + + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::MUL, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = + cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::MUL, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, result->view()); +} + +template +using fp_wrapper = cudf::test::fixed_point_column_wrapper; + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpMultiply2) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; + auto const rhs = fp_wrapper{{10, 10, 10, 10, 10}, scale_type{0}}; + auto const expected = fp_wrapper{{110, 220, 330, 440, 550}, scale_type{-1}}; + + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::MUL, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = + cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::MUL, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpDiv) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = 
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpDiv)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{10, 30, 50, 70}, scale_type{-1}};
+  auto const rhs      = fp_wrapper<RepType>{{4, 4, 4, 4}, scale_type{0}};
+  auto const expected = fp_wrapper<RepType>{{2, 7, 12, 17}, scale_type{-1}};
+
+  auto const type =
+    cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::DIV,
+                                                   static_cast<cudf::column_view>(lhs).type(),
+                                                   static_cast<cudf::column_view>(rhs).type());
+  auto const result =
+    cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpDiv2)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{10, 30, 50, 70}, scale_type{-1}};
+  auto const rhs      = fp_wrapper<RepType>{{4, 4, 4, 4}, scale_type{-2}};
+  auto const expected = fp_wrapper<RepType>{{2, 7, 12, 17}, scale_type{1}};
+
+  auto const type =
+    cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::DIV,
+                                                   static_cast<cudf::column_view>(lhs).type(),
+                                                   static_cast<cudf::column_view>(rhs).type());
+  auto const result =
+    cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpDiv3)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{10, 30, 50, 70}, scale_type{-1}};
+  auto const rhs      = make_fixed_point_scalar<decimalXX>(12, scale_type{-1});
+  auto const expected = fp_wrapper<RepType>{{0, 2, 4, 5}, scale_type{0}};
+
+  auto const type = cudf::binary_operation_fixed_point_output_type(
+    cudf::binary_operator::DIV, static_cast<cudf::column_view>(lhs).type(), rhs->type());
+  auto const result =
+    cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpDiv4)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto begin = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 11; });
+  auto result_begin =
+    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i * 11) / 12; });
+  auto const lhs      = fp_wrapper<RepType>(begin, begin + 1000, scale_type{-1});
+  auto const rhs      = make_fixed_point_scalar<decimalXX>(12, scale_type{-1});
+  auto const expected = fp_wrapper<RepType>(result_begin, result_begin + 1000, scale_type{0});
+
+  auto const type = cudf::binary_operation_fixed_point_output_type(
+    cudf::binary_operator::DIV, static_cast<cudf::column_view>(lhs).type(), rhs->type());
+  auto const result =
+    cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
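+// For ADD the operands are rescaled to the smaller (more precise) scale
+// first, so 1.1 (scale -1) + 1.00 (scale -2) gives 2.10 at scale -2, which
+// is what the remaining ADD tests check.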
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpAdd2)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{11, 22, 33, 44, 55}, scale_type{-1}};
+  auto const rhs      = fp_wrapper<RepType>{{100, 200, 300, 400, 500}, scale_type{-2}};
+  auto const expected = fp_wrapper<RepType>{{210, 420, 630, 840, 1050}, scale_type{-2}};
+
+  auto const type =
+    cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD,
+                                                   static_cast<cudf::column_view>(lhs).type(),
+                                                   static_cast<cudf::column_view>(rhs).type());
+  auto const result =
+    cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpAdd3)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{1100, 2200, 3300, 4400, 5500}, scale_type{-3}};
+  auto const rhs      = fp_wrapper<RepType>{{100, 200, 300, 400, 500}, scale_type{-2}};
+  auto const expected = fp_wrapper<RepType>{{2100, 4200, 6300, 8400, 10500}, scale_type{-3}};
+
+  auto const type =
+    cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD,
+                                                   static_cast<cudf::column_view>(lhs).type(),
+                                                   static_cast<cudf::column_view>(rhs).type());
+  auto const result =
+    cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpAdd4)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{11, 22, 33, 44, 55}, scale_type{-1}};
+  auto const rhs      = make_fixed_point_scalar<decimalXX>(100, scale_type{-2});
+  auto const expected = fp_wrapper<RepType>{{210, 320, 430, 540, 650}, scale_type{-2}};
+
+  auto const type = cudf::binary_operation_fixed_point_output_type(
+    cudf::binary_operator::ADD, static_cast<cudf::column_view>(lhs).type(), rhs->type());
+  auto const result =
+    cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::ADD, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpAdd5)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = make_fixed_point_scalar<decimalXX>(100, scale_type{-2});
+  auto const rhs      = fp_wrapper<RepType>{{11, 22, 33, 44, 55}, scale_type{-1}};
+  auto const expected = fp_wrapper<RepType>{{210, 320, 430, 540, 650}, scale_type{-2}};
+
+  auto const type = cudf::binary_operation_fixed_point_output_type(
+    cudf::binary_operator::ADD, lhs->type(), static_cast<cudf::column_view>(rhs).type());
+  auto const result =
+    cudf::experimental::binary_operation(*lhs, rhs, cudf::binary_operator::ADD, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpAdd6)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const col = fp_wrapper<RepType>{{30, 4, 5, 6, 7, 8}, scale_type{0}};
+
+  auto const expected1 = fp_wrapper<RepType>{{60, 8, 10, 12, 14, 16}, scale_type{0}};
+  auto const expected2 = fp_wrapper<RepType>{{6, 0, 1, 1, 1, 1}, scale_type{1}};
+  auto const type1     = cudf::data_type{cudf::type_to_id<decimalXX>(), 0};
+  auto const type2     = cudf::data_type{cudf::type_to_id<decimalXX>(), 1};
+  auto const result1 =
+    cudf::experimental::binary_operation(col, col, cudf::binary_operator::ADD, type1);
+  auto const result2 =
+    cudf::experimental::binary_operation(col, col, cudf::binary_operator::ADD, type2);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result2->view());
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result1->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointCast)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const col      = fp_wrapper<RepType>{{6, 8, 10, 12, 14, 16}, scale_type{0}};
+  auto const expected = fp_wrapper<RepType>{{0, 0, 1, 1, 1, 1}, scale_type{1}};
+  auto const type     = cudf::data_type{cudf::type_to_id<decimalXX>(), 1};
+  auto const result   = cudf::cast(col, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
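+// MUL adds the operand scales: 1.1 (scale -1) * 10.0 (scale -1) is 11.00 at
+// scale -2. The scalar overloads used below behave exactly like the
+// column/column form.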
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpMultiplyScalar)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{11, 22, 33, 44, 55}, scale_type{-1}};
+  auto const rhs      = make_fixed_point_scalar<decimalXX>(100, scale_type{-1});
+  auto const expected = fp_wrapper<RepType>{{1100, 2200, 3300, 4400, 5500}, scale_type{-2}};
+
+  auto const type = cudf::binary_operation_fixed_point_output_type(
+    cudf::binary_operator::MUL, static_cast<cudf::column_view>(lhs).type(), rhs->type());
+  auto const result =
+    cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::MUL, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpSimplePlus)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{150, 200}, scale_type{-2}};
+  auto const rhs      = fp_wrapper<RepType>{{2250, 1005}, scale_type{-3}};
+  auto const expected = fp_wrapper<RepType>{{3750, 3005}, scale_type{-3}};
+
+  auto const type =
+    cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD,
+                                                   static_cast<cudf::column_view>(lhs).type(),
+                                                   static_cast<cudf::column_view>(rhs).type());
+  auto const result =
+    cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpEqualSimple)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const trues    = std::vector<bool>(4, true);
+  auto const col1     = fp_wrapper<RepType>{{1, 2, 3, 4}, scale_type{0}};
+  auto const col2     = fp_wrapper<RepType>{{100, 200, 300, 400}, scale_type{-2}};
+  auto const expected = wrapper<bool>(trues.begin(), trues.end());
+
+  auto const result = cudf::experimental::binary_operation(
+    col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpEqualSimpleScale0)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const trues    = std::vector<bool>(4, true);
+  auto const col      = fp_wrapper<RepType>{{1, 2, 3, 4}, scale_type{0}};
+  auto const expected = wrapper<bool>(trues.begin(), trues.end());
+
+  auto const result = cudf::experimental::binary_operation(
+    col, col, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
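+// For EQUAL, a null in either input makes the output row null (the validity
+// masks are ANDed), no matter what values are stored; compare with
+// NULL_EQUALS further down, which treats two nulls as equal.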
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpEqualSimpleScale0Null)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const col1     = fp_wrapper<RepType>{{1, 2, 3, 4}, {1, 1, 1, 1}, scale_type{0}};
+  auto const col2     = fp_wrapper<RepType>{{1, 2, 3, 4}, {0, 0, 0, 0}, scale_type{0}};
+  auto const expected = wrapper<bool>{{0, 1, 0, 1}, {0, 0, 0, 0}};
+
+  auto const result = cudf::experimental::binary_operation(
+    col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpEqualSimpleScale2Null)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const col1     = fp_wrapper<RepType>{{1, 2, 3, 4}, {1, 1, 1, 1}, scale_type{-2}};
+  auto const col2     = fp_wrapper<RepType>{{1, 2, 3, 4}, {0, 0, 0, 0}, scale_type{0}};
+  auto const expected = wrapper<bool>{{0, 1, 0, 1}, {0, 0, 0, 0}};
+
+  auto const result = cudf::experimental::binary_operation(
+    col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpEqualLessGreater)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const sz = std::size_t{1000};
+
+  // TESTING binary op ADD
+
+  auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto e) { return e * 1000; });
+  auto const vec1 = std::vector<RepType>(begin, begin + sz);
+  auto const vec2 = std::vector<RepType>(sz, 0);
+
+  auto const iota_3  = fp_wrapper<RepType>(vec1.begin(), vec1.end(), scale_type{-3});
+  auto const zeros_3 = fp_wrapper<RepType>(vec2.begin(), vec2.end(), scale_type{-1});
+
+  auto const type =
+    cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD,
+                                                   static_cast<cudf::column_view>(iota_3).type(),
+                                                   static_cast<cudf::column_view>(zeros_3).type());
+  auto const iota_3_after_add =
+    cudf::experimental::binary_operation(zeros_3, iota_3, binary_operator::ADD, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(iota_3, iota_3_after_add->view());
+
+  // TESTING binary op EQUAL, LESS, GREATER
+
+  auto const trues    = std::vector<bool>(sz, true);
+  auto const true_col = wrapper<bool>(trues.begin(), trues.end());
+
+  auto const btype        = cudf::data_type{type_id::BOOL8};
+  auto const equal_result = cudf::experimental::binary_operation(
+    iota_3, iota_3_after_add->view(), binary_operator::EQUAL, btype);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, equal_result->view());
+
+  auto const less_result = cudf::experimental::binary_operation(
+    zeros_3, iota_3_after_add->view(), binary_operator::LESS, btype);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, less_result->view());
+
+  auto const greater_result = cudf::experimental::binary_operation(
+    iota_3_after_add->view(), zeros_3, binary_operator::GREATER, btype);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, greater_result->view());
+}
+
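+// NULL_MAX and NULL_MIN skip nulls: the result row is valid when at least
+// one input is valid, and is null only when both inputs are null (the last
+// row in each of the two tests below).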
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpNullMaxSimple)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const trues    = std::vector<bool>(4, true);
+  auto const col1     = fp_wrapper<RepType>{{40, 30, 20, 10, 0}, {1, 0, 1, 1, 0}, scale_type{-2}};
+  auto const col2     = fp_wrapper<RepType>{{10, 20, 30, 40, 0}, {1, 1, 1, 0, 0}, scale_type{-2}};
+  auto const expected = fp_wrapper<RepType>{{40, 20, 30, 10, 0}, {1, 1, 1, 1, 0}, scale_type{-2}};
+
+  auto const type =
+    cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::NULL_MAX,
+                                                   static_cast<cudf::column_view>(col1).type(),
+                                                   static_cast<cudf::column_view>(col2).type());
+  auto const result =
+    cudf::experimental::binary_operation(col1, col2, binary_operator::NULL_MAX, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpNullMinSimple)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const trues    = std::vector<bool>(4, true);
+  auto const col1     = fp_wrapper<RepType>{{40, 30, 20, 10, 0}, {1, 1, 1, 0, 0}, scale_type{-1}};
+  auto const col2     = fp_wrapper<RepType>{{10, 20, 30, 40, 0}, {1, 0, 1, 1, 0}, scale_type{-1}};
+  auto const expected = fp_wrapper<RepType>{{10, 30, 20, 40, 0}, {1, 1, 1, 1, 0}, scale_type{-1}};
+
+  auto const type =
+    cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::NULL_MIN,
+                                                   static_cast<cudf::column_view>(col1).type(),
+                                                   static_cast<cudf::column_view>(col2).type());
+  auto const result =
+    cudf::experimental::binary_operation(col1, col2, binary_operator::NULL_MIN, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpNullEqualsSimple)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const trues    = std::vector<bool>(4, true);
+  auto const col1     = fp_wrapper<RepType>{{400, 300, 300, 100}, {1, 1, 1, 0}, scale_type{-2}};
+  auto const col2     = fp_wrapper<RepType>{{40, 200, 20, 400}, {1, 0, 1, 0}, scale_type{-1}};
+  auto const expected = wrapper<bool>{{1, 0, 0, 1}, {1, 1, 1, 1}};
+
+  auto const result = cudf::experimental::binary_operation(
+    col1, col2, binary_operator::NULL_EQUALS, cudf::data_type{type_id::BOOL8});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
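+// The remaining DIV tests request an explicit output data_type rather than
+// the natural scale; the result is produced at the requested scale,
+// truncating toward zero (e.g. 100.00 / 3 at scale -2 is 33.33).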
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{100, 300, 500, 700}, scale_type{-2}};
+  auto const rhs      = fp_wrapper<RepType>{{4, 4, 4, 4}, scale_type{0}};
+  auto const expected = fp_wrapper<RepType>{{25, 75, 125, 175}, scale_type{-2}};
+
+  auto const type = data_type{type_to_id<decimalXX>(), -2};
+  auto const result =
+    cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div2)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{100000, 300000, 500000, 700000}, scale_type{-3}};
+  auto const rhs      = fp_wrapper<RepType>{{20, 20, 20, 20}, scale_type{-1}};
+  auto const expected = fp_wrapper<RepType>{{5000, 15000, 25000, 35000}, scale_type{-2}};
+
+  auto const type = data_type{type_to_id<decimalXX>(), -2};
+  auto const result =
+    cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div3)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{10000, 30000, 50000, 70000}, scale_type{-2}};
+  auto const rhs      = fp_wrapper<RepType>{{3, 9, 3, 3}, scale_type{0}};
+  auto const expected = fp_wrapper<RepType>{{3333, 3333, 16666, 23333}, scale_type{-2}};
+
+  auto const type = data_type{type_to_id<decimalXX>(), -2};
+  auto const result =
+    cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div4)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{10, 30, 50, 70}, scale_type{1}};
+  auto const rhs      = make_fixed_point_scalar<decimalXX>(3, scale_type{0});
+  auto const expected = fp_wrapper<RepType>{{3, 10, 16, 23}, scale_type{1}};
+
+  auto const type = data_type{type_to_id<decimalXX>(), 1};
+  auto const result =
+    cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div6)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs = make_fixed_point_scalar<decimalXX>(3000, scale_type{-3});
+  auto const rhs = fp_wrapper<RepType>{{10, 30, 50, 70}, scale_type{-1}};
+
+  auto const expected = fp_wrapper<RepType>{{300, 100, 60, 42}, scale_type{-2}};
+
+  auto const type = data_type{type_to_id<decimalXX>(), -2};
+  auto const result =
+    cudf::experimental::binary_operation(*lhs, rhs, cudf::binary_operator::DIV, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div7)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs = make_fixed_point_scalar<decimalXX>(1200, scale_type{0});
+  auto const rhs = fp_wrapper<RepType>{{100, 200, 300, 500, 600, 800, 1200, 1300}, scale_type{-2}};
+
+  auto const expected = fp_wrapper<RepType>{{12, 6, 4, 2, 2, 1, 1, 0}, scale_type{2}};
+
+  auto const type = data_type{type_to_id<decimalXX>(), 2};
+  auto const result =
+    cudf::experimental::binary_operation(*lhs, rhs, cudf::binary_operator::DIV, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div8)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{4000, 6000, 80000}, scale_type{-1}};
+  auto const rhs      = make_fixed_point_scalar<decimalXX>(5000, scale_type{-3});
+  auto const expected = fp_wrapper<RepType>{{0, 1, 16}, scale_type{2}};
+
+  auto const type = data_type{type_to_id<decimalXX>(), 2};
+  auto const result =
+    cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div9)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{10, 20, 30}, scale_type{2}};
+  auto const rhs      = make_fixed_point_scalar<decimalXX>(7, scale_type{1});
+  auto const expected = fp_wrapper<RepType>{{1, 2, 4}, scale_type{1}};
+
+  auto const type = data_type{type_to_id<decimalXX>(), 1};
+  auto const result =
+    cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div10)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{100, 200, 300}, scale_type{1}};
+  auto const rhs      = make_fixed_point_scalar<decimalXX>(7, scale_type{0});
+  auto const expected = fp_wrapper<RepType>{{14, 28, 42}, scale_type{1}};
+
+  auto const type = data_type{type_to_id<decimalXX>(), 1};
+  auto const result =
+    cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div11)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const lhs      = fp_wrapper<RepType>{{1000, 2000, 3000}, scale_type{1}};
+  auto const rhs      = fp_wrapper<RepType>{{7, 7, 7}, scale_type{0}};
+  auto const expected = fp_wrapper<RepType>{{142, 285, 428}, scale_type{1}};
+
+  auto const type = data_type{type_to_id<decimalXX>(), 1};
+  auto const result =
+    cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+}
+
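+// Comparison operators require a BOOL8 output type; requesting anything else
+// raises cudf::logic_error, as the final test checks.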
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpThrows)
+{
+  using namespace numeric;
+  using decimalXX = TypeParam;
+  using RepType   = device_storage_type_t<decimalXX>;
+
+  auto const col           = fp_wrapper<RepType>{{100, 300, 500, 700}, scale_type{-2}};
+  auto const non_bool_type = data_type{type_to_id<decimalXX>(), -2};
+  auto const float_type    = data_type{type_id::FLOAT32};
+  EXPECT_THROW(
+    cudf::experimental::binary_operation(col, col, cudf::binary_operator::LESS, non_bool_type),
+    cudf::logic_error);
+  // Allowed now, but not allowed in jit.
+  // EXPECT_THROW(cudf::experimental::binary_operation(col, col, cudf::binary_operator::MUL,
+  //                                                   float_type),
+  //              cudf::logic_error);
+}
+
+}  // namespace cudf::test::binop
diff --git a/cpp/tests/binaryop/binop-compiled-test.cpp b/cpp/tests/binaryop/binop-compiled-test.cpp
new file mode 100644
index 00000000000..081ae41fef1
--- /dev/null
+++ b/cpp/tests/binaryop/binop-compiled-test.cpp
@@ -0,0 +1,610 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/type_lists.hpp>
+#include <cudf/binaryop.hpp>
+#include <cudf/fixed_point/fixed_point.hpp>
+#include <cudf/types.hpp>
+
+#include <tests/binaryop/assert-binops.h>
+#include <tests/binaryop/binop-fixture.hpp>
+#include <tests/binaryop/util/operation.h>
+
+#include <algorithm>
+#include <functional>
+#include "cudf/utilities/error.hpp"
+
+#include <type_traits>
+
+namespace cudf::test::binop {
+
+template <typename TypeLhs>
+auto lhs_random_column(size_type size)
+{
+  return BinaryOperationTest::make_random_wrapped_column<TypeLhs>(size);
+}
+
+template <>
+auto lhs_random_column<std::string>(size_type size)
+{
+  return cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"},
+                                            {1, 1, 0, 1, 1, 1, 1});
+}
+template <typename TypeRhs>
+auto rhs_random_column(size_type size)
+{
+  return BinaryOperationTest::make_random_wrapped_column<TypeRhs>(size);
+}
+template <>
+auto rhs_random_column<std::string>(size_type size)
+{
+  return cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"},
+                                            {1, 1, 1, 1, 0, 1, 1});
+}
+
+// combinations to test
+//       n    t    d
+//   n  n.n  n.t  n.d
+//   t  t.n  t.t  t.d
+//   d  d.n  d.t  d.d
+
+constexpr size_type col_size = 10000;
+template <typename T>
+struct BinaryOperationCompiledTest : public BinaryOperationTest {
+  using TypeOut = cudf::test::GetType<T, 0>;
+  using TypeLhs = cudf::test::GetType<T, 1>;
+  using TypeRhs = cudf::test::GetType<T, 2>;
+
+  template