From f2a7cd9120a3b5b5c29ad33c7b4c218faa929ca9 Mon Sep 17 00:00:00 2001
From: Chloe He <chloehe47@gmail.com>
Date: Mon, 6 May 2024 06:48:00 -0700
Subject: [PATCH] fix: replace NaNs with None in some backends when loading
 from pandas dataframe (#9094)

## Description of changes

Examples were broken on the MySQL and Postgres backends when a numeric
column contained null values.

Druid, PySpark, and RisingWave (RW) don't support examples.

- Exasol - did not test
- Flink - broken
- Impala - did not test
- MSSQL - broken (see #9095)
- MySQL - fixed
- Oracle - did not test
- PostgreSQL - fixed
- Snowflake - did not test

## Issues closed

#8792

---------

Co-authored-by: Chloe He <chloe@chloe-mac.lan>
---
 ibis/backends/mysql/__init__.py    |  4 ++++
 ibis/backends/postgres/__init__.py | 13 +++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py
index f09768054d19..ce0589fa2843 100644
--- a/ibis/backends/mysql/__init__.py
+++ b/ibis/backends/mysql/__init__.py
@@ -11,6 +11,7 @@
 from typing import TYPE_CHECKING, Any
 from urllib.parse import parse_qs, urlparse
 
+import numpy as np
 import pymysql
 import sqlglot as sg
 import sqlglot.expressions as sge
@@ -481,6 +482,9 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
 
             columns = schema.keys()
             df = op.data.to_frame()
+            # NaN cannot be used with MySQL
+            df = df.replace(np.nan, None)
+
             data = df.itertuples(index=False)
             cols = ", ".join(
                 ident.sql(self.name)
diff --git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py
index e7f54864fb48..016b548d4e30 100644
--- a/ibis/backends/postgres/__init__.py
+++ b/ibis/backends/postgres/__init__.py
@@ -11,8 +11,11 @@
 from typing import TYPE_CHECKING, Any, Callable
 from urllib.parse import parse_qs, urlparse
 
+import numpy as np
+import pandas as pd
 import sqlglot as sg
 import sqlglot.expressions as sge
+from pandas.api.types import is_float_dtype
 
 import ibis
 import ibis.common.exceptions as com
@@ -144,6 +147,16 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
 
             columns = schema.keys()
             df = op.data.to_frame()
+            # nan gets compiled into 'NaN'::float which throws errors in non-float columns
+            # In order to hold NaN values, pandas automatically converts integer columns
+            # to float columns if there are NaN values in them. Therefore, we need to convert
+            # them to their original dtypes (that support pd.NA) to figure out which columns
+            # are actually non-float, then fill the NaN values in those columns with None.
+            convert_df = df.convert_dtypes()
+            for col in convert_df.columns:
+                if not is_float_dtype(convert_df[col]):
+                    df[col] = df[col].replace(np.nan, None)
+
             data = df.itertuples(index=False)
             cols = ", ".join(
                 ident.sql(self.dialect)