Skip to content

Commit

Permalink
Make fast 'exists' checks generic across database engines
Browse files Browse the repository at this point in the history
  • Loading branch information
RudolfCardinal committed Jan 8, 2025
1 parent f3821e4 commit 18af04d
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 16 deletions.
82 changes: 69 additions & 13 deletions cardinal_pythonlib/sqlalchemy/core_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,18 @@
from sqlalchemy.exc import MultipleResultsFound
from sqlalchemy.orm.session import Session
from sqlalchemy.sql.expression import (
case,
column,
exists,
func,
literal,
select,
table,
text,
)
from sqlalchemy.sql.schema import Table
from sqlalchemy.sql.selectable import Select
from sqlalchemy.sql.selectable import Select, TableClause

from cardinal_pythonlib.logs import get_brace_style_log_with_null_handler
from cardinal_pythonlib.sqlalchemy.dialect import SqlaDialectName

log = get_brace_style_log_with_null_handler(__name__)

Expand Down Expand Up @@ -233,15 +232,17 @@ def count_star_and_max(
# http://docs.sqlalchemy.org/en/latest/orm/query.html


def exists_in_table(session: Session, table_: Table, *criteria: Any) -> bool:
def exists_in_table(
session: Session, table_: Union[Table, TableClause], *criteria: Any
) -> bool:
"""
Implements an efficient way of detecting if a record or records exist;
should be faster than ``COUNT(*)`` in some circumstances.
Args:
session: SQLAlchemy :class:`Session`, :class:`Engine`, or
:class:`Connection` object
table_: SQLAlchemy :class:`Table` object
table_: SQLAlchemy :class:`Table` object or table clause
criteria: optional SQLAlchemy "where" criteria
Returns:
Expand All @@ -262,15 +263,70 @@ def exists_in_table(session: Session, table_: Table, *criteria: Any) -> bool:
exists_clause = exists_clause.where(criterion)
# ... EXISTS (SELECT * FROM tablename WHERE ...)

if session.get_bind().dialect.name == SqlaDialectName.MSSQL:
query = select(literal(True)).where(exists_clause)
# ... SELECT 1 WHERE EXISTS (SELECT * FROM tablename WHERE ...)
else:
query = select(exists_clause)
# ... SELECT EXISTS (SELECT * FROM tablename WHERE ...)

# Methods as follows.
# SQL validation: http://developer.mimer.com/validator/
# Standard syntax: https://en.wikipedia.org/wiki/SQL_syntax
# We can make it conditional on dialect via
# session.get_bind().dialect.name
# but it would be better not to need to.
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# SELECT 1 FROM mytable WHERE EXISTS (SELECT * FROM mytable WHERE ...)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# - Produces multiple results (a 1 for each row).
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# SELECT 1 WHERE EXISTS (SELECT * FROM tablename WHERE ...)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# - Produces either 1 or NULL (no rows).
# - Implementation:
#
# query = select(literal(True)).where(exists_clause)
# result = session.execute(query).scalar()
# return bool(result) # None/0 become False; 1 becomes True
#
# - However, may be non-standard: no FROM clause.
# - Works on SQL Server (empirically).
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# SELECT EXISTS (SELECT * FROM tablename WHERE ...)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# - Produces 0 or 1.
# - Implementation:
#
# query = select(exists_clause)
# result = session.execute(query).scalar()
# return bool(result)
#
# - But it may not be standard.
#
# - Supported by MySQL:
# - https://dev.mysql.com/doc/refman/8.4/en/exists-and-not-exists-subqueries.html # noqa: E501
# - and an empirical test
#
# Suported by SQLite:
# - https://www.sqlite.org/lang_expr.html#the_exists_operator
# - and an empirical test
#
# Possibly not SQL Server.
#
# Possibly not Databricks.
# - https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select.html # noqa: E501
# - https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-where.html # noqa: E501
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# SELECT CASE WHEN EXISTS(SELECT * FROM tablename WHERE...) THEN 0 ELSE 1 END # noqa: E501
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# - ANSI standard.
# - https://stackoverflow.com/questions/17284688/how-to-efficiently-check-if-a-table-is-empty # noqa: E501
# - Returns 0 or 1.
# - May be possible to use "SELECT 1 FROM tablename" also, but unclear
# what's faster, and likely EXISTS() should optimise.
# - Implementation as below.

query = select(case((exists_clause, 1), else_=0))
result = session.execute(query).scalar()
return bool(result)
return bool(result) # None/0 become False; 1 becomes True


def exists_plain(session: Session, tablename: str, *criteria: Any) -> bool:
Expand Down
8 changes: 8 additions & 0 deletions cardinal_pythonlib/sqlalchemy/dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ class SqlaDialectName(object):
Dialect names used by SQLAlchemy.
"""

# SQLAlchemy itself:

FIREBIRD = "firebird"
MYSQL = "mysql"
MSSQL = "mssql"
Expand All @@ -53,6 +55,12 @@ class SqlaDialectName(object):
SQLSERVER = MSSQL # synonym
SYBASE = "sybase"

# Additional third-party dialects:

DATABRICKS = "databricks"
# ... https://github.com/databricks/databricks-sqlalchemy
# ... https://docs.databricks.com/en/sql/language-manual/index.html


ALL_SQLA_DIALECTS = list(
set(
Expand Down
13 changes: 13 additions & 0 deletions cardinal_pythonlib/sqlalchemy/engine_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,16 @@ def is_sqlserver_2008_or_later(engine: "Engine") -> bool:
return False
version_tuple = get_sqlserver_product_version(engine)
return version_tuple >= (SQLSERVER_MAJOR_VERSION_2008,)


# =============================================================================
# Helper functions for Databricks
# =============================================================================


def is_databricks(engine: "Engine") -> bool:
"""
Is the SQLAlchemy :class:`Engine` a Databricks database?
"""
dialect_name = get_dialect_name(engine)
return dialect_name == SqlaDialectName.DATABRICKS
32 changes: 31 additions & 1 deletion cardinal_pythonlib/sqlalchemy/tests/core_query_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,22 @@


class CoreQueryTests(TestCase):
def __init__(self, *args, echo: bool = False, **kwargs) -> None:
self.echo = echo
super().__init__(*args, **kwargs)

def setUp(self) -> None:
self.engine = create_engine(SQLITE_MEMORY_URL, future=True)
self.engine = create_engine(
SQLITE_MEMORY_URL, echo=self.echo, future=True
)
self.tablename = "t"
self.a = "a"
self.b = "b"
self.a_val1 = 1
self.a_val2 = 2
self.b_val1 = 101
self.b_val2 = 102
self.emptytablename = "emptytable"
with self.engine.begin() as con:
con.execute(
text(
Expand All @@ -84,12 +91,16 @@ def setUp(self) -> None:
f"VALUES ({self.a_val2}, {self.b_val2})"
)
)
con.execute(
text(f"CREATE TABLE {self.emptytablename} (x INTEGER)")
)
self.session = sessionmaker(
bind=self.engine, future=True
)() # type: Session
self.metadata = MetaData()
self.metadata.reflect(bind=self.engine)
self.table = self.metadata.tables[self.tablename]
self.emptytable = self.metadata.tables[self.emptytablename]

# noinspection DuplicatedCode
def test_get_rows_fieldnames_from_raw_sql(self) -> None:
Expand Down Expand Up @@ -117,20 +128,39 @@ def test_count_star_and_max(self) -> None:
self.assertEqual(maximum, self.b_val2)

def test_exists_in_table(self) -> None:
# exists:
exists1 = exists_in_table(self.session, self.table)
self.assertTrue(exists1)
exists2 = exists_in_table(
self.session, self.table, column(self.a) == 1
)
self.assertTrue(exists2)
# does not exist:
exists3 = exists_in_table(
self.session, self.table, column(self.a) == 99
)
self.assertFalse(exists3)
exists4 = exists_in_table(self.session, self.emptytable)
self.assertFalse(exists4)

def test_exists_plain(self) -> None:
# exists:
exists1 = exists_plain(self.session, self.tablename)
self.assertTrue(exists1)
exists2 = exists_plain(
self.session, self.tablename, column(self.a) == 1
)
self.assertTrue(exists2)
# does not exist:
exists3 = exists_plain(
self.session, self.tablename, column(self.a) == 99
)
self.assertFalse(exists3)
exists4 = exists_plain(
self.session,
self.emptytablename,
)
self.assertFalse(exists4)

def test_fetch_all_first_values(self) -> None:
select_stmt = select(text("*")).select_from(table(self.tablename))
Expand Down
2 changes: 1 addition & 1 deletion cardinal_pythonlib/sqlalchemy/tests/orm_schema_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class Pet(Base):


class OrmQueryTests(TestCase):
def __init__(self, *args, echo: bool = True, **kwargs) -> None:
def __init__(self, *args, echo: bool = False, **kwargs) -> None:
self.echo = echo
super().__init__(*args, **kwargs)

Expand Down
2 changes: 1 addition & 1 deletion cardinal_pythonlib/sqlalchemy/tests/schema_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def test_schema_functions(self) -> None:


class IndexExistsTests(unittest.TestCase):
def __init__(self, *args, echo: bool = True, **kwargs) -> None:
def __init__(self, *args, echo: bool = False, **kwargs) -> None:
self.echo = echo
super().__init__(*args, **kwargs)

Expand Down

0 comments on commit 18af04d

Please sign in to comment.