Skip to content

Commit

Permalink
--crossdb option for joining across databases (#1232)
Browse files Browse the repository at this point in the history
* Test for cross-database join, refs #283
* Warn if --crossdb used with more than 10 DBs, refs #283
* latest.datasette.io demo of --crossdb joins, refs #283
* Show attached databases on /_memory page, refs #283
* Documentation for cross-database queries, refs #283
  • Loading branch information
simonw authored Feb 18, 2021
1 parent 4df548e commit 6f41c8a
Show file tree
Hide file tree
Showing 13 changed files with 215 additions and 8 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/deploy-latest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
- name: Run tests
run: pytest
- name: Build fixtures.db
run: python tests/fixtures.py fixtures.db fixtures.json plugins
run: python tests/fixtures.py fixtures.db fixtures.json plugins --extra-db-filename extra_database.db
- name: Build docs.db
run: |-
cd docs
Expand All @@ -48,12 +48,12 @@ jobs:
run: |-
gcloud config set run/region us-central1
gcloud config set project datasette-222320
datasette publish cloudrun fixtures.db \
datasette publish cloudrun fixtures.db extra_database.db \
-m fixtures.json \
--plugins-dir=plugins \
--branch=$GITHUB_SHA \
--version-note=$GITHUB_SHA \
--extra-options="--setting template_debug 1" \
--extra-options="--setting template_debug 1 --crossdb" \
--install=pysqlite3-binary \
--service=datasette-latest
# Deploy docs.db to a different service
Expand Down
20 changes: 19 additions & 1 deletion datasette/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@

app_root = Path(__file__).parent.parent

# Maximum number of databases that --crossdb will ATTACH to the _memory
# connection; any further databases are left unattached.
# https://github.com/simonw/datasette/issues/283#issuecomment-781591015
SQLITE_LIMIT_ATTACHED = 10

Setting = collections.namedtuple("Setting", ("name", "default", "help"))
SETTINGS = (
Setting("default_page_size", 100, "Default page size for the table view"),
Expand Down Expand Up @@ -194,6 +197,7 @@ def __init__(
version_note=None,
config_dir=None,
pdb=False,
crossdb=False,
):
assert config_dir is None or isinstance(
config_dir, Path
Expand All @@ -217,7 +221,8 @@ def __init__(
self.inspect_data = inspect_data
self.immutables = set(immutables or [])
self.databases = collections.OrderedDict()
if memory or not self.files:
self.crossdb = crossdb
if memory or crossdb or not self.files:
self.add_database(Database(self, is_memory=True), name="_memory")
# memory_name is a random string so that each Datasette instance gets its own
# unique in-memory named database - otherwise unit tests can fail with weird
Expand Down Expand Up @@ -499,6 +504,19 @@ def _prepare_connection(self, conn, database):
conn.execute(f"PRAGMA cache_size=-{self.setting('cache_size_kb')}")
# pylint: disable=no-member
pm.hook.prepare_connection(conn=conn, database=database, datasette=self)
# If self.crossdb and this is _memory, connect the first SQLITE_LIMIT_ATTACHED databases
if self.crossdb and database == "_memory":
count = 0
for db_name, db in self.databases.items():
if count >= SQLITE_LIMIT_ATTACHED or db.is_memory:
continue
sql = 'ATTACH DATABASE "file:{path}?{qs}" AS [{name}];'.format(
path=db.path,
qs="mode=ro" if db.is_mutable else "immutable=1",
name=db_name,
)
conn.execute(sql)
count += 1

def add_message(self, request, message, type=INFO):
if not hasattr(request, "_messages"):
Expand Down
21 changes: 20 additions & 1 deletion datasette/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import sys
from runpy import run_module
import webbrowser
from .app import Datasette, DEFAULT_SETTINGS, SETTINGS, pm
from .app import Datasette, DEFAULT_SETTINGS, SETTINGS, SQLITE_LIMIT_ATTACHED, pm
from .utils import (
StartupError,
check_connection,
Expand Down Expand Up @@ -410,6 +410,11 @@ def uninstall(packages, yes):
is_flag=True,
help="Create database files if they do not exist",
)
@click.option(
"--crossdb",
is_flag=True,
help="Enable cross-database joins using the /_memory database",
)
@click.option(
"--ssl-keyfile",
help="SSL key file",
Expand Down Expand Up @@ -442,6 +447,7 @@ def serve(
pdb,
open_browser,
create,
crossdb,
ssl_keyfile,
ssl_certfile,
return_instance=False,
Expand Down Expand Up @@ -499,6 +505,7 @@ def serve(
secret=secret,
version_note=version_note,
pdb=pdb,
crossdb=crossdb,
)

# if files is a single directory, use that as config_dir=
Expand Down Expand Up @@ -591,3 +598,15 @@ async def check_databases(ds):
raise click.UsageError(
f"Connection to {database.path} failed check: {str(e.args[0])}"
)
# If --crossdb and more than SQLITE_LIMIT_ATTACHED show warning
if (
ds.crossdb
and len([db for db in ds.databases.values() if not db.is_memory])
> SQLITE_LIMIT_ATTACHED
):
msg = (
"Warning: --crossdb only works with the first {} attached databases".format(
SQLITE_LIMIT_ATTACHED
)
)
click.echo(click.style(msg, bold=True, fg="yellow"), err=True)
11 changes: 10 additions & 1 deletion datasette/database.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
from collections import namedtuple
from pathlib import Path
import janus
import queue
Expand All @@ -22,6 +23,8 @@

connections = threading.local()

# One entry from SQLite's database_list pragma for a connection, holding its
# seq, name and file columns.
AttachedDatabase = namedtuple("AttachedDatabase", ("seq", "name", "file"))


class Database:
def __init__(
Expand Down Expand Up @@ -78,7 +81,7 @@ def connect(self, write=False):
conn.execute("PRAGMA query_only=1")
return conn
if self.is_memory:
return sqlite3.connect(":memory:")
return sqlite3.connect(":memory:", uri=True)
# mode=ro or immutable=1?
if self.is_mutable:
qs = "?mode=ro"
Expand Down Expand Up @@ -243,6 +246,12 @@ def mtime_ns(self):
return None
return Path(self.path).stat().st_mtime_ns

async def attached_databases(self):
    """Return the databases attached to this database's connection.

    Returns a list of ``AttachedDatabase`` named tuples (``seq``, ``name``,
    ``file``), one for every entry reported by SQLite whose ``seq`` is
    greater than zero.
    """
    # Use the plain PRAGMA statement rather than the pragma_database_list()
    # table-valued function: PRAGMA functions only exist in SQLite 3.16.0 and
    # later, whereas the statement form also works on older versions.
    results = await self.execute("PRAGMA database_list")
    # Columns come back in the order seq, name, file. Entries with seq 0
    # (the main database, per the SQLite documentation) are excluded so that
    # only additional databases are reported.
    return [AttachedDatabase(*row) for row in results.rows if row[0] > 0]

async def table_exists(self, table):
results = await self.execute(
"select 1 from sqlite_master where type='table' and name=?", params=(table,)
Expand Down
11 changes: 11 additions & 0 deletions datasette/templates/database.html
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,17 @@ <h3>Custom SQL query</h3>
</form>
{% endif %}

{% if attached_databases %}
<div class="message-info">
<p>The following databases are attached to this connection, and can be used for cross-database joins:</p>
<ul class="bullets">
{% for db_name in attached_databases %}
<li><strong>{{ db_name }}</strong> - <a href="?sql=select+*+from+[{{ db_name }}].sqlite_master+where+type='table'">tables</a></li>
{% endfor %}
</ul>
</div>
{% endif %}

{% for table in tables %}
{% if show_hidden or not table.hidden %}
<div class="db-table">
Expand Down
3 changes: 3 additions & 0 deletions datasette/views/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ async def database_actions():
links.extend(extra_links)
return links

attached_databases = [d.name for d in await db.attached_databases()]

return (
{
"database": database,
Expand All @@ -139,6 +141,7 @@ async def database_actions():
"allow_download": self.ds.setting("allow_download")
and not db.is_mutable
and not db.is_memory,
"attached_databases": attached_databases,
},
(f"database-{to_css_class(database)}.html", "database.html"),
)
Expand Down
1 change: 1 addition & 0 deletions docs/datasette-serve-help.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Options:
--pdb Launch debugger on any errors
-o, --open Open Datasette in your web browser
--create Create database files if they do not exist
--crossdb Enable cross-database joins using the /_memory database
--ssl-keyfile TEXT SSL key file
--ssl-certfile TEXT SSL certificate file
--help Show this message and exit.
3 changes: 3 additions & 0 deletions docs/internals.rst
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,9 @@ The ``Database`` class also provides properties and methods for introspecting th
``db.is_memory`` - boolean
Is this database an in-memory database?

``await db.attached_databases()`` - list of named tuples
Returns a list of additional databases that have been connected to this database using the SQLite ATTACH command. Each named tuple has fields ``seq``, ``name`` and ``file``.

``await db.table_exists(table)`` - boolean
Check if a table called ``table`` exists.

Expand Down
31 changes: 31 additions & 0 deletions docs/sql_queries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -389,3 +389,34 @@ detect if there should be another page.
Since the where clause acts against the index on the primary key, the query is
extremely fast even for records that are a long way into the overall pagination
set.

.. _cross_database_quereies:

Cross-database queries
----------------------

SQLite has the ability to run queries that join across multiple databases. Up to ten databases can be attached to a single SQLite connection and queried together.

Datasette can execute joins across multiple databases if it is started with the ``--crossdb`` option::

datasette fixtures.db extra_database.db --crossdb

If it is started in this way, the ``/_memory`` page can be used to execute queries that join across multiple databases.

References to tables in attached databases should be preceded by the database name and a period.

For example, this query will show a list of tables across both of the above databases:

.. code-block:: sql
select
'fixtures' as database, *
from
[fixtures].sqlite_master
union
select
'extra_database' as database, *
from
[extra_database].sqlite_master
`Try that out here <https://latest.datasette.io/_memory?sql=select%0D%0A++%27fixtures%27+as+database%2C+*%0D%0Afrom%0D%0A++%5Bfixtures%5D.sqlite_master%0D%0Aunion%0D%0Aselect%0D%0A++%27extra_database%27+as+database%2C+*%0D%0Afrom%0D%0A++%5Bextra_database%5D.sqlite_master>`__.
29 changes: 28 additions & 1 deletion tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def make_app_client(
static_mounts=None,
template_dir=None,
metadata=None,
crossdb=False,
):
with tempfile.TemporaryDirectory() as tmpdir:
filepath = os.path.join(tmpdir, filename)
Expand Down Expand Up @@ -149,6 +150,7 @@ def make_app_client(
inspect_data=inspect_data,
static_mounts=static_mounts,
template_dir=template_dir,
crossdb=crossdb,
)
ds.sqlite_functions.append(("sleep", 1, lambda n: time.sleep(float(n))))
yield TestClient(ds)
Expand Down Expand Up @@ -180,6 +182,15 @@ def app_client_two_attached_databases():
yield client


@pytest.fixture(scope="session")
def app_client_two_attached_databases_crossdb_enabled():
    """Session-scoped test client with an extra database and crossdb enabled.

    The client is built by ``make_app_client`` with one additional database,
    "extra database.db", created from ``EXTRA_DATABASE_SQL``, and with
    ``crossdb=True``.
    """
    extra = {"extra database.db": EXTRA_DATABASE_SQL}
    with make_app_client(extra_databases=extra, crossdb=True) as crossdb_client:
        yield crossdb_client


@pytest.fixture(scope="session")
def app_client_conflicting_database_names():
with make_app_client(
Expand Down Expand Up @@ -750,7 +761,12 @@ def assert_permissions_checked(datasette, actions):
default=False,
help="Delete and recreate database if it exists",
)
def cli(db_filename, metadata, plugins_path, recreate):
@click.option(
"--extra-db-filename",
type=click.Path(file_okay=True, dir_okay=False),
help="Write out second test DB to this file",
)
def cli(db_filename, metadata, plugins_path, recreate, extra_db_filename):
"""Write out the fixtures database used by Datasette's test suite"""
if metadata and not metadata.endswith(".json"):
raise click.ClickException("Metadata should end with .json")
Expand Down Expand Up @@ -784,6 +800,17 @@ def cli(db_filename, metadata, plugins_path, recreate):
newpath = path / filepath.name
newpath.write_text(filepath.open().read())
print(f" Wrote plugin: {newpath}")
if extra_db_filename:
if pathlib.Path(extra_db_filename).exists():
if not recreate:
raise click.ClickException(
f"{extra_db_filename} already exists, use --recreate to reset it"
)
else:
pathlib.Path(extra_db_filename).unlink()
conn = sqlite3.connect(extra_db_filename)
conn.executescript(EXTRA_DATABASE_SQL)
print(f"Test tables written to {extra_db_filename}")


if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def test_metadata_yaml():
get=None,
help_config=False,
pdb=False,
crossdb=False,
open_browser=False,
create=False,
ssl_keyfile=None,
Expand Down
75 changes: 75 additions & 0 deletions tests/test_crossdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from datasette.cli import cli
from click.testing import CliRunner
import urllib
import sqlite3
from .fixtures import app_client_two_attached_databases_crossdb_enabled


def test_crossdb_join(app_client_two_attached_databases_crossdb_enabled):
    """A query against /_memory can read tables from both attached databases.

    Runs a ``union all`` over the ``searchable`` table of the
    "extra database" and "fixtures" databases and checks the JSON rows
    returned for each.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` has been loaded.
    from urllib.parse import urlencode

    app_client = app_client_two_attached_databases_crossdb_enabled
    sql = """
select
'extra database' as db,
pk,
text1,
text2
from
[extra database].searchable
union all
select
'fixtures' as db,
pk,
text1,
text2
from
fixtures.searchable
"""
    query_string = urlencode({"sql": sql, "_shape": "array"})
    response = app_client.get("/_memory.json?" + query_string)
    assert response.status == 200
    assert response.json == [
        {"db": "extra database", "pk": 1, "text1": "barry cat", "text2": "terry dog"},
        {"db": "extra database", "pk": 2, "text1": "terry dog", "text2": "sara weasel"},
        {"db": "fixtures", "pk": 1, "text1": "barry cat", "text2": "terry dog"},
        {"db": "fixtures", "pk": 2, "text1": "terry dog", "text2": "sara weasel"},
    ]


def test_crossdb_warning_if_too_many_databases(tmp_path_factory):
    """``serve --crossdb`` warns on stderr when given more than 10 databases.

    Creates 11 empty SQLite files, invokes the CLI against them with
    ``--get /`` and checks that the warning text appears in stderr.
    """
    db_dir = tmp_path_factory.mktemp("dbs")
    dbs = []
    # 11 databases: one more than the 10 named in the expected warning.
    for i in range(11):
        path = str(db_dir / f"db_{i}.db")
        conn = sqlite3.connect(path)
        try:
            # "vacuum" forces SQLite to write out a database file.
            conn.execute("vacuum")
        finally:
            # Close the handle so no connections are left open once the
            # files have been created.
            conn.close()
        dbs.append(path)
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        [
            "serve",
            "--crossdb",
            "--get",
            "/",
        ]
        + dbs,
        catch_exceptions=False,
    )
    assert (
        "Warning: --crossdb only works with the first 10 attached databases"
        in result.stderr
    )


def test_crossdb_attached_database_list_display(
    app_client_two_attached_databases_crossdb_enabled,
):
    """The /_memory page lists each attached database by name."""
    page = app_client_two_attached_databases_crossdb_enabled.get("/_memory")
    expected_fragments = (
        "databases are attached to this connection",
        "<li><strong>fixtures</strong> - ",
        "<li><strong>extra database</strong> - ",
    )
    for expected in expected_fragments:
        assert expected in page.text
Loading

0 comments on commit 6f41c8a

Please sign in to comment.