-
-
Notifications
You must be signed in to change notification settings - Fork 699
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Move Metadata to --internal
database
#2343
Changes from all commits
d383b00
e94673f
8f81d7e
cf162fb
4543816
d8f2786
fd8c750
6ca376d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -443,6 +443,37 @@ def __init__( | |
self._root_token = secrets.token_hex(32) | ||
self.client = DatasetteClient(self) | ||
|
||
async def apply_metadata_json(self): | ||
# Apply any metadata entries from metadata.json to the internal tables | ||
# step 1: top-level metadata | ||
for key in self._metadata_local or {}: | ||
if key == "databases": | ||
continue | ||
await self.set_instance_metadata(key, self._metadata_local[key]) | ||
|
||
# step 2: database-level metadata | ||
for dbname, db in self._metadata_local.get("databases", {}).items(): | ||
for key, value in db.items(): | ||
if key == "tables": | ||
continue | ||
await self.set_database_metadata(dbname, key, value) | ||
|
||
# step 3: table-level metadata | ||
for tablename, table in db.get("tables", {}).items(): | ||
for key, value in table.items(): | ||
if key == "columns": | ||
continue | ||
await self.set_resource_metadata(dbname, tablename, key, value) | ||
|
||
# step 4: column-level metadata (only descriptions in metadata.json) | ||
for columnname, column_description in table.get("columns", {}).items(): | ||
await self.set_column_metadata( | ||
dbname, tablename, columnname, "description", column_description | ||
) | ||
|
||
# TODO(alex) if metadata.json was loaded in, and --internal is not memory, then log | ||
# a warning to user that they should delete their metadata.json file | ||
|
||
Comment on lines +474 to +476
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm OK with us not doing this - software telling me to delete a file is a bit weird. |
||
def get_jinja_environment(self, request: Request = None) -> Environment: | ||
environment = self._jinja_env | ||
if request: | ||
|
@@ -476,6 +507,7 @@ async def _refresh_schemas(self): | |
internal_db = self.get_internal_database() | ||
if not self.internal_db_created: | ||
await init_internal_db(internal_db) | ||
await self.apply_metadata_json() | ||
self.internal_db_created = True | ||
current_schema_versions = { | ||
row["database_name"]: row["schema_version"] | ||
|
@@ -646,57 +678,113 @@ def _metadata_recursive_update(self, orig, updated): | |
orig[key] = upd_value | ||
return orig | ||
|
||
def metadata(self, key=None, database=None, table=None, fallback=True): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm so happy to be rid of the synchronous (but undocumented) |
||
""" | ||
Looks up metadata, cascading backwards from specified level. | ||
Returns None if metadata value is not found. | ||
""" | ||
assert not ( | ||
database is None and table is not None | ||
), "Cannot call metadata() with table= specified but not database=" | ||
metadata = {} | ||
async def get_instance_metadata(self): | ||
rows = await self.get_internal_database().execute( | ||
""" | ||
SELECT | ||
key, | ||
value | ||
FROM datasette_metadata_instance_entries | ||
""" | ||
) | ||
return dict(rows) | ||
|
||
async def get_database_metadata(self, database_name: str): | ||
rows = await self.get_internal_database().execute( | ||
""" | ||
SELECT | ||
key, | ||
value | ||
FROM datasette_metadata_database_entries | ||
WHERE database_name = ? | ||
""", | ||
[database_name], | ||
) | ||
return dict(rows) | ||
|
||
async def get_resource_metadata(self, database_name: str, resource_name: str): | ||
rows = await self.get_internal_database().execute( | ||
""" | ||
SELECT | ||
key, | ||
value | ||
FROM datasette_metadata_resource_entries | ||
WHERE database_name = ? | ||
AND resource_name = ? | ||
""", | ||
[database_name, resource_name], | ||
) | ||
return dict(rows) | ||
|
||
for hook_dbs in pm.hook.get_metadata( | ||
datasette=self, key=key, database=database, table=table | ||
): | ||
metadata = self._metadata_recursive_update(metadata, hook_dbs) | ||
|
||
# security precaution!! don't allow anything in the local config | ||
# to be overwritten. this is a temporary measure, not sure if this | ||
# is a good idea long term or maybe if it should just be a concern | ||
# of the plugin's implementation | ||
metadata = self._metadata_recursive_update(metadata, self._metadata_local) | ||
|
||
databases = metadata.get("databases") or {} | ||
|
||
search_list = [] | ||
if database is not None: | ||
search_list.append(databases.get(database) or {}) | ||
if table is not None: | ||
table_metadata = ((databases.get(database) or {}).get("tables") or {}).get( | ||
table | ||
) or {} | ||
search_list.insert(0, table_metadata) | ||
|
||
search_list.append(metadata) | ||
if not fallback: | ||
# No fallback allowed, so just use the first one in the list | ||
search_list = search_list[:1] | ||
if key is not None: | ||
for item in search_list: | ||
if key in item: | ||
return item[key] | ||
return None | ||
else: | ||
# Return the merged list | ||
m = {} | ||
for item in search_list: | ||
m.update(item) | ||
return m | ||
async def get_column_metadata( | ||
self, database_name: str, resource_name: str, column_name: str | ||
): | ||
rows = await self.get_internal_database().execute( | ||
""" | ||
SELECT | ||
key, | ||
value | ||
FROM datasette_metadata_column_entries | ||
WHERE database_name = ? | ||
AND resource_name = ? | ||
AND column_name = ? | ||
""", | ||
[database_name, resource_name, column_name], | ||
) | ||
return dict(rows) | ||
|
||
async def set_instance_metadata(self, key: str, value: str): | ||
# TODO upsert only supported on SQLite 3.24.0 (2018-06-04) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This inspired me to survey people to find out what SQLite versions are out there in the wild: https://twitter.com/simonw/status/1800198142002115029 |
||
await self.get_internal_database().execute_write( | ||
""" | ||
INSERT INTO datasette_metadata_instance_entries(key, value) | ||
VALUES(?, ?) | ||
ON CONFLICT(key) DO UPDATE SET value = excluded.value; | ||
""", | ||
[key, value], | ||
) | ||
|
||
@property | ||
def _metadata(self): | ||
return self.metadata() | ||
async def set_database_metadata(self, database_name: str, key: str, value: str): | ||
# TODO upsert only supported on SQLite 3.24.0 (2018-06-04) | ||
await self.get_internal_database().execute_write( | ||
""" | ||
INSERT INTO datasette_metadata_database_entries(database_name, key, value) | ||
VALUES(?, ?, ?) | ||
ON CONFLICT(database_name, key) DO UPDATE SET value = excluded.value; | ||
""", | ||
[database_name, key, value], | ||
) | ||
|
||
async def set_resource_metadata( | ||
self, database_name: str, resource_name: str, key: str, value: str | ||
): | ||
# TODO upsert only supported on SQLite 3.24.0 (2018-06-04) | ||
await self.get_internal_database().execute_write( | ||
""" | ||
INSERT INTO datasette_metadata_resource_entries(database_name, resource_name, key, value) | ||
VALUES(?, ?, ?, ?) | ||
ON CONFLICT(database_name, resource_name, key) DO UPDATE SET value = excluded.value; | ||
""", | ||
[database_name, resource_name, key, value], | ||
) | ||
|
||
async def set_column_metadata( | ||
self, | ||
database_name: str, | ||
resource_name: str, | ||
column_name: str, | ||
key: str, | ||
value: str, | ||
): | ||
# TODO upsert only supported on SQLite 3.24.0 (2018-06-04) | ||
await self.get_internal_database().execute_write( | ||
""" | ||
INSERT INTO datasette_metadata_column_entries(database_name, resource_name, column_name, key, value) | ||
VALUES(?, ?, ?, ?, ?) | ||
ON CONFLICT(database_name, resource_name, column_name, key) DO UPDATE SET value = excluded.value; | ||
""", | ||
[database_name, resource_name, column_name, key, value], | ||
) | ||
|
||
def get_internal_database(self): | ||
return self._internal_database | ||
|
@@ -774,20 +862,6 @@ async def get_canned_query(self, database_name, query_name, actor): | |
if query: | ||
return query | ||
|
||
def update_with_inherited_metadata(self, metadata): | ||
# Fills in source/license with defaults, if available | ||
metadata.update( | ||
{ | ||
"source": metadata.get("source") or self.metadata("source"), | ||
"source_url": metadata.get("source_url") or self.metadata("source_url"), | ||
"license": metadata.get("license") or self.metadata("license"), | ||
"license_url": metadata.get("license_url") | ||
or self.metadata("license_url"), | ||
"about": metadata.get("about") or self.metadata("about"), | ||
"about_url": metadata.get("about_url") or self.metadata("about_url"), | ||
} | ||
) | ||
|
||
def _prepare_connection(self, conn, database): | ||
conn.row_factory = sqlite3.Row | ||
conn.text_factory = lambda x: str(x, "utf-8", "replace") | ||
|
@@ -1079,11 +1153,6 @@ def absolute_url(self, request, path): | |
url = "https://" + url[len("http://") :] | ||
return url | ||
|
||
def _register_custom_units(self): | ||
"""Register any custom units defined in the metadata.json with Pint""" | ||
for unit in self.metadata("custom_units") or []: | ||
ureg.define(unit) | ||
|
||
def _connected_databases(self): | ||
return [ | ||
{ | ||
|
@@ -1436,10 +1505,6 @@ def add_route(view, regex): | |
), | ||
r"/:memory:(?P<rest>.*)$", | ||
) | ||
add_route( | ||
JsonDataView.as_view(self, "metadata.json", lambda: self.metadata()), | ||
r"/-/metadata(\.(?P<format>json))?$", | ||
) | ||
add_route( | ||
JsonDataView.as_view(self, "versions.json", self._versions), | ||
r"/-/versions(\.(?P<format>json))?$", | ||
|
@@ -1585,7 +1650,6 @@ async def resolve_row(self, request): | |
def app(self): | ||
"""Returns an ASGI app function that serves the whole of Datasette""" | ||
routes = self._routes() | ||
self._register_custom_units() | ||
|
||
async def setup_db(): | ||
# First time server starts up, calculate table counts for immutable databases | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Maybe we should rename `self._metadata_local` to `self._metadata_on_startup` or `self._metadata_from_file` to help clarify that it's the metadata that was passed to Datasette's constructor on startup? I don't want people to get confused and think that reading from `self._metadata_local` is a useful thing to do, when they should be calling the new async methods instead. Probably overthinking that though, since those methods will be documented but `self._metadata_local` will not.