From 2a7638081c84d7bb8002edb5148f4be1362eb732 Mon Sep 17 00:00:00 2001 From: Noah Dove Date: Thu, 3 Dec 2020 18:10:55 -0800 Subject: [PATCH] Reject null file_id in TDR snapshots (#2579) --- src/azul/plugins/repository/tdr/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/azul/plugins/repository/tdr/__init__.py b/src/azul/plugins/repository/tdr/__init__.py index 9ded339dc7..ed547809f1 100644 --- a/src/azul/plugins/repository/tdr/__init__.py +++ b/src/azul/plugins/repository/tdr/__init__.py @@ -43,6 +43,7 @@ RequirementError, cached_property, config, + reject, require, ) from azul.bigquery import ( @@ -570,8 +571,8 @@ def _parse_file_id_column(self, file_id: Optional[str]) -> Optional[str]: # The file_id column is present for datasets, but is usually null, may # contain unexpected/unusable values, and NEVER produces usable DRS URLs, # so we avoid parsing the column altogether for datasets. - # Some developmental snapshots also expose null file_ids. - if self.source.is_snapshot and file_id is not None: + if self.source.is_snapshot: + reject(file_id is None) # TDR stores the complete DRS URI in the file_id column, but we only # index the path component. These requirements prevent mismatches in # the DRS domain, and ensure that changes to the column syntax don't