Skip to content

Commit

Permalink
Add column metadata to Athena glue processing.
Browse files: browse the repository at this point in the history.
  • Loading branch information
Marina Samuel committed Apr 11, 2019
1 parent f67dbe8 commit 179aed7
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 5 deletions.
11 changes: 10 additions & 1 deletion redash/query_runner/athena.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -143,9 +143,18 @@ def __get_schema_from_glue(self):
table_name = '%s.%s' % (database['Name'], table['Name'])
if table_name not in schema:
column = [columns['Name'] for columns in table['StorageDescriptor']['Columns']]
schema[table_name] = {'name': table_name, 'columns': column}
metadata = [{
"name": column_data['Name'],
"type": column_data['Type']
} for column_data in table['StorageDescriptor']['Columns']]
schema[table_name] = {'name': table_name, 'columns': column, 'metadata': metadata}
for partition in table.get('PartitionKeys', []):
schema[table_name]['columns'].append(partition['Name'])
schema[table_name]['metadata'].append({
"name": partition['Name'],
"type": partition['Type']
})

return schema.values()

def get_schema(self, get_stats=False):
Expand Down
24 changes: 20 additions & 4 deletions tests/query_runner/test_athena.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -72,7 +72,11 @@ def test_external_table(self):
{'DatabaseName': 'test1'},
)
with self.stubber:
assert query_runner.get_schema() == [{'columns': ['row_id'], 'name': 'test1.jdbc_table'}]
assert query_runner.get_schema() == [{
'columns': ['row_id'],
'name': 'test1.jdbc_table',
'metadata': [{'type': 'int', 'name': 'row_id'}]
}]

def test_partitioned_table(self):
"""
Expand Down Expand Up @@ -118,7 +122,11 @@ def test_partitioned_table(self):
{'DatabaseName': 'test1'},
)
with self.stubber:
assert query_runner.get_schema() == [{'columns': ['sk', 'category'], 'name': 'test1.partitioned_table'}]
assert query_runner.get_schema() == [{
'columns': ['sk', 'category'],
'name': 'test1.partitioned_table',
'metadata': [{'type': 'int', 'name': 'sk'}, {'type': 'int', 'name': 'category'}]
}]

def test_view(self):
query_runner = Athena({'glue': True, 'region': 'mars-east-1'})
Expand Down Expand Up @@ -150,7 +158,11 @@ def test_view(self):
{'DatabaseName': 'test1'},
)
with self.stubber:
assert query_runner.get_schema() == [{'columns': ['sk'], 'name': 'test1.view'}]
assert query_runner.get_schema() == [{
'columns': ['sk'],
'name': 'test1.view',
'metadata': [{'type': 'int', 'name': 'sk'}]
}]

def test_dodgy_table_does_not_break_schema_listing(self):
"""
Expand Down Expand Up @@ -187,4 +199,8 @@ def test_dodgy_table_does_not_break_schema_listing(self):
{'DatabaseName': 'test1'},
)
with self.stubber:
assert query_runner.get_schema() == [{'columns': ['region'], 'name': 'test1.csv'}]
assert query_runner.get_schema() == [{
'columns': ['region'],
'name': 'test1.csv',
'metadata': [{'type': 'string', 'name': 'region'}]
}]

0 comments on commit 179aed7

Please sign in to comment.