Skip to content

Commit

Permalink
Merge pull request #905 from fishtown-analytics/agate-less-aggressive…
Browse files Browse the repository at this point in the history
…-inference

Agate less aggressive inference (#861)
  • Loading branch information
beckjake authored Aug 9, 2018
2 parents 57eaa0c + 7c286fc commit 4459c0d
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- Add a 'generated_at' field to both the manifest and the catalog. ([#887](https://github.com/fishtown-analytics/dbt/pull/877))
- Version 2 of schema.yml, which allows users to create table and column comments that end up in the manifest ([#880](https://github.com/fishtown-analytics/dbt/pull/880))
- Add `docs` blocks that users can put into `.md` files and `doc()` value for schema v2 description fields ([#888](https://github.com/fishtown-analytics/dbt/pull/888))
- Type inference for interpreting CSV data is now less aggressive ([#905](https://github.com/fishtown-analytics/dbt/pull/905))

## dbt 0.10.2 - Betsy Ross (August 3, 2018)

Expand Down
13 changes: 8 additions & 5 deletions dbt/clients/agate_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@
import agate

# Type tester used when inferring column types for tabular data.
# NOTE(review): this hunk is rendered without its +/- markers. Going by the
# file header ("8 additions & 5 deletions"), the first five entries below are
# the old list and the eight lines after them are its replacement - confirm
# against the raw diff before copying this text anywhere.
DEFAULT_TYPE_TESTER = agate.TypeTester(types=[
# Old entries: every data type constructed with no arguments.
agate.data_types.Number(),
agate.data_types.Date(),
agate.data_types.DateTime(),
agate.data_types.Boolean(),
agate.data_types.Text()
# New entries: Boolean is given only 'true' / 'false' as its true / false
# values, TimeDelta is added, and every type is given 'null' as its only
# null value.
agate.data_types.Boolean(true_values=('true',),
false_values=('false',),
null_values=('null',)),
agate.data_types.Number(null_values=('null',)),
agate.data_types.TimeDelta(null_values=('null',)),
agate.data_types.Date(null_values=('null',)),
agate.data_types.DateTime(null_values=('null',)),
agate.data_types.Text(null_values=('null',))
])


Expand Down
55 changes: 55 additions & 0 deletions test/unit/test_agate_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from __future__ import unicode_literals
import unittest

from datetime import datetime
from decimal import Decimal
from isodate import tzinfo
import os
from shutil import rmtree
from tempfile import mkdtemp
from dbt.clients import agate_helper

# CSV fixture: a header row naming columns a-g followed by two data rows.
SAMPLE_CSV_DATA = """a,b,c,d,e,f,g
1,n,test,3.2,20180806T11:33:29.320Z,True,NULL
2,y,asdf,900,20180806T11:35:29.320Z,False,a string"""


# Values the tests expect for the two sample rows, in column order a-g:
#   a - integers
#   b - 'n' / 'y' remain plain text rather than becoming booleans
#   c - plain text
#   d - numbers (Decimal('3.2') and 900)
#   e - timestamps as UTC-aware datetimes
#   f - 'True' / 'False' become booleans
#   g - 'NULL' becomes None, while 'a string' remains text
EXPECTED = [
[1, 'n', 'test', Decimal('3.2'),
datetime(2018, 8, 6, 11, 33, 29, 320000, tzinfo=tzinfo.Utc()),
True, None,
],
[2, 'y', 'asdf', 900,
datetime(2018, 8, 6, 11, 35, 29, 320000, tzinfo=tzinfo.Utc()),
False, 'a string',
],
]

class TestAgateHelper(unittest.TestCase):
    """Tests for building tables through ``dbt.clients.agate_helper``.

    Each test pushes the same two sample rows through a different entry
    point and compares the resulting table with ``EXPECTED``.
    """

    def setUp(self):
        # Scratch directory for the CSV fixture; removed again in tearDown.
        self.tempdir = mkdtemp()

    def tearDown(self):
        rmtree(self.tempdir)

    def _assert_matches_expected(self, table):
        """Assert that ``table`` holds exactly the rows in ``EXPECTED``."""
        self.assertEqual(len(table), len(EXPECTED))
        for actual_row, expected_row in zip(table, EXPECTED):
            self.assertEqual(list(actual_row), expected_row)

    def test_from_csv(self):
        """Write the sample CSV to disk and load it with ``from_csv``."""
        csv_path = os.path.join(self.tempdir, 'input.csv')
        with open(csv_path, 'wb') as handle:
            handle.write(SAMPLE_CSV_DATA.encode('utf-8'))
        self._assert_matches_expected(agate_helper.from_csv(csv_path))

    def test_from_data(self):
        """Build the table from in-memory dicts with ``table_from_data``."""
        column_names = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
        raw_rows = [
            ('1', 'n', 'test', '3.2',
             '20180806T11:33:29.320Z', 'True', 'NULL'),
            ('2', 'y', 'asdf', '900',
             '20180806T11:35:29.320Z', 'False', 'a string'),
        ]
        # Every value is passed as a string, keyed by its column name.
        data = [dict(zip(column_names, values)) for values in raw_rows]
        self._assert_matches_expected(
            agate_helper.table_from_data(data, column_names)
        )

0 comments on commit 4459c0d

Please sign in to comment.