Skip to content

Commit

Permalink
fixes to csv - hive upload (#4488)
Browse files Browse the repository at this point in the history
  • Loading branch information
timifasubaa authored and Grace Guo committed Feb 28, 2018
1 parent 8626793 commit 404e2d5
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def get_git_sha():
'thrift>=0.9.3',
'thrift-sasl>=0.2.1',
'unidecode>=0.04.21',
+ 'unicodecsv==0.14.1',
'bleach==2.1.2',
],
extras_require={
Expand Down
9 changes: 5 additions & 4 deletions superset/db_engine_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from __future__ import unicode_literals

from collections import defaultdict, namedtuple
- import csv
import inspect
import logging
import os
Expand All @@ -35,6 +34,7 @@
from sqlalchemy.engine.url import make_url
from sqlalchemy.sql import text
import sqlparse
+ import unicodecsv
from werkzeug.utils import secure_filename

from superset import app, cache_util, conf, db, utils
Expand Down Expand Up @@ -850,7 +850,7 @@ def create_table_from_csv(form, table):
"""Uploads a csv file and creates a superset datasource in Hive."""
def get_column_names(filepath):
with open(filepath, 'rb') as f:
- return csv.reader(f).next()
+ return unicodecsv.reader(f, encoding='utf-8-sig').next()

table_name = form.name.data
filename = form.csv_file.data.filename
Expand All @@ -874,11 +874,12 @@ def get_column_names(filepath):
s3 = boto3.client('s3')
location = os.path.join('s3a://', bucket_path, upload_prefix, table_name)
s3.upload_file(
- upload_path, 'airbnb-superset',
+ upload_path, bucket_path,
os.path.join(upload_prefix, table_name, filename))
sql = """CREATE EXTERNAL TABLE {table_name} ( {schema_definition} )
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS
- TEXTFILE LOCATION '{location}'""".format(**locals())
+ TEXTFILE LOCATION '{location}'
+ tblproperties ('skip.header.line.count'='1')""".format(**locals())
logging.info(form.con.data)
engine = create_engine(form.con.data.sqlalchemy_uri)
engine.execute(sql)
Expand Down

0 comments on commit 404e2d5

Please sign in to comment.