diff --git a/setup.py b/setup.py
index 2c14b90a45304..43744a4e1003d 100644
--- a/setup.py
+++ b/setup.py
@@ -81,6 +81,7 @@ def get_git_sha():
         'thrift>=0.9.3',
         'thrift-sasl>=0.2.1',
         'unidecode>=0.04.21',
+        'unicodecsv==0.14.1',
         'bleach==2.1.2',
     ],
     extras_require={
diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py
index f0e7c67620712..c40bbe7f1432d 100644
--- a/superset/db_engine_specs.py
+++ b/superset/db_engine_specs.py
@@ -17,7 +17,6 @@
 from __future__ import unicode_literals
 
 from collections import defaultdict, namedtuple
-import csv
 import inspect
 import logging
 import os
@@ -34,6 +33,7 @@
 from sqlalchemy.engine.url import make_url
 from sqlalchemy.sql import text
 import sqlparse
+import unicodecsv
 from werkzeug.utils import secure_filename
 
 from superset import app, cache_util, conf, db, utils
@@ -849,7 +849,7 @@ def create_table_from_csv(form, table):
         """Uploads a csv file and creates a superset datasource in Hive."""
         def get_column_names(filepath):
             with open(filepath, 'rb') as f:
-                return csv.reader(f).next()
+                return unicodecsv.reader(f, encoding='utf-8-sig').next()
 
         table_name = form.name.data
         filename = form.csv_file.data.filename
@@ -873,11 +873,12 @@ def get_column_names(filepath):
         s3 = boto3.client('s3')
         location = os.path.join('s3a://', bucket_path, upload_prefix, table_name)
         s3.upload_file(
-            upload_path, 'airbnb-superset',
+            upload_path, bucket_path,
             os.path.join(upload_prefix, table_name, filename))
         sql = """CREATE EXTERNAL TABLE {table_name} ( {schema_definition} )
             ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS
-            TEXTFILE LOCATION '{location}'""".format(**locals())
+            TEXTFILE LOCATION '{location}'
+            tblproperties ('skip.header.line.count'='1')""".format(**locals())
         logging.info(form.con.data)
         engine = create_engine(form.con.data.sqlalchemy_uri)
         engine.execute(sql)
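
Illustrative note (not part of the diff): a minimal, standalone sketch of what the new header read does, assuming a hypothetical local file `example.csv` that may begin with a UTF-8 BOM, as Excel commonly writes. `next(...)` is used here only as the Python 2/3-portable spelling of the `.next()` call in the diff.

```python
# Sketch only: mirrors the get_column_names() change, not Superset code.
import unicodecsv


def get_column_names(filepath):
    with open(filepath, 'rb') as f:
        # 'utf-8-sig' strips a leading BOM, so the first column name comes
        # back as u'col1' rather than u'\ufeffcol1'.
        return next(unicodecsv.reader(f, encoding='utf-8-sig'))


print(get_column_names('example.csv'))  # hypothetical input file
```

The header row itself is still uploaded to S3 with the rest of the file, which is why the DDL now adds `tblproperties ('skip.header.line.count'='1')`: Hive skips that first line instead of returning the column names as a data row.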