-
Notifications
You must be signed in to change notification settings - Fork 21
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add testing for partitioned BZ2 tables in Hive #341
base: sprint-38
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,22 +13,27 @@ | |
*/ | ||
package com.facebook.presto.tests.hive; | ||
|
||
import com.facebook.presto.tests.ImmutableTpchTablesRequirements.ImmutableLineItemTable; | ||
import com.google.common.base.MoreObjects; | ||
import com.google.common.base.Throwables; | ||
import com.google.common.collect.ImmutableMap; | ||
import com.teradata.tempto.ProductTest; | ||
import com.teradata.tempto.Requires; | ||
import com.teradata.tempto.assertions.QueryAssert.Row; | ||
import com.teradata.tempto.query.QueryResult; | ||
import org.testng.annotations.DataProvider; | ||
import org.testng.annotations.Test; | ||
|
||
import java.sql.Connection; | ||
import java.sql.SQLException; | ||
import java.sql.Statement; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
import static com.facebook.presto.tests.TestGroups.BIG_QUERY; | ||
import static com.facebook.presto.tests.TestGroups.STORAGE_FORMATS; | ||
import static com.facebook.presto.tests.utils.JdbcDriverUtils.setSessionProperty; | ||
import static com.facebook.presto.tests.utils.QueryExecutors.onHive; | ||
import static com.facebook.presto.util.ImmutableCollectors.toImmutableList; | ||
import static com.teradata.tempto.assertions.QueryAssert.Row.row; | ||
import static com.teradata.tempto.assertions.QueryAssert.assertThat; | ||
|
@@ -41,6 +46,7 @@ public class TestHiveStorageFormats | |
extends ProductTest | ||
{ | ||
private static final String TPCH_SCHEMA = "tiny"; | ||
private static final String TEST_TPCH_LINIETEM = "tpch." + TPCH_SCHEMA + ".lineitem"; | ||
|
||
@DataProvider(name = "storage_formats") | ||
public static Object[][] storageFormats() | ||
|
@@ -97,7 +103,7 @@ public void testInsertIntoTable(StorageFormat storageFormat) | |
query(insertInto); | ||
|
||
// SELECT FROM TABLE | ||
assertSelect("select sum(tax), sum(discount), sum(linenumber) from %s", tableName); | ||
assertSelect("select sum(tax), sum(discount), sum(linenumber) from %s", tableName, TEST_TPCH_LINIETEM); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Two arguments are passed here, while only one is expected by the format string. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These are not String.format arguments; they are the actual table and the expected table. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. My fault then. Sorry! |
||
|
||
// DROP TABLE | ||
query(format("DROP TABLE %s", tableName)); | ||
|
@@ -125,7 +131,7 @@ public void testCreateTableAs(StorageFormat storageFormat) | |
query(createTableAsSelect); | ||
|
||
// SELECT FROM TABLE | ||
assertSelect("select sum(extendedprice), sum(suppkey), count(partkey) from %s", tableName); | ||
assertSelect("select sum(extendedprice), sum(suppkey), count(partkey) from %s", tableName, TEST_TPCH_LINIETEM); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
|
||
// DROP TABLE | ||
query(format("DROP TABLE %s", tableName)); | ||
|
@@ -171,7 +177,7 @@ public void testInsertIntoPartitionedTable(StorageFormat storageFormat) | |
query(insertInto); | ||
|
||
// SELECT FROM TABLE | ||
assertSelect("select sum(tax), sum(discount), sum(length(returnflag)) from %s", tableName); | ||
assertSelect("select sum(tax), sum(discount), sum(length(returnflag)) from %s", tableName, TEST_TPCH_LINIETEM); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
|
||
// DROP TABLE | ||
query(format("DROP TABLE %s", tableName)); | ||
|
@@ -199,15 +205,72 @@ public void testCreatePartitionedTableAs(StorageFormat storageFormat) | |
query(createTableAsSelect); | ||
|
||
// SELECT FROM TABLE | ||
assertSelect("select sum(tax), sum(discount), sum(length(returnflag)) from %s", tableName); | ||
assertSelect("select sum(tax), sum(discount), sum(length(returnflag)) from %s", tableName, TEST_TPCH_LINIETEM); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
|
||
// DROP TABLE | ||
query(format("DROP TABLE %s", tableName)); | ||
} | ||
|
||
private static void assertSelect(String query, String tableName) | ||
@Requires(ImmutableLineItemTable.class) | ||
@Test(groups = {STORAGE_FORMATS, BIG_QUERY}) | ||
public void testSelectFromPartitionedBzipTable() throws Exception | ||
{ | ||
QueryResult expected = query(format(query, "tpch." + TPCH_SCHEMA + ".lineitem")); | ||
// This test is marked as "big_query" because INSERT OVERWRITE TABLE is very slow, but that | ||
// is the only way to get bzip tables in Hive. | ||
|
||
String tableName = "storage_formats_test_select_partitioned_bzip"; | ||
query(format("DROP TABLE IF EXISTS %s", tableName)); | ||
|
||
// The BZIP part of the table comes from the configs that are set during insert | ||
String createTable = format( | ||
"CREATE TABLE %s(" + | ||
" l_orderkey BIGINT," + | ||
" l_partkey BIGINT," + | ||
" l_suppkey BIGINT," + | ||
" l_linenumber INT," + | ||
" l_quantity DOUBLE," + | ||
" l_extendedprice DOUBLE," + | ||
" l_discount DOUBLE," + | ||
" l_tax DOUBLE," + | ||
" l_linestatus VARCHAR(1)," + | ||
" l_shipinstruct VARCHAR(25)," + | ||
" l_shipmode VARCHAR(10)," + | ||
" l_comment VARCHAR(44)" + | ||
") PARTITIONED BY (l_returnflag VARCHAR(1)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Where is the BZIP part? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's in setHiveConfigsForBzipInsert. It isn't a table property; the stored files are simply in bz2 format. |
||
tableName); | ||
onHive().executeQuery(createTable); | ||
|
||
try { | ||
String insertInto = format( | ||
"INSERT OVERWRITE TABLE %s PARTITION(l_returnflag) " + | ||
"SELECT l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, " + | ||
"l_linestatus, l_shipinstruct, l_shipmode, l_comment, l_returnflag " + | ||
"FROM default.lineitem", tableName); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this tpch.default.lineitem? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, it's hive.default.lineitem. |
||
Statement statement = onHive().getConnection().createStatement(); | ||
setHiveConfigsForBzipInsert(statement); | ||
statement.execute(insertInto); | ||
statement.close(); | ||
|
||
assertSelect("select sum(l_tax), sum(l_discount), sum(length(l_returnflag)) from %s", tableName, "hive.default.lineitem"); | ||
} | ||
finally { | ||
query(format("DROP TABLE %s", tableName)); | ||
} | ||
} | ||
|
||
private void setHiveConfigsForBzipInsert(Statement statement) | ||
throws SQLException | ||
{ | ||
statement.execute("SET hive.exec.compress.output=true;"); | ||
statement.execute("SET mapreduce.output.fileoutputformat.compress=true;"); | ||
statement.execute("SET mapred.output.compress=true"); | ||
statement.execute("SET mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.BZip2Codec"); | ||
statement.execute("SET hive.exec.dynamic.partition.mode=nonstrict;"); | ||
} | ||
|
||
private static void assertSelect(String query, String tableName, String expectedTable) | ||
{ | ||
QueryResult expected = query(format(query, expectedTable)); | ||
List<Row> expectedRows = expected.rows().stream() | ||
.map((columns) -> row(columns.toArray())) | ||
.collect(toImmutableList()); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we already have such a group.
Even the presto-product-tests README refers to it: https://github.com/prestodb/presto/tree/master/presto-product-tests
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's used manually in the convention tests, but it was never added to TestGroups.