Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add testing for partitioned BZ2 tables in Hive #341

Open
wants to merge 1 commit into
base: sprint-38
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ public final class TestGroups
public static final String AUTHORIZATION = "authorization";
public static final String POST_HIVE_1_0_1 = "post_hive_1_0_1";
public static final String PREPARED_STATEMENTS = "prepared_statements";
public static final String BIG_QUERY = "big_query";

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we already have such a group.
Even the presto-product-tests README refers to it: https://github.com/prestodb/presto/tree/master/presto-product-tests

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's used manually in convention tests but was never added to TestGroups


// Private no-op constructor: this final class only exposes group-name constants,
// so instantiation from outside the class is blocked.
private TestGroups() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,27 @@
*/
package com.facebook.presto.tests.hive;

import com.facebook.presto.tests.ImmutableTpchTablesRequirements.ImmutableLineItemTable;
import com.google.common.base.MoreObjects;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.teradata.tempto.ProductTest;
import com.teradata.tempto.Requires;
import com.teradata.tempto.assertions.QueryAssert.Row;
import com.teradata.tempto.query.QueryResult;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.List;
import java.util.Map;

import static com.facebook.presto.tests.TestGroups.BIG_QUERY;
import static com.facebook.presto.tests.TestGroups.STORAGE_FORMATS;
import static com.facebook.presto.tests.utils.JdbcDriverUtils.setSessionProperty;
import static com.facebook.presto.tests.utils.QueryExecutors.onHive;
import static com.facebook.presto.util.ImmutableCollectors.toImmutableList;
import static com.teradata.tempto.assertions.QueryAssert.Row.row;
import static com.teradata.tempto.assertions.QueryAssert.assertThat;
Expand All @@ -41,6 +46,7 @@ public class TestHiveStorageFormats
extends ProductTest
{
// TPC-H schema name used to build the fully qualified reference table below.
private static final String TPCH_SCHEMA = "tiny";
// Fully qualified lineitem table ("tpch.tiny.lineitem"); passed to assertSelect as the
// table whose query results are treated as the expected values.
// NOTE(review): the constant name misspells LINEITEM; a rename must update every usage together.
private static final String TEST_TPCH_LINIETEM = "tpch." + TPCH_SCHEMA + ".lineitem";

@DataProvider(name = "storage_formats")
public static Object[][] storageFormats()
Expand Down Expand Up @@ -97,7 +103,7 @@ public void testInsertIntoTable(StorageFormat storageFormat)
query(insertInto);

// SELECT FROM TABLE
assertSelect("select sum(tax), sum(discount), sum(linenumber) from %s", tableName);
assertSelect("select sum(tax), sum(discount), sum(linenumber) from %s", tableName, TEST_TPCH_LINIETEM);
Copy link

@maciejgrzybek maciejgrzybek Sep 8, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two arguments while only one expected in String.format?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those aren't String.format args; they are the actual table and the expected table.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My fault then. Sorry!


// DROP TABLE
query(format("DROP TABLE %s", tableName));
Expand Down Expand Up @@ -125,7 +131,7 @@ public void testCreateTableAs(StorageFormat storageFormat)
query(createTableAsSelect);

// SELECT FROM TABLE
assertSelect("select sum(extendedprice), sum(suppkey), count(partkey) from %s", tableName);
assertSelect("select sum(extendedprice), sum(suppkey), count(partkey) from %s", tableName, TEST_TPCH_LINIETEM);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto


// DROP TABLE
query(format("DROP TABLE %s", tableName));
Expand Down Expand Up @@ -171,7 +177,7 @@ public void testInsertIntoPartitionedTable(StorageFormat storageFormat)
query(insertInto);

// SELECT FROM TABLE
assertSelect("select sum(tax), sum(discount), sum(length(returnflag)) from %s", tableName);
assertSelect("select sum(tax), sum(discount), sum(length(returnflag)) from %s", tableName, TEST_TPCH_LINIETEM);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto


// DROP TABLE
query(format("DROP TABLE %s", tableName));
Expand Down Expand Up @@ -199,15 +205,72 @@ public void testCreatePartitionedTableAs(StorageFormat storageFormat)
query(createTableAsSelect);

// SELECT FROM TABLE
assertSelect("select sum(tax), sum(discount), sum(length(returnflag)) from %s", tableName);
assertSelect("select sum(tax), sum(discount), sum(length(returnflag)) from %s", tableName, TEST_TPCH_LINIETEM);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto


// DROP TABLE
query(format("DROP TABLE %s", tableName));
}

private static void assertSelect(String query, String tableName)
@Requires(ImmutableLineItemTable.class)
@Test(groups = {STORAGE_FORMATS, BIG_QUERY})
public void testSelectFromPartitionedBzipTable() throws Exception
{
QueryResult expected = query(format(query, "tpch." + TPCH_SCHEMA + ".lineitem"));
// This test is marked as "big_query" because INSERT OVERWRITE TABLE is very slow, but that
// is the only way to get bzip tables in Hive.

String tableName = "storage_formats_test_select_partitioned_bzip";
query(format("DROP TABLE IF EXISTS %s", tableName));

// The BZIP part of the table comes from the configs that are set during insert
String createTable = format(
"CREATE TABLE %s(" +
" l_orderkey BIGINT," +
" l_partkey BIGINT," +
" l_suppkey BIGINT," +
" l_linenumber INT," +
" l_quantity DOUBLE," +
" l_extendedprice DOUBLE," +
" l_discount DOUBLE," +
" l_tax DOUBLE," +
" l_linestatus VARCHAR(1)," +
" l_shipinstruct VARCHAR(25)," +
" l_shipmode VARCHAR(10)," +
" l_comment VARCHAR(44)" +
") PARTITIONED BY (l_returnflag VARCHAR(1)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE",

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where is bzip part?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's in setHiveConfigsForBzipInsert. It isn't a table property; the stored files are simply written in bz2 format.

tableName);
onHive().executeQuery(createTable);

try {
String insertInto = format(
"INSERT OVERWRITE TABLE %s PARTITION(l_returnflag) " +
"SELECT l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, " +
"l_linestatus, l_shipinstruct, l_shipmode, l_comment, l_returnflag " +
"FROM default.lineitem", tableName);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this tpch.default.lineitem?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's hive.default.lineitem

// try-with-resources guarantees the Hive statement is closed even when one of the
// SET commands or the INSERT itself throws; a plain close() call after execute()
// would be skipped on failure and leak the statement.
try (Statement statement = onHive().getConnection().createStatement()) {
    // The compression settings are applied on this statement before the INSERT so
    // that the data files of the partitions are written with the BZip2 codec.
    setHiveConfigsForBzipInsert(statement);
    statement.execute(insertInto);
}

assertSelect("select sum(l_tax), sum(l_discount), sum(length(l_returnflag)) from %s", tableName, "hive.default.lineitem");
}
finally {
query(format("DROP TABLE %s", tableName));
}
}

/**
 * Issues the Hive {@code SET} commands that make a following INSERT on the same
 * statement write BZip2-compressed output and allow fully dynamic partitioning.
 *
 * <p>The commands are sent without a trailing {@code ;}. The terminator belongs to
 * the interactive CLI, not to a command submitted over JDBC, and depending on the
 * Hive version it may otherwise be kept as part of the property value
 * (NOTE(review): confirm against the Hive version used by the product tests).
 *
 * @param statement open Hive JDBC statement on which the settings are executed
 * @throws SQLException if any of the {@code SET} commands fails
 */
private void setHiveConfigsForBzipInsert(Statement statement)
        throws SQLException
{
    // Turn on compression of the query output.
    statement.execute("SET hive.exec.compress.output=true");
    // Both the current and the legacy MapReduce property names are set.
    statement.execute("SET mapreduce.output.fileoutputformat.compress=true");
    statement.execute("SET mapred.output.compress=true");
    // Select BZip2 as the output codec.
    statement.execute("SET mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.BZip2Codec");
    // The INSERT names no static partition value, which requires nonstrict mode.
    statement.execute("SET hive.exec.dynamic.partition.mode=nonstrict");
}

private static void assertSelect(String query, String tableName, String expectedTable)
{
QueryResult expected = query(format(query, expectedTable));
List<Row> expectedRows = expected.rows().stream()
.map((columns) -> row(columns.toArray()))
.collect(toImmutableList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ public static QueryExecutor onPresto()
return testContext().getDependency(QueryExecutor.class, "presto");
}

/**
 * Fetches the {@link QueryExecutor} that the current test context provides
 * under the dependency name {@code "hive"}.
 *
 * @return the executor returned by the test context for the "hive" name
 */
public static QueryExecutor onHive()
{
    final String executorName = "hive";
    return testContext().getDependency(QueryExecutor.class, executorName);
}

public static QueryExecutor connectToPresto(String prestoConfig)
{
return testContext().getDependency(QueryExecutor.class, prestoConfig);
Expand Down