Merge pull request alteryx#8 from yhuai/parquetMetastore
Partitioning columns can be resolved.
marmbrus committed Aug 8, 2014
2 parents 1161338 + a0baec7 commit 8cdc93c
Showing 2 changed files with 33 additions and 5 deletions.

@@ -76,7 +76,8 @@ private[hive] trait HiveStrategies {
         }).reduceOption(And).getOrElse(Literal(true))
 
         val unresolvedProjection = projectList.map(_ transform {
-          case a: AttributeReference => UnresolvedAttribute(a.name)
+          // Handle non-partitioning columns
+          case a: AttributeReference if !partitionKeyIds.contains(a.exprId) => UnresolvedAttribute(a.name)
         })
 
         if (relation.hiveQlTable.isPartitioned) {
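
The guard added above splits the projection: data columns are rewritten to UnresolvedAttribute so they can be re-bound against the Parquet schema, while partition-key columns (identified via partitionKeyIds, presumably the set of exprIds of relation.partitionKeys computed earlier in this strategy) are left for per-partition handling in the next hunk. A minimal, self-contained Scala sketch of that split, using an illustrative Attr stand-in rather than Catalyst's AttributeReference:

// Illustrative model only: Attr stands in for Catalyst's AttributeReference.
case class Attr(name: String, exprId: Long)

object ProjectionSplitSketch {
  def main(args: Array[String]): Unit = {
    val partitionKeys = Seq(Attr("p", exprId = 1L))
    val projectList   = Seq(Attr("p", 1L), Attr("value", 2L))

    // Assumed shape of partitionKeyIds: the exprIds of the table's partition keys.
    val partitionKeyIds: Set[Long] = partitionKeys.map(_.exprId).toSet

    // Data columns get re-bound against the Parquet schema by name;
    // partition keys are filled in later, once per partition.
    val (partitionCols, dataCols) =
      projectList.partition(a => partitionKeyIds.contains(a.exprId))

    println(s"re-bind against Parquet: ${dataCols.map(_.name)}")      // List(value)
    println(s"fill in per partition:  ${partitionCols.map(_.name)}")  // List(p)
  }
}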

@@ -109,16 +110,27 @@
          }
 
          org.apache.spark.sql.execution.Union(
-            partitions.par.map(p =>
+            partitions.par.map { p =>
+              val partValues = p.getValues()
+              val internalProjection = unresolvedProjection.map(_ transform {
+                // Handle partitioning columns
+                case a: AttributeReference if partitionKeyIds.contains(a.exprId) => {
+                  val idx = relation.partitionKeys.indexWhere(a.exprId == _.exprId)
+                  val key = relation.partitionKeys(idx)
+
+                  Alias(Cast(Literal(partValues.get(idx), StringType), key.dataType), a.name)()
+                }
+              })
+
               hiveContext
                 .parquetFile(p.getLocation)
                 .lowerCase
                 .where(unresolvedOtherPredicates)
-                .select(unresolvedProjection:_*)
+                .select(internalProjection:_*)
                 .queryExecution
                 .executedPlan
-                .fakeOutput(projectList.map(_.toAttribute))).seq) :: Nil
-
+                .fakeOutput(projectList.map(_.toAttribute))
+            }.seq) :: Nil
         } else {
           hiveContext
             .parquetFile(relation.hiveQlTable.getDataLocation.getPath)
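
The per-partition projection above is the heart of the change: the metastore stores partition values as strings, so for each partition every partition-key reference is replaced by that partition's value, cast from StringType to the key's declared data type and aliased under the original column name (Alias(Cast(Literal(...), key.dataType), a.name)()). A rough, self-contained sketch of the same idea with plain Scala stand-ins instead of Catalyst expressions (all names below are illustrative, not Spark's API):

// Illustrative only: simple stand-ins for Catalyst's Literal/Cast/Alias machinery.
sealed trait SketchType
case object IntKind    extends SketchType
case object StringKind extends SketchType

case class PartitionKey(name: String, dataType: SketchType)

object PartitionProjectionSketch {
  // A metastore partition value is always a string; cast it to the key's declared type.
  def castPartitionValue(raw: String, dataType: SketchType): Any = dataType match {
    case IntKind    => raw.toInt
    case StringKind => raw
  }

  def main(args: Array[String]): Unit = {
    val partitionKeys = Seq(PartitionKey("p", IntKind))
    // One value list per partition, in partition-key order, as the metastore would return them.
    val partitions = Seq(Seq("1"), Seq("2"), Seq("3"))

    // For each partition, build the constants that stand in for its partition-key columns.
    val perPartitionLiterals = partitions.map { partValues =>
      partitionKeys.zipWithIndex.map { case (key, idx) =>
        key.name -> castPartitionValue(partValues(idx), key.dataType)
      }.toMap
    }

    perPartitionLiterals.foreach(println)  // Map(p -> 1), Map(p -> 2), Map(p -> 3)
  }
}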

@@ -87,6 +87,22 @@ class ParquetMetastoreSuite extends QueryTest with BeforeAndAfterAll {
       sql(s"ALTER TABLE partitioned_parquet ADD PARTITION (p=$p)")
     }
 
+  test("project the partitioning column") {
+    checkAnswer(
+      sql("SELECT p, count(*) FROM partitioned_parquet group by p"),
+      (1, 10) ::
+      (2, 10) ::
+      (3, 10) ::
+      (4, 10) ::
+      (5, 10) ::
+      (6, 10) ::
+      (7, 10) ::
+      (8, 10) ::
+      (9, 10) ::
+      (10, 10) :: Nil
+    )
+  }
+
   test("simple count") {
     checkAnswer(
       sql("SELECT COUNT(*) FROM partitioned_parquet"),
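
Side note on the new test: the expected answer enumerates partitions p = 1 through 10 with ten rows apiece, matching the partitions added in the setup shown above. In a Scala REPL the same expected list could be built programmatically:

// Equivalent to (1, 10) :: (2, 10) :: ... :: (10, 10) :: Nil
val expected: List[(Int, Int)] = (1 to 10).map(p => (p, 10)).toList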
