Skip to content

Commit

Permalink
[SPARK-13749][SQL][FOLLOW-UP] Faster pivot implementation for many di…
Browse files Browse the repository at this point in the history
…stinct values with two phase aggregation

## What changes were proposed in this pull request?

This is a follow up PR for #11583. It makes 3 lazy vals into just vals and adds unit test coverage.

## How was this patch tested?

Existing unit tests and additional unit tests.

Author: Andrew Ray <[email protected]>

Closes #12861 from aray/fast-pivot-follow-up.

(cherry picked from commit d8f528c)
Signed-off-by: Yin Huai <[email protected]>
  • Loading branch information
aray authored and yhuai committed May 3, 2016
1 parent a7e8cfa commit 5230810
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -141,12 +141,12 @@ case class PivotFirst(
copy(mutableAggBufferOffset = newMutableAggBufferOffset)


override lazy val aggBufferAttributes: Seq[AttributeReference] =
override val aggBufferAttributes: Seq[AttributeReference] =
pivotIndex.toList.sortBy(_._2).map(kv => AttributeReference(kv._1.toString, valueDataType)())

override lazy val aggBufferSchema: StructType = StructType.fromAttributes(aggBufferAttributes)
override val aggBufferSchema: StructType = StructType.fromAttributes(aggBufferAttributes)

override lazy val inputAggBufferAttributes: Seq[AttributeReference] =
override val inputAggBufferAttributes: Seq[AttributeReference] =
aggBufferAttributes.map(_.newInstance())
}

Original file line number Diff line number Diff line change
Expand Up @@ -180,4 +180,21 @@ class DataFramePivotSuite extends QueryTest with SharedSQLContext{
)
}

test("pivot with datatype not supported by PivotFirst") {
checkAnswer(
complexData.groupBy().pivot("b", Seq(true, false)).agg(max("a")),
Row(Seq(1, 1, 1), Seq(2, 2, 2)) :: Nil
)
}

test("pivot with datatype not supported by PivotFirst 2") {
checkAnswer(
courseSales.withColumn("e", expr("array(earnings, 7.0d)"))
.groupBy("year")
.pivot("course", Seq("dotNET", "Java"))
.agg(min($"e")),
Row(2012, Seq(5000.0, 7.0), Seq(20000.0, 7.0)) ::
Row(2013, Seq(48000.0, 7.0), Seq(30000.0, 7.0)) :: Nil
)
}
}

0 comments on commit 5230810

Please sign in to comment.