Skip to content

Commit

Permalink
implement DataFrame API and its tests
Browse files Browse the repository at this point in the history
  • Loading branch information
3cham committed May 3, 2024
1 parent 646250c commit d04857e
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 2 deletions.
44 changes: 44 additions & 0 deletions logical-plan/src/main/kotlin/DataFrame.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package io.hqew.kquery.logical

import io.hqew.kquery.datatypes.Schema

/**
 * Builder-style API for assembling a [LogicalPlan] step by step.
 *
 * Each transformation returns a [DataFrame], so calls can be chained.
 */
interface DataFrame {

    /**
     * Applies a projection.
     *
     * @param expr the expressions to project.
     * @return a [DataFrame] with the projection applied.
     */
    fun project(expr: List<LogicalExpr>): DataFrame

    /**
     * Applies a filter.
     *
     * @param expr the filter expression.
     * @return a [DataFrame] with the filter applied.
     */
    fun filter(expr: LogicalExpr): DataFrame

    /**
     * Applies an aggregation.
     *
     * @param groupBy the grouping expressions.
     * @param aggregateExpr the aggregate expressions to evaluate.
     * @return a [DataFrame] with the aggregation applied.
     */
    fun aggregate(groupBy: List<LogicalExpr>, aggregateExpr: List<AggregateExpr>): DataFrame

    /** Returns the [Schema] of this data frame. */
    fun schema(): Schema

    /** Returns the [LogicalPlan] behind this data frame. */
    fun logicalPlan(): LogicalPlan
}

/**
 * [DataFrame] implementation that wraps a single [LogicalPlan].
 *
 * Every transformation builds a new plan node on top of [plan] and returns it
 * inside a fresh [DataFrameImpl]; the wrapped plan itself is never reassigned.
 */
class DataFrameImpl(private val plan: LogicalPlan) : DataFrame {

    /** Wraps the current plan in a [Projection] over [expr]. */
    override fun project(expr: List<LogicalExpr>): DataFrame =
        DataFrameImpl(Projection(plan, expr))

    /** Wraps the current plan in a [Selection] using [expr]. */
    override fun filter(expr: LogicalExpr): DataFrame =
        DataFrameImpl(Selection(plan, expr))

    /** Wraps the current plan in an [Aggregate] built from [groupBy] and [aggregateExpr]. */
    override fun aggregate(groupBy: List<LogicalExpr>, aggregateExpr: List<AggregateExpr>): DataFrame =
        DataFrameImpl(Aggregate(plan, groupBy, aggregateExpr))

    /** Returns the schema reported by the wrapped plan. */
    override fun schema(): Schema = plan.schema()

    /** Returns the wrapped plan as-is. */
    override fun logicalPlan(): LogicalPlan = plan
}
2 changes: 1 addition & 1 deletion logical-plan/src/main/kotlin/Selection.kt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ class Selection(val input: LogicalPlan, val expr: LogicalExpr) : LogicalPlan {
}

/**
 * Renders this plan node as `Selection: <expr>`.
 *
 * The label is `Selection` (not the former `Filter`) so that it matches the
 * class name and the plan strings expected by the tests.
 */
override fun toString(): String {
    return "Selection: $expr"
}
}
67 changes: 67 additions & 0 deletions logical-plan/src/test/kotlin/DataFrameTest.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package io.hqew.kquery.logical

import io.hqew.kquery.datasource.CsvDataSource
import org.junit.Test
import org.junit.jupiter.api.TestInstance
import java.io.File
import kotlin.test.assertEquals

/**
 * Tests that the [DataFrame] API builds the expected logical plans.
 *
 * NOTE(review): this file imports `org.junit.Test` (JUnit 4) next to the
 * JUnit 5 `@TestInstance` annotation — confirm which runner is intended.
 */
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class DataFrameTest {

    /** A single projection yields a one-field schema and a `Projection` plan node. */
    @Test
    fun `test DataFrame build`() {
        val df = csv().project(listOf(Column("id")))

        // kotlin.test.assertEquals takes (expected, actual).
        assertEquals(1, df.schema().fields.size)
        assertEquals("Projection: #id", df.logicalPlan().toString())
    }

    /** A filter followed by a projection nests the `Selection` under the `Projection`. */
    @Test
    fun `test DataFrame filter`() {
        val df = csv()
            .filter(Column("first_name").eq(LiteralString("John")))
            .project(listOf(Column("id")))

        val expected =
            "Projection: #id\n\tSelection: #first_name = 'John'\n\t\tScan: employee.csv; projection=None\n"

        assertEquals(1, df.schema().fields.size)
        assertEquals(expected, format(df.logicalPlan()))
    }

    /** An aliased arithmetic expression can be projected and then filtered on by its alias. */
    @Test
    fun `multiplier and alias`() {
        val df =
            csv()
                .filter(col("state") eq lit("CO"))
                .project(
                    listOf(
                        col("id"),
                        col("first_name"),
                        col("last_name"),
                        col("salary"),
                        (col("salary") mult lit(0.1)) alias "bonus",
                    )
                )
                .filter(col("bonus") gt lit(1000))

        val expected =
            "Selection: #bonus > 1000\n" +
                "\tProjection: #id, #first_name, #last_name, #salary, #salary * 0.1 as bonus\n" +
                "\t\tSelection: #state = 'CO'\n" +
                "\t\t\tScan: employee.csv; projection=None\n"

        val actual = format(df.logicalPlan())

        assertEquals(expected, actual)
    }

    /** Builds a [DataFrame] that scans `../testdata/employee.csv` with no projection list. */
    private fun csv(): DataFrame {
        val csvSource = CsvDataSource(
            File("../testdata", "employee.csv").absolutePath,
            schema = null,
            hasHeaders = true,
            batchSize = 1024,
        )

        return DataFrameImpl(Scan("employee.csv", csvSource, listOf()))
    }
}
2 changes: 1 addition & 1 deletion logical-plan/src/test/kotlin/LogicalPlanTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class LogicalPlanTest {

assertEquals(
format(plan),
"Projection: #id, #first_name, #last_name\n\tFilter: #state = 'CO'\n\t\tScan: employee.csv; projection=None\n"
"Projection: #id, #first_name, #last_name\n\tSelection: #state = 'CO'\n\t\tScan: employee.csv; projection=None\n"
)
}
}

0 comments on commit d04857e

Please sign in to comment.