Skip to content

Commit

Permalink
semi-join #13
Browse files Browse the repository at this point in the history
ignoring anti option for now
  • Loading branch information
piccolbo committed Jul 14, 2015
1 parent 017715b commit aa803ef
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 2 deletions.
26 changes: 25 additions & 1 deletion pkg/R/src-sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -337,4 +337,28 @@ right_join.tbl_SparkSQL =

# full_join method for tbl_SparkSQL objects.
# Forwards x, y, by, copy, auto_index and any extra arguments in ... to
# some_join, fixing the join type to "full"; returns whatever some_join returns.
full_join.tbl_SparkSQL =
  function(x, y, by = NULL, copy = FALSE, auto_index = FALSE, ...) {
    some_join(
      x = x, y = y, by = by, copy = copy, auto_index = auto_index, ...,
      type = "full")}

#modeled after sql_semi_join methods in http://github.com/hadley/dplyr,
#under MIT license
# Build the SQL statement for a semi join between two SparkSQL queries.
#
# con:  connection used to escape identifiers and to build the subqueries.
# x, y: tbls; x$query$sql and y$query$sql are wrapped as the subqueries
#       aliased _LEFT and _RIGHT, and x$select becomes the "vars" attribute
#       of the result.
# anti: must be FALSE. Only LEFT SEMI JOIN is generated here, so an anti
#       join cannot be expressed; anti = TRUE raises an error instead of
#       silently returning the semi-join statement.
# by:   join columns, resolved through dplyr:::common_by(by, x, y).
# ...:  accepted for method compatibility; not used.
#
# Returns the object produced by dplyr:::build_sql, with attribute "vars"
# set to x$select.
sql_semi_join.SparkSQLConnection =
  function(con, x, y, anti = FALSE, by = NULL, ...) {
    # Fail before any SQL is built: the statement below is always a semi join,
    # which is the opposite row set of what an anti join should return.
    if (isTRUE(anti)) {
      stop(
        "anti joins are not supported for SparkSQL sources: ",
        "only LEFT SEMI JOIN can be generated",
        call. = FALSE)
    }
    by = dplyr:::common_by(by, x, y)
    left = dplyr:::escape(ident("_LEFT"), con = con)
    right = dplyr:::escape(ident("_RIGHT"), con = con)
    # One "_LEFT.col = _RIGHT.col" equality per pair of join columns,
    # combined with AND and wrapped in parentheses.
    on =
      dplyr:::sql_vector(
        paste0(
          left, ".", dplyr:::sql_escape_ident(con, by$x), " = ",
          right, ".", dplyr:::sql_escape_ident(con, by$y)),
        collapse = " AND ",
        parens = TRUE)
    # NOTE(review): the SQL fragments are kept as inline string literals;
    # presumably build_sql treats literals differently from evaluated
    # values, so confirm before hoisting them into variables.
    from =
      dplyr:::build_sql(
        "SELECT * FROM ",
        dplyr:::sql_subquery(con, x$query$sql, "_LEFT"), "\n",
        "LEFT SEMI JOIN ",
        dplyr:::sql_subquery(con, y$query$sql, "_RIGHT"), "\n",
        " ON ", on)
    # The result exposes only the columns of the left-hand table.
    attr(from, "vars") = x$select
    from}
3 changes: 2 additions & 1 deletion pkg/tests/two-table.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ df1 %>% nrow()
#broken by design
#df1 %>% inner_join(df2, by = "x") %>% nrow()
#broken
#df1 %>% semi_join(df2, by = "x") %>% nrow()
#
#row count of the semi join of df1 with df2 on column "x"
df1 %>% semi_join(df2, by = "x") %>% nrow()


#all set ops but union are missing in hiveql
Expand Down

0 comments on commit aa803ef

Please sign in to comment.