From bdb954798969359ac1e84b279d626129ea2a8591 Mon Sep 17 00:00:00 2001 From: jerryshao Date: Wed, 18 Apr 2018 10:29:34 +0800 Subject: [PATCH] [LIVY-457][REPL] Fix SQLContext is not initialized correctly issue ## What changes were proposed in this pull request? The signature of SQLContext's constructor changed in Spark 2, but we are still using the Spark 1 signature, which causes an exception to be thrown when the object is used. ## How was this patch tested? Unit tests and local verification. Author: jerryshao Closes #86 from jerryshao/LIVY-457. (cherry picked from commit cd8b11261811abb4fe3afe0f19540cd4bdaeb5f7) Signed-off-by: jerryshao --- .travis.yml | 5 ++--- .../test/scala/org/apache/livy/test/InteractiveIT.scala | 4 ++++ repl/src/main/resources/fake_shell.py | 7 +++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 02578e8ee..8cfc51365 100644 --- a/.travis.yml +++ b/.travis.yml @@ -51,14 +51,13 @@ cache: pip: true directories: - $HOME/.m2 - - $(npm config get prefix) before_install: - sudo apt-get -y install python3-pip python-dev - sudo apt-get -y install libkrb5-dev - sudo apt-get -y remove python-setuptools - - sudo pip2 install --upgrade pip "setuptools < 36" - - sudo python3 -m pip install --upgrade pip "setuptools < 36" + - sudo pip2 install --upgrade "pip < 10.0.0" "setuptools < 36" + - sudo python3 -m pip install --upgrade "pip < 10.0.0" "setuptools < 36" - sudo pip2 install codecov cloudpickle - sudo python3 -m pip install cloudpickle diff --git a/integration-test/src/test/scala/org/apache/livy/test/InteractiveIT.scala b/integration-test/src/test/scala/org/apache/livy/test/InteractiveIT.scala index 3777fdd31..ff29d9542 100644 --- a/integration-test/src/test/scala/org/apache/livy/test/InteractiveIT.scala +++ b/integration-test/src/test/scala/org/apache/livy/test/InteractiveIT.scala @@ -90,6 +90,10 @@ class InteractiveIT extends BaseIntegrationTestSuite { s.run("from pyspark.sql.types import Row").verifyResult("") 
s.run("x = [Row(age=1, name=u'a'), Row(age=2, name=u'b'), Row(age=3, name=u'c')]") .verifyResult("") + // Check if we're running with Spark2. + if (s.run("spark").result().isLeft) { + s.run("sqlContext.sparkSession").verifyResult(".*pyspark\\.sql\\.session\\.SparkSession.*") + } s.run("%table x").verifyResult(".*headers.*type.*name.*data.*") s.run("abcde").verifyError(ename = "NameError", evalue = "name 'abcde' is not defined") s.run("raise KeyError, 'foo'").verifyError(ename = "KeyError", evalue = "'foo'") diff --git a/repl/src/main/resources/fake_shell.py b/repl/src/main/resources/fake_shell.py index 56a0ac47f..ee869783f 100644 --- a/repl/src/main/resources/fake_shell.py +++ b/repl/src/main/resources/fake_shell.py @@ -588,14 +588,17 @@ def main(): conf = SparkConf(_jvm = gateway.jvm, _jconf = jconf) sc = SparkContext(jsc=jsc, gateway=gateway, conf=conf) global_dict['sc'] = sc - sqlc = SQLContext(sc, jsqlc) - global_dict['sqlContext'] = sqlc if spark_major_version >= "2": from pyspark.sql import SparkSession spark_session = SparkSession(sc, gateway.entry_point.sparkSession()) + sqlc = SQLContext(sc, spark_session, jsqlc) + global_dict['sqlContext'] = sqlc global_dict['spark'] = spark_session else: + sqlc = SQLContext(sc, jsqlc) + global_dict['sqlContext'] = sqlc + # LIVY-294, need to check whether HiveContext can work properly, # fallback to SQLContext if HiveContext can not be initialized successfully. # Only for spark-1.