From 1eac461b5ee802960d9ebde27b867e04030337a6 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 17 Sep 2015 16:12:29 +0800 Subject: [PATCH 1/2] Fix hash value for tuple. --- python/pyspark/rdd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 9ef60a7e2c84b..4c7a88b08e463 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -84,7 +84,7 @@ def portable_hash(x): h ^= len(x) if h == -1: h = -2 - return h + return int(h) return hash(x) From d5bfb01ea41a1f7cbb3e52e3224768804b5fac50 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 17 Sep 2015 23:46:29 +0800 Subject: [PATCH 2/2] Add test. --- python/pyspark/rdd.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 4c7a88b08e463..ab5aab1e115f7 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -2192,6 +2192,9 @@ def lookup(self, key): [42] >>> sorted.lookup(1024) [] + >>> rdd2 = sc.parallelize([(('a', 'b'), 'c')]).groupByKey() + >>> list(rdd2.lookup(('a', 'b'))[0]) + ['c'] """ values = self.filter(lambda kv: kv[0] == key).values()