From c304cc8ae1a39b3c58f21153e7311cfab7dfac9e Mon Sep 17 00:00:00 2001
From: Nick Pentreath
Date: Sun, 15 Dec 2013 08:32:37 +0200
Subject: [PATCH] Add supporting sequence files for tests. Clean up

---
 .../apache/spark/api/python/PythonRDD.scala | 165 ++++++++++--------
 .../apache/spark/api/python/SerDeUtil.scala | 64 ++++---
 .../api/python/WriteInputFormatTests.scala | 64 +++++++
 python/pyspark/context.py | 32 ++--
 .../test_support/data/sfarray/._SUCCESS.crc | Bin 0 -> 8 bytes
 .../data/sfarray/.part-r-00000.crc | Bin 0 -> 12 bytes
 .../data/sfarray/.part-r-00001.crc | Bin 0 -> 12 bytes
 python/test_support/data/sfarray/_SUCCESS | 0
 python/test_support/data/sfarray/part-r-00000 | Bin 0 -> 134 bytes
 python/test_support/data/sfarray/part-r-00001 | Bin 0 -> 174 bytes
 .../test_support/data/sfclass/._SUCCESS.crc | Bin 0 -> 8 bytes
 .../data/sfclass/.part-r-00000.crc | Bin 0 -> 12 bytes
 .../data/sfclass/.part-r-00001.crc | Bin 0 -> 12 bytes
 python/test_support/data/sfclass/_SUCCESS | 0
 python/test_support/data/sfclass/part-r-00000 | Bin 0 -> 151 bytes
 python/test_support/data/sfclass/part-r-00001 | Bin 0 -> 181 bytes
 .../data/sfdouble/.part-00000.crc | Bin 12 -> 12 bytes
 .../data/sfdouble/.part-00001.crc | Bin 12 -> 12 bytes
 python/test_support/data/sfdouble/part-00000 | Bin 145 -> 145 bytes
 python/test_support/data/sfdouble/part-00001 | Bin 145 -> 145 bytes
 .../test_support/data/sfint/.part-00000.crc | Bin 12 -> 12 bytes
 .../test_support/data/sfint/.part-00001.crc | Bin 12 -> 12 bytes
 python/test_support/data/sfint/part-00000 | Bin 130 -> 145 bytes
 python/test_support/data/sfint/part-00001 | Bin 130 -> 145 bytes
 python/test_support/data/sftext/._SUCCESS.crc | Bin 0 -> 8 bytes
 .../test_support/data/sftext/.part-00000.crc | Bin 12 -> 12 bytes
 .../test_support/data/sftext/.part-00001.crc | Bin 12 -> 12 bytes
 python/test_support/data/sftext/_SUCCESS | 0
 python/test_support/data/sftext/part-00000 | Bin 117 -> 123 bytes
 python/test_support/data/sftext/part-00001 | Bin 117 -> 123 bytes
 30 files changed, 213 insertions(+), 112 deletions(-)
 create mode 100644 core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTests.scala
 create mode 100644 python/test_support/data/sfarray/._SUCCESS.crc
 create mode 100644 python/test_support/data/sfarray/.part-r-00000.crc
 create mode 100644 python/test_support/data/sfarray/.part-r-00001.crc
 create mode 100755 python/test_support/data/sfarray/_SUCCESS
 create mode 100755 python/test_support/data/sfarray/part-r-00000
 create mode 100755 python/test_support/data/sfarray/part-r-00001
 create mode 100644 python/test_support/data/sfclass/._SUCCESS.crc
 create mode 100644 python/test_support/data/sfclass/.part-r-00000.crc
 create mode 100644 python/test_support/data/sfclass/.part-r-00001.crc
 create mode 100755 python/test_support/data/sfclass/_SUCCESS
 create mode 100755 python/test_support/data/sfclass/part-r-00000
 create mode 100755 python/test_support/data/sfclass/part-r-00001
 create mode 100644 python/test_support/data/sftext/._SUCCESS.crc
 create mode 100755 python/test_support/data/sftext/_SUCCESS

diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index 63f837388e21b..52d443f94a315 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -34,10 +34,10 @@ import org.apache.spark.api.java.function.PairFunction
 import scala.util.{Success, Failure, Try}
 import org.msgpack
 import
org.msgpack.ScalaMessagePack -import org.apache.hadoop.mapreduce.InputFormat +import org.apache.hadoop.mapred.InputFormat import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} -import org.apache.hadoop.mapred.SequenceFileOutputFormat +import org.apache.hadoop.mapred.{JobConf, SequenceFileOutputFormat} import org.apache.hadoop.conf.Configuration import java.util @@ -196,69 +196,6 @@ private object SpecialLengths { val TIMING_DATA = -3 } -case class TestClass(var id: String, var number: Int) { - def this() = this("", 0) -} - -object TestHadoop extends App { - - //PythonRDD.writeToStream((1, "bar"), new DataOutputStream(new FileOutputStream("/tmp/test.out"))) - - - //val n = new NullWritable - - import SparkContext._ - - val path = "/tmp/spark/test/sfarray/" - //val path = "/Users/Nick/workspace/java/faunus/output/job-0/" - - val sc = new SparkContext("local[2]", "test") - - //val rdd = sc.sequenceFile[NullWritable, FaunusVertex](path) - //val data = Seq((1.0, "aa"), (2.0, "bb"), (2.0, "aa"), (3.0, "cc"), (2.0, "bb"), (1.0, "aa")) - val data = Seq( - (1, Array(1.0, 2.0, 3.0)), - (2, Array(3.0, 4.0, 5.0)), - (3, Array(4.0, 5.0, 6.0)) - ) - val d = new DoubleWritable(5.0) - val a = new ArrayWritable(classOf[DoubleWritable], Array(d)) - - val rdd = sc.parallelize(data, numSlices = 2) - //.map({ case (k, v) => (new IntWritable(k), v.map(new DoubleWritable(_))) }) - .map{ case (k, v) => (new IntWritable(k), new ArrayWritable(classOf[DoubleWritable], v.map(new DoubleWritable(_)))) } - rdd.saveAsNewAPIHadoopFile[org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat[IntWritable, ArrayWritable]](path) - - /* - val data = Seq( - ("1", TestClass("test1", 123)), - ("2", TestClass("test2", 456)), - ("1", TestClass("test3", 123)), - ("3", TestClass("test56", 456)), - ("2", TestClass("test2", 123)) - ) - val rdd = sc.parallelize(data, numSlices = 2).map{ case (k, v) => (new Text(k), v) } - rdd.saveAsNewAPIHadoopFile(path, - classOf[Text], classOf[TestClass], - classOf[org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat[Text, TestClass]]) - - //val rdd2 = Seq((1, )) - - val seq = sc.sequenceFile[Double, String](path) - val seqR = seq.collect() - - val packed = PythonRDD.serMsgPack(rdd) - val packedR = packed.collect() - val packed2 = PythonRDD.serMsgPack(seq) - val packedR2 = packed2.collect() - - println(seqR.mkString(",")) - println(packedR.mkString(",")) - println(packedR2.mkString(",")) - */ - -} - private[spark] object PythonRDD extends Logging { def readRDDFromFile(sc: JavaSparkContext, filename: String, parallelism: Int): @@ -281,7 +218,7 @@ private[spark] object PythonRDD extends Logging { // PySpark / Hadoop InputFormat stuff - // SequenceFile + /** Create and RDD from a path using [[org.apache.hadoop.mapred.SequenceFileInputFormat]] */ def sequenceFile[K ,V](sc: JavaSparkContext, path: String, keyClass: String, @@ -299,8 +236,11 @@ private[spark] object PythonRDD extends Logging { JavaRDD.fromRDD(SerDeUtil.serMsgPack[K, V](converted)) } - // Arbitrary Hadoop InputFormat, key class and value class - def newHadoopFile[K, V, F <: NewInputFormat[K, V]](sc: JavaSparkContext, + /** + * Create an RDD from a file path, using an arbitrary [[org.apache.hadoop.mapreduce.InputFormat]], + * key and value class + */ + def newAPIHadoopFile[K, V, F <: NewInputFormat[K, V]](sc: JavaSparkContext, path: String, inputFormatClazz: String, keyClazz: String, @@ -312,19 +252,36 @@ private[spark] object PythonRDD extends Logging { val baseConf = sc.hadoopConfiguration() val mergedConf = 
PythonHadoopUtil.mergeConfs(baseConf, conf) val rdd = - newHadoopFileFromClassNames[K, V, F](sc, - path, inputFormatClazz, keyClazz, valueClazz, keyWrapper, valueWrapper, mergedConf) + newAPIHadoopRDDFromClassNames[K, V, F](sc, + Some(path), inputFormatClazz, keyClazz, valueClazz, mergedConf) + val converted = SerDeUtil.convertRDD[K, V](rdd) + JavaRDD.fromRDD(SerDeUtil.serMsgPack[K, V](converted)) + } + + /** + * Create an RDD from a [[org.apache.hadoop.conf.Configuration]] converted from a map that is passed in from Python, + * using an arbitrary [[org.apache.hadoop.mapreduce.InputFormat]], key and value class + */ + def newAPIHadoopRDD[K, V, F <: NewInputFormat[K, V]](sc: JavaSparkContext, + inputFormatClazz: String, + keyClazz: String, + valueClazz: String, + keyWrapper: String, + valueWrapper: String, + confAsMap: java.util.HashMap[String, String]) = { + val conf = PythonHadoopUtil.mapToConf(confAsMap) + val rdd = + newAPIHadoopRDDFromClassNames[K, V, F](sc, + None, inputFormatClazz, keyClazz, valueClazz, conf) val converted = SerDeUtil.convertRDD[K, V](rdd) JavaRDD.fromRDD(SerDeUtil.serMsgPack[K, V](converted)) } - private def newHadoopFileFromClassNames[K, V, F <: NewInputFormat[K, V]](sc: JavaSparkContext, - path: String, + private def newAPIHadoopRDDFromClassNames[K, V, F <: NewInputFormat[K, V]](sc: JavaSparkContext, + path: Option[String] = None, inputFormatClazz: String, keyClazz: String, valueClazz: String, - keyWrapper: String, - valueWrapper: String, conf: Configuration) = { implicit val kcm = ClassManifest.fromClass(Class.forName(keyClazz)).asInstanceOf[ClassManifest[K]] implicit val vcm = ClassManifest.fromClass(Class.forName(valueClazz)).asInstanceOf[ClassManifest[V]] @@ -332,10 +289,66 @@ private[spark] object PythonRDD extends Logging { val kc = kcm.erasure.asInstanceOf[Class[K]] val vc = vcm.erasure.asInstanceOf[Class[V]] val fc = fcm.erasure.asInstanceOf[Class[F]] - sc.sc.newAPIHadoopFile(path, fc, kc, vc, conf) + val rdd = if (path.isDefined) { + sc.sc.newAPIHadoopFile[K, V, F](path.get, fc, kc, vc, conf) + } else { + sc.sc.newAPIHadoopRDD[K, V, F](conf, fc, kc, vc) + } + rdd + } + + def hadoopFile[K, V, F <: InputFormat[K, V]](sc: JavaSparkContext, + path: String, + inputFormatClazz: String, + keyClazz: String, + valueClazz: String, + keyWrapper: String, + valueWrapper: String, + confAsMap: java.util.HashMap[String, String]) = { + val conf = PythonHadoopUtil.mapToConf(confAsMap) + val baseConf = sc.hadoopConfiguration() + val mergedConf = PythonHadoopUtil.mergeConfs(baseConf, conf) + val rdd = + hadoopRDDFromClassNames[K, V, F](sc, + Some(path), inputFormatClazz, keyClazz, valueClazz, mergedConf) + val converted = SerDeUtil.convertRDD[K, V](rdd) + JavaRDD.fromRDD(SerDeUtil.serMsgPack[K, V](converted)) + } + + def hadoopRDD[K, V, F <: InputFormat[K, V]](sc: JavaSparkContext, + inputFormatClazz: String, + keyClazz: String, + valueClazz: String, + keyWrapper: String, + valueWrapper: String, + confAsMap: java.util.HashMap[String, String]) = { + val conf = PythonHadoopUtil.mapToConf(confAsMap) + val rdd = + hadoopRDDFromClassNames[K, V, F](sc, + None, inputFormatClazz, keyClazz, valueClazz, conf) + val converted = SerDeUtil.convertRDD[K, V](rdd) + JavaRDD.fromRDD(SerDeUtil.serMsgPack[K, V](converted)) } - // + private def hadoopRDDFromClassNames[K, V, F <: InputFormat[K, V]](sc: JavaSparkContext, + path: Option[String] = None, + inputFormatClazz: String, + keyClazz: String, + valueClazz: String, + conf: Configuration) = { + implicit val kcm = 
ClassManifest.fromClass(Class.forName(keyClazz)).asInstanceOf[ClassManifest[K]] + implicit val vcm = ClassManifest.fromClass(Class.forName(valueClazz)).asInstanceOf[ClassManifest[V]] + implicit val fcm = ClassManifest.fromClass(Class.forName(inputFormatClazz)).asInstanceOf[ClassManifest[F]] + val kc = kcm.erasure.asInstanceOf[Class[K]] + val vc = vcm.erasure.asInstanceOf[Class[V]] + val fc = fcm.erasure.asInstanceOf[Class[F]] + val rdd = if (path.isDefined) { + sc.sc.hadoopFile(path.get, fc, kc, vc) + } else { + sc.sc.hadoopRDD(new JobConf(conf), fc, kc, vc) + } + rdd + } def writeToStream(elem: Any, dataOut: DataOutputStream)(implicit m: ClassManifest[Any]) { elem match { diff --git a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala index 8a6119e0e452f..169987823efb7 100644 --- a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala +++ b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala @@ -15,43 +15,60 @@ import scala.util.Failure */ private[python] object SerDeUtil extends Logging { + /** Attempts to register a class with MsgPack, only if it is not a primitive or a String */ def register[T](clazz: Class[T], msgpack: ScalaMessagePack) { + //implicit val kcm = ClassManifest.fromClass(clazz) + //val kc = kcm.erasure Try { - log.info("%s".format(clazz)) - clazz match { - case c if c.isPrimitive => - case c if c.isInstanceOf[java.lang.String] => - case _ => msgpack.register(clazz) - } - }.getOrElse(log.warn("Failed to register class (%s) with MsgPack. ".format(clazz.getName) + - "Falling back to default MsgPack serialization, or 'toString' as last resort")) + //if (kc.isInstance("") || kc.isPrimitive) { + // log.info("Class: %s doesn't need to be registered".format(kc.getName)) + //} else { + msgpack.register(clazz) + log.info("Registered key/value class with MsgPack: %s".format(clazz)) + //} + + } match { + case Failure(err) => + log.warn("Failed to register class (%s) with MsgPack. ".format(clazz.getName) + + "Falling back to default MsgPack serialization, or 'toString' as last resort. 
" + + "Error: %s".format(err.getMessage)) + case Success(result) => + } } - // serialize and RDD[(K, V)] -> RDD[Array[Byte]] using MsgPack + /** Serializes an RDD[(K, V)] -> RDD[Array[Byte]] using MsgPack */ def serMsgPack[K, V](rdd: RDD[(K, V)]) = { import org.msgpack.ScalaMessagePack._ - val msgpack = new ScalaMessagePack with Serializable - val first = rdd.first() - val kc = ClassManifest.fromClass(first._1.getClass).asInstanceOf[ClassManifest[K]].erasure.asInstanceOf[Class[K]] - val vc = ClassManifest.fromClass(first._2.getClass).asInstanceOf[ClassManifest[V]].erasure.asInstanceOf[Class[V]] - register(kc, msgpack) - register(vc, msgpack) - rdd.map{ pair => + rdd.mapPartitions{ pairs => + val mp = new ScalaMessagePack + var triedReg = false + pairs.map{ pair => Try { - msgpack.write(pair) + if (!triedReg) { + register(pair._1.getClass, mp) + register(pair._2.getClass, mp) + triedReg = true + } + mp.write(pair) } match { case Failure(err) => - Try { - write((pair._1.toString, pair._2.toString)) - } match { - case Success(result) => result - case Failure(e) => throw e - } + log.debug("Failed to write", err) + Try { + write((pair._1.toString, pair._2.toString)) + } match { + case Success(result) => result + case Failure(e) => throw e + } case Success(result) => result } } } + } + /** + * Converts an RDD of (K, V) pairs, where K and/or V could be instances of [[org.apache.hadoop.io.Writable]], + * into an RDD[(K, V)] + */ def convertRDD[K, V](rdd: RDD[(K, V)]) = { rdd.map{ case (k: Writable, v: Writable) => (convert(k).asInstanceOf[K], convert(v).asInstanceOf[V]) @@ -61,6 +78,7 @@ private[python] object SerDeUtil extends Logging { } } + /** Converts a [[org.apache.hadoop.io.Writable]] to the underlying primitive, String or object representation */ def convert(writable: Writable): Any = { import collection.JavaConversions._ writable match { diff --git a/core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTests.scala b/core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTests.scala new file mode 100644 index 0000000000000..41e4612857d7e --- /dev/null +++ b/core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTests.scala @@ -0,0 +1,64 @@ +package org.apache.spark.api.python + +import org.apache.spark.SparkContext +import org.apache.hadoop.io._ +import scala.Array +import java.io.{DataOutput, DataInput} + +case class TestWritable(var str: String, var numi: Int, var numd: Double) extends Writable { + def this() = this("", 0, 0.0) + + def write(p1: DataOutput) = { + p1.writeUTF(str) + p1.writeInt(numi) + p1.writeDouble(numd) + } + + def readFields(p1: DataInput) = { + str = p1.readUTF() + numi = p1.readInt() + numd = p1.readDouble() + } +} + +object WriteInputFormatTests extends App { + import SparkContext._ + + val sc = new SparkContext("local[2]", "test") + + val textPath = "../python/test_support/data/sftext/" + val intPath = "../python/test_support/data/sfint/" + val doublePath = "../python/test_support/data/sfdouble/" + val arrPath = "../python/test_support/data/sfarray/" + val classPath = "../python/test_support/data/sfclass/" + + val intKeys = Seq((1.0, "aa"), (2.0, "bb"), (2.0, "aa"), (3.0, "cc"), (2.0, "bb"), (1.0, "aa")) + sc.parallelize(intKeys).saveAsSequenceFile(intPath) + sc.parallelize(intKeys.map{ case (k, v) => (k.toDouble, v) }).saveAsSequenceFile(doublePath) + sc.parallelize(intKeys.map{ case (k, v) => (k.toString, v) }).saveAsSequenceFile(textPath) + + val data = Seq( + (1, Array(1.0, 2.0, 3.0)), + (2, Array(3.0, 4.0, 5.0)), + (3, Array(4.0, 
5.0, 6.0)) + ) + sc.parallelize(data, numSlices = 2) + .map{ case (k, v) => + (new IntWritable(k), new ArrayWritable(classOf[DoubleWritable], v.map(new DoubleWritable(_)))) + } + .saveAsNewAPIHadoopFile[org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat[IntWritable, ArrayWritable]](arrPath) + + val testClass = Seq( + ("1", TestWritable("test1", 123, 54.0)), + ("2", TestWritable("test2", 456, 8762.3)), + ("1", TestWritable("test3", 123, 423.1)), + ("3", TestWritable("test56", 456, 423.5)), + ("2", TestWritable("test2", 123, 5435.2)) + ) + val rdd = sc.parallelize(testClass, numSlices = 2).map{ case (k, v) => (new Text(k), v) } + rdd.saveAsNewAPIHadoopFile(classPath, + classOf[Text], classOf[TestWritable], + classOf[org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat[Text, TestWritable]]) + + +} \ No newline at end of file diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 393aef37000d3..ae29728551992 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -214,17 +214,6 @@ def textFile(self, name, minSplits=None): MUTF8Deserializer()) ### - def sequenceFileAsText(self, name): - """ - Read a Hadoopp SequenceFile with arbitrary key and value class from HDFS, - a local file system (available on all nodes), or any Hadoop-supported file system URI, - and return it as an RDD of (String, String) where the key and value representations - are generated using the 'toString()' method of the relevant Java class. - """ - #minSplits = minSplits or min(self.defaultParallelism, 2) - jrdd = self._jvm.PythonRDD.sequenceFileAsText(self._jsc, name) - return RDD(jrdd, self, MsgPackDeserializer()) # MsgPackDeserializer PairMUTF8Deserializer - def sequenceFile(self, name, keyClass="org.apache.hadoop.io.Text", valueClass="org.apache.hadoop.io.Text", keyWrapper="", valueWrapper="", minSplits=None): """ @@ -235,6 +224,8 @@ def sequenceFile(self, name, keyClass="org.apache.hadoop.io.Text", valueClass="o >>> sc.sequenceFile("test_support/data/sfint/").collect() [(1, 'aa'), (2, 'bb'), (2, 'aa'), (3, 'cc'), (2, 'bb'), (1, 'aa')] + >>> sc.sequenceFile("test_support/data/sftext/").collect() + [('1', 'aa'), ('2', 'bb'), ('2', 'aa'), ('3', 'cc'), ('2', 'bb'), ('1', 'aa')] """ minSplits = minSplits or min(self.defaultParallelism, 2) jrdd = self._jvm.PythonRDD.sequenceFile(self._jsc, name, keyClass, valueClass, keyWrapper, valueWrapper, @@ -242,7 +233,22 @@ def sequenceFile(self, name, keyClass="org.apache.hadoop.io.Text", valueClass="o #jrdd = self._jvm.PythonRDD.sequenceFile(self._jsc, name, keyWrapper, valueWrapper, minSplits) return RDD(jrdd, self, MsgPackDeserializer()) # MsgPackDeserializer PairMUTF8Deserializer - def newHadoopFile(self, name, inputFormat, keyClass, valueClass, keyWrapper="toString", valueWrapper="toString", + def newAPIHadoopFile(self, name, inputFormat, keyClass, valueClass, keyWrapper="toString", valueWrapper="toString", + conf = {}): + """ + Read a Hadoopp file with arbitrary InputFormat, key and value class from HDFS, + a local file system (available on all nodes), or any Hadoop-supported file system URI, + and return it as an RDD of (String, String), where the key and value representations + are generated using the 'toString()' method of the relevant Java class. 
+ """ + jconf = self._jvm.java.util.HashMap() + for k, v in conf.iteritems(): + jconf[k] = v + jrdd = self._jvm.PythonRDD.newAPIHadoopFile(self._jsc, name, inputFormat, keyClass, valueClass, keyWrapper, + valueWrapper, jconf) + return RDD(jrdd, self, MsgPackDeserializer()) + + def newAPIHadoopRDD(self, inputFormat, keyClass, valueClass, keyWrapper="toString", valueWrapper="toString", conf = {}): """ Read a Hadoopp file with arbitrary InputFormat, key and value class from HDFS, @@ -253,7 +259,7 @@ def newHadoopFile(self, name, inputFormat, keyClass, valueClass, keyWrapper="toS jconf = self._jvm.java.util.HashMap() for k, v in conf.iteritems(): jconf[k] = v - jrdd = self._jvm.PythonRDD.newHadoopFile(self._jsc, name, inputFormat, keyClass, valueClass, keyWrapper, + jrdd = self._jvm.PythonRDD.newAPIHadoopRDD(self._jsc, inputFormat, keyClass, valueClass, keyWrapper, valueWrapper, jconf) return RDD(jrdd, self, MsgPackDeserializer()) diff --git a/python/test_support/data/sfarray/._SUCCESS.crc b/python/test_support/data/sfarray/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/python/test_support/data/sfarray/.part-r-00000.crc b/python/test_support/data/sfarray/.part-r-00000.crc new file mode 100644 index 0000000000000000000000000000000000000000..7edf7b94e5361d066d25e0039d4184eeb884e012 GIT binary patch literal 12 TcmYc;N@ieSU}A8-VOaqH5&i=Q literal 0 HcmV?d00001 diff --git a/python/test_support/data/sfarray/.part-r-00001.crc b/python/test_support/data/sfarray/.part-r-00001.crc new file mode 100644 index 0000000000000000000000000000000000000000..6bcabe4c213e59aca7813adbc7147979d6254ad8 GIT binary patch literal 12 TcmYc;N@ieSU}9LC-O>dB69)rf literal 0 HcmV?d00001 diff --git a/python/test_support/data/sfarray/_SUCCESS b/python/test_support/data/sfarray/_SUCCESS new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/python/test_support/data/sfarray/part-r-00000 b/python/test_support/data/sfarray/part-r-00000 new file mode 100755 index 0000000000000000000000000000000000000000..4baeb05d9f66dd8011f122a322432256fe96c5d8 GIT binary patch literal 134 zcmWG`4P;ZuFG|--EJ#ewNY%?oOv%qL(96u%^UNy=FUl-QOv*`B!m7Zrs3@@#ri1|u s3hVDJtNYg)U~`n=yUWV7QlOv$5VHU=BM>v&e*ls|-~c9pGzWwM0KVoVo&W#< literal 0 HcmV?d00001 diff --git a/python/test_support/data/sfarray/part-r-00001 b/python/test_support/data/sfarray/part-r-00001 new file mode 100755 index 0000000000000000000000000000000000000000..13c4b47c7799f533f5c31609cb6d62c3477437c0 GIT binary patch literal 174 zcmWG`4P;ZuFG|--EJ#ewNY%?oOv%qL(96u%^UNy=FUl-QOv*`B!m7Zrs3@@#ri1|u z#LhjaXDgrhZ@*AnRn?1$MLMS VMA8T%X~e+5c*5bJtXdin008>1EjR!G literal 0 HcmV?d00001 diff --git a/python/test_support/data/sfclass/part-r-00001 b/python/test_support/data/sfclass/part-r-00001 new file mode 100755 index 0000000000000000000000000000000000000000..e6bae57a5c4c89a9ca00cc5ce83a0743351334c8 GIT binary patch literal 181 zcmWG`4P=wdFG|--EJ#ewNY%?oOv%qL(96u%3rVdg(Ljt1;GHK)d-^12q;wTu+`ca1ORHJGBp4I literal 0 HcmV?d00001 diff --git a/python/test_support/data/sfdouble/.part-00000.crc b/python/test_support/data/sfdouble/.part-00000.crc index d3ec08da4420ca261a8ac24a63656856b9de3b42..a12b149d2d616a4dbf82c434119baa88d5bafc57 100644 GIT binary patch literal 12 TcmYc;N@ieSU}8vnx8Mx`6b=L% literal 12 TcmYc;N@ieSU}DfQ>X--s5N86i diff --git a/python/test_support/data/sfdouble/.part-00001.crc b/python/test_support/data/sfdouble/.part-00001.crc index 
6aff915ed35af73b5bacb8e06979cdd9dc73e27b..05fcd0d484b2aad42e209350ee0e81b8b413d053 100644 GIT binary patch literal 12 TcmYc;N@ieSU}7i}JH-wF5j6ss literal 12 TcmYc;N@ieSU}A{7Zovlt5&HtR diff --git a/python/test_support/data/sfdouble/part-00000 b/python/test_support/data/sfdouble/part-00000 index 3f6b37bd218d9691789d8fe5f50958c96cb89bbd..824db337e9a275bd76b02a187f33c5133a3831c0 100755 GIT binary patch delta 26 icmbQpIFWIJhk((N&jq#L%|oS(GXw+{d0d+qVF>_){R#a5 delta 26 icmbQpIFWIJhXCgczC9<6&p9&Q@QC!V&<1ISNAn diff --git a/python/test_support/data/sfdouble/part-00001 b/python/test_support/data/sfdouble/part-00001 index 4555179e42a2c266923cbe5db3275b18b7519938..a1e3b0504fc13ec6b2a92d1e58ee2ffebef27a1a 100755 GIT binary patch delta 26 icmbQpIFWIJhrqVVkcMg5g|Ztq7+qMn;8^O!2ulEvU<;1` delta 26 icmbQpIFWIJhkyXPH218FRx_N8w{BwA{&!?zge3rPO$l`X diff --git a/python/test_support/data/sfint/.part-00000.crc b/python/test_support/data/sfint/.part-00000.crc index e71a63f1f95d3468b584dc7bad0e39564dcb6481..85a67858b046a80b399232ff4135c188ac6ed8c5 100644 GIT binary patch literal 12 TcmYc;N@ieSU}AW8sjCqH6+;9D literal 12 TcmYc;N@ieSU}Dhyk*Ep)5(5I} diff --git a/python/test_support/data/sfint/.part-00001.crc b/python/test_support/data/sfint/.part-00001.crc index 39403cee2721dc121fd3b925f554d87a3012b9de..74ced30437cf50d4d36089902629d05e7dfd9b5a 100644 GIT binary patch literal 12 TcmYc;N@ieSU}Es~?NbH-5ZVHU literal 12 TcmYc;N@ieSU}D(4?PM#Ht`9wW5Rp44w+c nCV#iMT_QVSg4DEK$#Ht`9wW5Rp46J8{ pne0tJ>3gPl{f=JmspUXHb|7W}VrHh~WF#&VQ&JKVmyszk5dedqAvgd4 diff --git a/python/test_support/data/sftext/._SUCCESS.crc b/python/test_support/data/sftext/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/python/test_support/data/sftext/.part-00000.crc b/python/test_support/data/sftext/.part-00000.crc index 0ea1d96c249ae31b3a85750d4c8ab2a11eb22ecb..c85057ef6db055ab4d337676c78fda3296015664 100644 GIT binary patch literal 12 TcmYc;N@ieSU}88ewDTYU6BPrJ literal 12 TcmYc;N@ieSU}AXYThtB!6KVrz diff --git a/python/test_support/data/sftext/.part-00001.crc b/python/test_support/data/sftext/.part-00001.crc index 3c0294ef8e9e434ce6a52c87a228274a5535e48e..c01c28a4fc606948bb03fa9d63ed901385db4e8e 100644 GIT binary patch literal 12 TcmYc;N@ieSU}7kmc~lhu62b#8 literal 12 TcmYc;N@ieSU}CsAv1K{{6gC6E diff --git a/python/test_support/data/sftext/_SUCCESS b/python/test_support/data/sftext/_SUCCESS new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/python/test_support/data/sftext/part-00000 b/python/test_support/data/sftext/part-00000 index 312e146946c4425e2c7c546253a18350603b113f..c5fcff085eb59a9f7ad3ceedbd5f6962b14d6e7c 100755 GIT binary patch delta 66 zcmXS|o?vHtaWB_Ad6om!^*8=)l{Hya$-uzC4#X_XhI$4}iHUHo5s;gdgu+cs1OR&+ B4mAJ( delta 60 zcmb;}onU9~Wa)jU?tc9=xs`RIhn7D3$H2hA3dBr|iA;%!a843aQWBh#43q%?KF$t1 diff --git a/python/test_support/data/sftext/part-00001 b/python/test_support/data/sftext/part-00001 index 86b35d3b1fb93cda1430992392e0dd168f17e027..d7cfd1d14561dab923aa2054560248457a16c22b 100755 GIT binary patch delta 66 zcmXS|o?vIYH7?3|F7x$!r)+!I^ILT2F)%Q&12GG;v7P}_ax$E21mq?q!MTP&Zek(; DRUHj& delta 60 zcmb;}onU9a(kAZS{A1r4t}ZmoUBGHw&A`CG3dBr|$xO+~a84pqQWBh#!jzZ@04SCX AaR2}S
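
For reference, a minimal PySpark usage sketch of the read methods this patch adds to
python/pyspark/context.py, mirroring the doctests in sequenceFile(). It assumes it is run
from the python/ directory so the relative test_support paths resolve; the
"mapred.input.dir" key and the "sequencefile-example" app name are illustrative
assumptions (the exact input-path key depends on the Hadoop version in use).

    from pyspark import SparkContext

    sc = SparkContext("local[2]", "sequencefile-example")

    # SequenceFile of primitive Writables: keys/values are converted on the
    # Scala side and shipped back to Python via MsgPack.
    pairs = sc.sequenceFile("test_support/data/sfint/")
    pairs.collect()   # [(1, 'aa'), (2, 'bb'), (2, 'aa'), (3, 'cc'), (2, 'bb'), (1, 'aa')] per the doctest

    # The same style of data read through an explicit new-API InputFormat and
    # key/value classes.
    text_pairs = sc.newAPIHadoopFile(
        "test_support/data/sftext/",
        "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat",
        "org.apache.hadoop.io.Text",
        "org.apache.hadoop.io.Text")

    # Configuration-driven variant (no path argument): the input location is
    # taken from the Hadoop conf passed in as a dict.
    conf = {"mapred.input.dir": "test_support/data/sftext/"}
    conf_pairs = sc.newAPIHadoopRDD(
        "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat",
        "org.apache.hadoop.io.Text",
        "org.apache.hadoop.io.Text",
        conf=conf)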