diff --git a/bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala b/bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala
index 70c7474a936dc..8edf6a1356ad6 100644
--- a/bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala
+++ b/bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala
@@ -220,7 +220,7 @@ object Bagel extends Logging {
    */
   private def comp[K: Manifest, V <: Vertex, M <: Message[K], C](
     sc: SparkContext,
-    grouped: RDD[(K, (Seq[C], Seq[V]))],
+    grouped: RDD[(K, (Iterator[C], Iterator[V]))],
     compute: (V, Option[C]) => (V, Array[M]),
     storageLevel: StorageLevel
   ): (RDD[(K, (V, Array[M]))], Int, Int) = {
@@ -230,10 +230,12 @@ object Bagel extends Logging {
       case (_, vs) if vs.size == 0 => None
       case (c, vs) =>
        val (newVert, newMsgs) =
-          compute(vs(0), c match {
-            case Seq(comb) => Some(comb)
-            case Seq() => None
-          })
+          compute(vs.next,
+            c.hasNext match {
+              case true => Some(c.next)
+              case false => None
+            }
+          )
         numMsgs += newMsgs.size
 
         if (newVert.active) {
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
index 9596dbaf75488..acea9780d25fd 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.api.java
 
-import java.util.{Comparator, List => JList}
+import java.util.{Comparator, List => JList, Iterator => JIterator}
 
 import scala.collection.JavaConversions._
 import scala.reflect.ClassTag
@@ -250,14 +250,14 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    * Group the values for each key in the RDD into a single sequence. Allows controlling the
    * partitioning of the resulting key-value pair RDD by passing a Partitioner.
    */
-  def groupByKey(partitioner: Partitioner): JavaPairRDD[K, JList[V]] =
+  def groupByKey(partitioner: Partitioner): JavaPairRDD[K, JIterator[V]] =
     fromRDD(groupByResultToJava(rdd.groupByKey(partitioner)))
 
   /**
    * Group the values for each key in the RDD into a single sequence. Hash-partitions the
    * resulting RDD with into `numPartitions` partitions.
    */
-  def groupByKey(numPartitions: Int): JavaPairRDD[K, JList[V]] =
+  def groupByKey(numPartitions: Int): JavaPairRDD[K, JIterator[V]] =
     fromRDD(groupByResultToJava(rdd.groupByKey(numPartitions)))
 
   /**
@@ -367,7 +367,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    * Group the values for each key in the RDD into a single sequence. Hash-partitions the
    * resulting RDD with the existing partitioner/parallelism level.
    */
-  def groupByKey(): JavaPairRDD[K, JList[V]] =
+  def groupByKey(): JavaPairRDD[K, JIterator[V]] =
     fromRDD(groupByResultToJava(rdd.groupByKey()))
 
   /**
@@ -462,7 +462,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    * list of values for that key in `this` as well as `other`.
    */
   def cogroup[W](other: JavaPairRDD[K, W], partitioner: Partitioner)
-  : JavaPairRDD[K, (JList[V], JList[W])] =
+  : JavaPairRDD[K, (JIterator[V], JIterator[W])] =
     fromRDD(cogroupResultToJava(rdd.cogroup(other, partitioner)))
 
   /**
@@ -470,14 +470,14 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    * tuple with the list of values for that key in `this`, `other1` and `other2`.
    */
   def cogroup[W1, W2](other1: JavaPairRDD[K, W1], other2: JavaPairRDD[K, W2],
-      partitioner: Partitioner): JavaPairRDD[K, (JList[V], JList[W1], JList[W2])] =
+      partitioner: Partitioner): JavaPairRDD[K, (JIterator[V], JIterator[W1], JIterator[W2])] =
     fromRDD(cogroupResult2ToJava(rdd.cogroup(other1, other2, partitioner)))
 
   /**
    * For each key k in `this` or `other`, return a resulting RDD that contains a tuple with the
    * list of values for that key in `this` as well as `other`.
    */
-  def cogroup[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (JList[V], JList[W])] =
+  def cogroup[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (JIterator[V], JIterator[W])] =
     fromRDD(cogroupResultToJava(rdd.cogroup(other)))
 
   /**
@@ -485,7 +485,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    * tuple with the list of values for that key in `this`, `other1` and `other2`.
    */
   def cogroup[W1, W2](other1: JavaPairRDD[K, W1], other2: JavaPairRDD[K, W2])
-  : JavaPairRDD[K, (JList[V], JList[W1], JList[W2])] =
+  : JavaPairRDD[K, (JIterator[V], JIterator[W1], JIterator[W2])] =
     fromRDD(cogroupResult2ToJava(rdd.cogroup(other1, other2)))
 
   /**
@@ -493,7 +493,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    * list of values for that key in `this` as well as `other`.
    */
   def cogroup[W](other: JavaPairRDD[K, W], numPartitions: Int)
-  : JavaPairRDD[K, (JList[V], JList[W])] =
+  : JavaPairRDD[K, (JIterator[V], JIterator[W])] =
     fromRDD(cogroupResultToJava(rdd.cogroup(other, numPartitions)))
 
   /**
@@ -501,16 +501,16 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    * tuple with the list of values for that key in `this`, `other1` and `other2`.
    */
   def cogroup[W1, W2](other1: JavaPairRDD[K, W1], other2: JavaPairRDD[K, W2], numPartitions: Int)
-  : JavaPairRDD[K, (JList[V], JList[W1], JList[W2])] =
+  : JavaPairRDD[K, (JIterator[V], JIterator[W1], JIterator[W2])] =
     fromRDD(cogroupResult2ToJava(rdd.cogroup(other1, other2, numPartitions)))
 
   /** Alias for cogroup. */
-  def groupWith[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (JList[V], JList[W])] =
+  def groupWith[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (JIterator[V], JIterator[W])] =
     fromRDD(cogroupResultToJava(rdd.groupWith(other)))
 
   /** Alias for cogroup. */
   def groupWith[W1, W2](other1: JavaPairRDD[K, W1], other2: JavaPairRDD[K, W2])
-  : JavaPairRDD[K, (JList[V], JList[W1], JList[W2])] =
+  : JavaPairRDD[K, (JIterator[V], JIterator[W1], JIterator[W2])] =
     fromRDD(cogroupResult2ToJava(rdd.groupWith(other1, other2)))
 
   /**
@@ -695,21 +695,21 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
 
 object JavaPairRDD {
   private[spark]
-  def groupByResultToJava[K: ClassTag, T](rdd: RDD[(K, Seq[T])]): RDD[(K, JList[T])] = {
-    rddToPairRDDFunctions(rdd).mapValues(seqAsJavaList)
+  def groupByResultToJava[K: ClassTag, T](rdd: RDD[(K, Iterator[T])]): RDD[(K, JIterator[T])] = {
+    rddToPairRDDFunctions(rdd).mapValues(asJavaIterator)
   }
 
   private[spark]
   def cogroupResultToJava[K: ClassTag, V, W](
-    rdd: RDD[(K, (Seq[V], Seq[W]))]): RDD[(K, (JList[V], JList[W]))] = {
-    rddToPairRDDFunctions(rdd).mapValues(x => (seqAsJavaList(x._1), seqAsJavaList(x._2)))
+    rdd: RDD[(K, (Iterator[V], Iterator[W]))]): RDD[(K, (JIterator[V], JIterator[W]))] = {
+    rddToPairRDDFunctions(rdd).mapValues(x => (asJavaIterator(x._1), asJavaIterator(x._2)))
   }
 
   private[spark]
   def cogroupResult2ToJava[K: ClassTag, V, W1, W2](
-    rdd: RDD[(K, (Seq[V], Seq[W1], Seq[W2]))]): RDD[(K, (JList[V], JList[W1], JList[W2]))] = {
+    rdd: RDD[(K, (Iterator[V], Iterator[W1], Iterator[W2]))]): RDD[(K, (JIterator[V], JIterator[W1], JIterator[W2]))] = {
     rddToPairRDDFunctions(rdd)
-      .mapValues(x => (seqAsJavaList(x._1), seqAsJavaList(x._2), seqAsJavaList(x._3)))
+      .mapValues(x => (asJavaIterator(x._1), asJavaIterator(x._2), asJavaIterator(x._3)))
   }
 
   def fromRDD[K: ClassTag, V: ClassTag](rdd: RDD[(K, V)]): JavaPairRDD[K, V] = {
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
index 6e8ec8e0c7629..fd74e8218f925 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
@@ -204,7 +204,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
    * Return an RDD of grouped elements. Each group consists of a key and a sequence of elements
    * mapping to that key.
    */
-  def groupBy[K](f: JFunction[T, K]): JavaPairRDD[K, JList[T]] = {
+  def groupBy[K](f: JFunction[T, K]): JavaPairRDD[K, JIterator[T]] = {
     implicit val ctagK: ClassTag[K] = fakeClassTag
     implicit val ctagV: ClassTag[JList[T]] = fakeClassTag
     JavaPairRDD.fromRDD(groupByResultToJava(rdd.groupBy(f)(fakeClassTag)))
@@ -214,7 +214,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
    * Return an RDD of grouped elements. Each group consists of a key and a sequence of elements
    * mapping to that key.
    */
-  def groupBy[K](f: JFunction[T, K], numPartitions: Int): JavaPairRDD[K, JList[T]] = {
+  def groupBy[K](f: JFunction[T, K], numPartitions: Int): JavaPairRDD[K, JIterator[T]] = {
     implicit val ctagK: ClassTag[K] = fakeClassTag
     implicit val ctagV: ClassTag[JList[T]] = fakeClassTag
     JavaPairRDD.fromRDD(groupByResultToJava(rdd.groupBy(f, numPartitions)(fakeClassTag[K])))
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index 14386ff5b9127..3e005a7e1dc50 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -261,7 +261,7 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
    * Group the values for each key in the RDD into a single sequence. Allows controlling the
    * partitioning of the resulting key-value pair RDD by passing a Partitioner.
    */
-  def groupByKey(partitioner: Partitioner): RDD[(K, Seq[V])] = {
+  def groupByKey(partitioner: Partitioner): RDD[(K, Iterator[V])] = {
     // groupByKey shouldn't use map side combine because map side combine does not
     // reduce the amount of data shuffled and requires all map side data be inserted
     // into a hash table, leading to more objects in the old gen.
@@ -270,14 +270,14 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
     def mergeCombiners(c1: ArrayBuffer[V], c2: ArrayBuffer[V]) = c1 ++ c2
     val bufs = combineByKey[ArrayBuffer[V]](
       createCombiner _, mergeValue _, mergeCombiners _, partitioner, mapSideCombine=false)
-    bufs.asInstanceOf[RDD[(K, Seq[V])]]
+    bufs.asInstanceOf[RDD[(K, Iterator[V])]]
   }
 
   /**
    * Group the values for each key in the RDD into a single sequence. Hash-partitions the
    * resulting RDD with into `numPartitions` partitions.
    */
-  def groupByKey(numPartitions: Int): RDD[(K, Seq[V])] = {
+  def groupByKey(numPartitions: Int): RDD[(K, Iterator[V])] = {
     groupByKey(new HashPartitioner(numPartitions))
   }
 
@@ -298,7 +298,7 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
    */
   def join[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (V, W))] = {
     this.cogroup(other, partitioner).flatMapValues { case (vs, ws) =>
-      for (v <- vs.iterator; w <- ws.iterator) yield (v, w)
+      for (v <- vs; w <- ws) yield (v, w)
     }
   }
 
@@ -311,9 +311,9 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
   def leftOuterJoin[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (V, Option[W]))] = {
     this.cogroup(other, partitioner).flatMapValues { case (vs, ws) =>
       if (ws.isEmpty) {
-        vs.iterator.map(v => (v, None))
+        vs.map(v => (v, None))
       } else {
-        for (v <- vs.iterator; w <- ws.iterator) yield (v, Some(w))
+        for (v <- vs; w <- ws) yield (v, Some(w))
       }
     }
   }
@@ -328,9 +328,9 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
       : RDD[(K, (Option[V], W))] = {
     this.cogroup(other, partitioner).flatMapValues { case (vs, ws) =>
       if (vs.isEmpty) {
-        ws.iterator.map(w => (None, w))
+        ws.map(w => (None, w))
       } else {
-        for (v <- vs.iterator; w <- ws.iterator) yield (Some(v), w)
+        for (v <- vs; w <- ws) yield (Some(v), w)
       }
     }
   }
@@ -358,7 +358,7 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
    * Group the values for each key in the RDD into a single sequence. Hash-partitions the
    * resulting RDD with the existing partitioner/parallelism level.
    */
-  def groupByKey(): RDD[(K, Seq[V])] = {
+  def groupByKey(): RDD[(K, Iterator[V])] = {
     groupByKey(defaultPartitioner(self))
   }
 
@@ -453,13 +453,13 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
    * For each key k in `this` or `other`, return a resulting RDD that contains a tuple with the
    * list of values for that key in `this` as well as `other`.
    */
-  def cogroup[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (Seq[V], Seq[W]))] = {
+  def cogroup[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (Iterator[V], Iterator[W]))] = {
     if (partitioner.isInstanceOf[HashPartitioner] && getKeyClass().isArray) {
       throw new SparkException("Default partitioner cannot partition array keys.")
     }
     val cg = new CoGroupedRDD[K](Seq(self, other), partitioner)
     cg.mapValues { case Seq(vs, ws) =>
-      (vs.asInstanceOf[Seq[V]], ws.asInstanceOf[Seq[W]])
+      (vs.asInstanceOf[Seq[V]].iterator, ws.asInstanceOf[Seq[W]].iterator)
     }
   }
 
@@ -468,13 +468,13 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
    * tuple with the list of values for that key in `this`, `other1` and `other2`.
    */
   def cogroup[W1, W2](other1: RDD[(K, W1)], other2: RDD[(K, W2)], partitioner: Partitioner)
-      : RDD[(K, (Seq[V], Seq[W1], Seq[W2]))] = {
+      : RDD[(K, (Iterator[V], Iterator[W1], Iterator[W2]))] = {
     if (partitioner.isInstanceOf[HashPartitioner] && getKeyClass().isArray) {
       throw new SparkException("Default partitioner cannot partition array keys.")
     }
     val cg = new CoGroupedRDD[K](Seq(self, other1, other2), partitioner)
     cg.mapValues { case Seq(vs, w1s, w2s) =>
-      (vs.asInstanceOf[Seq[V]], w1s.asInstanceOf[Seq[W1]], w2s.asInstanceOf[Seq[W2]])
+      (vs.asInstanceOf[Seq[V]].iterator, w1s.asInstanceOf[Seq[W1]].iterator, w2s.asInstanceOf[Seq[W2]].iterator)
     }
   }
 
@@ -482,7 +482,7 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
    * For each key k in `this` or `other`, return a resulting RDD that contains a tuple with the
    * list of values for that key in `this` as well as `other`.
    */
-  def cogroup[W](other: RDD[(K, W)]): RDD[(K, (Seq[V], Seq[W]))] = {
+  def cogroup[W](other: RDD[(K, W)]): RDD[(K, (Iterator[V], Iterator[W]))] = {
     cogroup(other, defaultPartitioner(self, other))
   }
 
@@ -491,7 +491,7 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
    * tuple with the list of values for that key in `this`, `other1` and `other2`.
    */
   def cogroup[W1, W2](other1: RDD[(K, W1)], other2: RDD[(K, W2)])
-      : RDD[(K, (Seq[V], Seq[W1], Seq[W2]))] = {
+      : RDD[(K, (Iterator[V], Iterator[W1], Iterator[W2]))] = {
     cogroup(other1, other2, defaultPartitioner(self, other1, other2))
   }
 
@@ -499,7 +499,7 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
    * For each key k in `this` or `other`, return a resulting RDD that contains a tuple with the
    * list of values for that key in `this` as well as `other`.
    */
-  def cogroup[W](other: RDD[(K, W)], numPartitions: Int): RDD[(K, (Seq[V], Seq[W]))] = {
+  def cogroup[W](other: RDD[(K, W)], numPartitions: Int): RDD[(K, (Iterator[V], Iterator[W]))] = {
     cogroup(other, new HashPartitioner(numPartitions))
   }
 
@@ -508,18 +508,18 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
    * tuple with the list of values for that key in `this`, `other1` and `other2`.
    */
   def cogroup[W1, W2](other1: RDD[(K, W1)], other2: RDD[(K, W2)], numPartitions: Int)
-      : RDD[(K, (Seq[V], Seq[W1], Seq[W2]))] = {
+      : RDD[(K, (Iterator[V], Iterator[W1], Iterator[W2]))] = {
     cogroup(other1, other2, new HashPartitioner(numPartitions))
   }
 
   /** Alias for cogroup. */
-  def groupWith[W](other: RDD[(K, W)]): RDD[(K, (Seq[V], Seq[W]))] = {
+  def groupWith[W](other: RDD[(K, W)]): RDD[(K, (Iterator[V], Iterator[W]))] = {
     cogroup(other, defaultPartitioner(self, other))
   }
 
   /** Alias for cogroup. */
   def groupWith[W1, W2](other1: RDD[(K, W1)], other2: RDD[(K, W2)])
-      : RDD[(K, (Seq[V], Seq[W1], Seq[W2]))] = {
+      : RDD[(K, (Iterator[V], Iterator[W1], Iterator[W2]))] = {
     cogroup(other1, other2, defaultPartitioner(self, other1, other2))
   }
 
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index bf3c57ad41eb2..a702ef4348268 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -438,20 +438,20 @@ abstract class RDD[T: ClassTag](
   /**
    * Return an RDD of grouped items.
    */
-  def groupBy[K: ClassTag](f: T => K): RDD[(K, Seq[T])] =
+  def groupBy[K: ClassTag](f: T => K): RDD[(K, Iterator[T])] =
     groupBy[K](f, defaultPartitioner(this))
 
   /**
    * Return an RDD of grouped elements. Each group consists of a key and a sequence of elements
    * mapping to that key.
    */
-  def groupBy[K: ClassTag](f: T => K, numPartitions: Int): RDD[(K, Seq[T])] =
+  def groupBy[K: ClassTag](f: T => K, numPartitions: Int): RDD[(K, Iterator[T])] =
     groupBy(f, new HashPartitioner(numPartitions))
 
   /**
    * Return an RDD of grouped items.
    */
-  def groupBy[K: ClassTag](f: T => K, p: Partitioner): RDD[(K, Seq[T])] = {
+  def groupBy[K: ClassTag](f: T => K, p: Partitioner): RDD[(K, Iterator[T])] = {
     val cleanF = sc.clean(f)
     this.map(t => (cleanF(t), t)).groupByKey(p)
   }
diff --git a/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java b/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java
index eb70fb547564c..7352160a55fec 100644
--- a/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java
@@ -26,8 +26,9 @@
 import org.apache.spark.api.java.function.PairFlatMapFunction;
 import org.apache.spark.api.java.function.PairFunction;
 
-import java.util.List;
 import java.util.ArrayList;
+import java.util.List;
+import java.util.Iterator;
 import java.util.regex.Pattern;
 
 /**
@@ -66,7 +67,7 @@ public static void main(String[] args) throws Exception {
     JavaRDD<String> lines = ctx.textFile(args[1], 1);
 
     // Loads all URLs from input file and initialize their neighbors.
-    JavaPairRDD<String, List<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
+    JavaPairRDD<String, Iterator<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
       @Override
       public Tuple2<String, String> call(String s) {
         String[] parts = SPACES.split(s);
@@ -75,9 +76,9 @@ public Tuple2<String, String> call(String s) {
     }).distinct().groupByKey().cache();
 
     // Loads all URLs with other URL(s) link to from input file and initialize ranks of them to one.
-    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<List<String>, Double>() {
+    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterator<String>, Double>() {
       @Override
-      public Double call(List<String> rs) {
+      public Double call(Iterator<String> rs) {
         return 1.0;
       }
     });
@@ -86,12 +87,16 @@ public Double call(List<String> rs) {
     for (int current = 0; current < Integer.parseInt(args[2]); current++) {
       // Calculates URL contributions to the rank of other URLs.
       JavaPairRDD<String, Double> contribs = links.join(ranks).values()
-        .flatMapToPair(new PairFlatMapFunction<Tuple2<List<String>, Double>, String, Double>() {
+        .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterator<String>, Double>, String, Double>() {
           @Override
-          public Iterable<Tuple2<String, Double>> call(Tuple2<List<String>, Double> s) {
+          public Iterable<Tuple2<String, Double>> call(Tuple2<Iterator<String>, Double> s) {
+            List<String> urls = new ArrayList<String>();
+            while (s._1.hasNext()) {
+              urls.add(s._1.next());
+            }
             List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
-            for (String n : s._1()) {
-              results.add(new Tuple2<String, Double>(n, s._2() / s._1().size()));
+            for (String n : urls) {
+              results.add(new Tuple2<String, Double>(n, s._2() / urls.size()));
             }
             return results;
           }
diff --git a/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala
index 27afa6b642758..698c7cb2e85d1 100644
--- a/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala
@@ -116,11 +116,13 @@ object WikipediaPageRankStandalone {
     for (i <- 1 to numIterations) {
       val contribs = links.groupWith(ranks).flatMap {
         case (id, (linksWrapper, rankWrapper)) =>
-          if (linksWrapper.length > 0) {
-            if (rankWrapper.length > 0) {
-              linksWrapper(0).map(dest => (dest, rankWrapper(0) / linksWrapper(0).size))
+          if (linksWrapper.hasNext) {
+            val linksWrapperHead = linksWrapper.next
+            if (rankWrapper.hasNext) {
+              val rankWrapperHead = rankWrapper.next
+              linksWrapperHead.map(dest => (dest, rankWrapperHead / linksWrapperHead.size))
             } else {
-              linksWrapper(0).map(dest => (dest, defaultRank / linksWrapper(0).size))
+              linksWrapperHead.map(dest => (dest, defaultRank / linksWrapperHead.size))
             }
           } else {
             Array[(String, Double)]()
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
index 0cc9f48769f83..fc9a74ffbef86 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
@@ -421,12 +421,12 @@ class ALS private (
    * Compute the new feature vectors for a block of the users matrix given the list of factors
    * it received from each product and its InLinkBlock.
    */
-  private def updateBlock(messages: Seq[(Int, Array[Array[Double]])], inLinkBlock: InLinkBlock,
+  private def updateBlock(messages: Iterator[(Int, Array[Array[Double]])], inLinkBlock: InLinkBlock,
       rank: Int, lambda: Double, alpha: Double, YtY: Option[Broadcast[DoubleMatrix]])
     : Array[Array[Double]] =
   {
     // Sort the incoming block factor messages by block ID and make them an array
-    val blockFactors = messages.sortBy(_._1).map(_._2).toArray // Array[Array[Double]]
+    val blockFactors = messages.toArray.sortBy(_._1).map(_._2) // Array[Array[Double]]
     val numBlocks = blockFactors.length
     val numUsers = inLinkBlock.elementIds.length
 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LAUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LAUtils.scala
index afe081295bfae..8445afef12357 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/LAUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LAUtils.scala
@@ -38,8 +38,9 @@ object LAUtils {
       case (i, cols) =>
         val rowArray = Array.ofDim[Double](n)
         var j = 0
-        while (j < cols.size) {
-          rowArray(cols(j)._1) = cols(j)._2
+        while (cols.hasNext) {
+          val element = cols.next
+          rowArray(element._1) = element._2
           j += 1
         }
         MatrixRow(i, rowArray)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
index ac451d1913aaa..4b29f3e2f93b2 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.streaming.api.java
 
 import java.lang.{Long => JLong}
-import java.util.{List => JList}
+import java.util.{List => JList, Iterator => JIterator}
 
 import scala.collection.JavaConversions._
 import scala.reflect.ClassTag
@@ -115,15 +115,15 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
    * Return a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to
    * generate the RDDs with Spark's default number of partitions.
    */
-  def groupByKey(): JavaPairDStream[K, JList[V]] =
-    dstream.groupByKey().mapValues(seqAsJavaList _)
+  def groupByKey(): JavaPairDStream[K, JIterator[V]] =
+    dstream.groupByKey().mapValues(asJavaIterator _)
 
   /**
    * Return a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to
    * generate the RDDs with `numPartitions` partitions.
    */
-  def groupByKey(numPartitions: Int): JavaPairDStream[K, JList[V]] =
-    dstream.groupByKey(numPartitions).mapValues(seqAsJavaList _)
+  def groupByKey(numPartitions: Int): JavaPairDStream[K, JIterator[V]] =
+    dstream.groupByKey(numPartitions).mapValues(asJavaIterator _)
 
   /**
    * Return a new DStream by applying `groupByKey` on each RDD of `this` DStream.
@@ -131,8 +131,8 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
    * single sequence to generate the RDDs of the new DStream. org.apache.spark.Partitioner
    * is used to control the partitioning of each RDD.
    */
-  def groupByKey(partitioner: Partitioner): JavaPairDStream[K, JList[V]] =
-    dstream.groupByKey(partitioner).mapValues(seqAsJavaList _)
+  def groupByKey(partitioner: Partitioner): JavaPairDStream[K, JIterator[V]] =
+    dstream.groupByKey(partitioner).mapValues(asJavaIterator _)
 
   /**
    * Return a new DStream by applying `reduceByKey` to each RDD. The values for each key are
@@ -196,8 +196,8 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
    * @param windowDuration width of the window; must be a multiple of this DStream's
    *                       batching interval
    */
-  def groupByKeyAndWindow(windowDuration: Duration): JavaPairDStream[K, JList[V]] = {
-    dstream.groupByKeyAndWindow(windowDuration).mapValues(seqAsJavaList _)
+  def groupByKeyAndWindow(windowDuration: Duration): JavaPairDStream[K, JIterator[V]] = {
+    dstream.groupByKeyAndWindow(windowDuration).mapValues(asJavaIterator _)
   }
 
   /**
@@ -211,8 +211,8 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
    *                      DStream's batching interval
    */
   def groupByKeyAndWindow(windowDuration: Duration, slideDuration: Duration)
-  : JavaPairDStream[K, JList[V]] = {
-    dstream.groupByKeyAndWindow(windowDuration, slideDuration).mapValues(seqAsJavaList _)
+  : JavaPairDStream[K, JIterator[V]] = {
+    dstream.groupByKeyAndWindow(windowDuration, slideDuration).mapValues(asJavaIterator _)
   }
 
   /**
@@ -227,9 +227,9 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
    * @param numPartitions Number of partitions of each RDD in the new DStream.
    */
   def groupByKeyAndWindow(windowDuration: Duration, slideDuration: Duration, numPartitions: Int)
-  :JavaPairDStream[K, JList[V]] = {
+  :JavaPairDStream[K, JIterator[V]] = {
     dstream.groupByKeyAndWindow(windowDuration, slideDuration, numPartitions)
-      .mapValues(seqAsJavaList _)
+      .mapValues(asJavaIterator _)
   }
 
   /**
@@ -247,9 +247,9 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
       windowDuration: Duration,
       slideDuration: Duration,
       partitioner: Partitioner
-    ):JavaPairDStream[K, JList[V]] = {
+    ):JavaPairDStream[K, JIterator[V]] = {
     dstream.groupByKeyAndWindow(windowDuration, slideDuration, partitioner)
-      .mapValues(seqAsJavaList _)
+      .mapValues(asJavaIterator _)
   }
 
   /**
@@ -518,9 +518,9 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
    * Hash partitioning is used to generate the RDDs with Spark's default number
    * of partitions.
    */
-  def cogroup[W](other: JavaPairDStream[K, W]): JavaPairDStream[K, (JList[V], JList[W])] = {
+  def cogroup[W](other: JavaPairDStream[K, W]): JavaPairDStream[K, (JIterator[V], JIterator[W])] = {
     implicit val cm: ClassTag[W] = fakeClassTag
-    dstream.cogroup(other.dstream).mapValues(t => (seqAsJavaList(t._1), seqAsJavaList((t._2))))
+    dstream.cogroup(other.dstream).mapValues(t => (asJavaIterator(t._1), asJavaIterator((t._2))))
   }
 
   /**
@@ -530,10 +530,10 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
   def cogroup[W](
       other: JavaPairDStream[K, W],
       numPartitions: Int
-    ): JavaPairDStream[K, (JList[V], JList[W])] = {
+    ): JavaPairDStream[K, (JIterator[V], JIterator[W])] = {
     implicit val cm: ClassTag[W] = fakeClassTag
     dstream.cogroup(other.dstream, numPartitions)
-      .mapValues(t => (seqAsJavaList(t._1), seqAsJavaList((t._2))))
+      .mapValues(t => (asJavaIterator(t._1), asJavaIterator((t._2))))
   }
 
   /**
@@ -543,10 +543,10 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
   def cogroup[W](
       other: JavaPairDStream[K, W],
       partitioner: Partitioner
-    ): JavaPairDStream[K, (JList[V], JList[W])] = {
+    ): JavaPairDStream[K, (JIterator[V], JIterator[W])] = {
     implicit val cm: ClassTag[W] = fakeClassTag
     dstream.cogroup(other.dstream, partitioner)
-      .mapValues(t => (seqAsJavaList(t._1), seqAsJavaList((t._2))))
+      .mapValues(t => (asJavaIterator(t._1), asJavaIterator((t._2))))
   }
 
   /**
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
index 2473496949360..8beace1d8046a 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
@@ -51,7 +51,7 @@ class PairDStreamFunctions[K: ClassTag, V: ClassTag](self: DStream[(K,V)])
    * Return a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to
    * generate the RDDs with Spark's default number of partitions.
    */
-  def groupByKey(): DStream[(K, Seq[V])] = {
+  def groupByKey(): DStream[(K, Iterator[V])] = {
     groupByKey(defaultPartitioner())
   }
 
@@ -59,7 +59,7 @@ class PairDStreamFunctions[K: ClassTag, V: ClassTag](self: DStream[(K,V)])
    * Return a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to
    * generate the RDDs with `numPartitions` partitions.
    */
-  def groupByKey(numPartitions: Int): DStream[(K, Seq[V])] = {
+  def groupByKey(numPartitions: Int): DStream[(K, Iterator[V])] = {
     groupByKey(defaultPartitioner(numPartitions))
   }
 
@@ -67,12 +67,12 @@ class PairDStreamFunctions[K: ClassTag, V: ClassTag](self: DStream[(K,V)])
    * Return a new DStream by applying `groupByKey` on each RDD. The supplied
    * org.apache.spark.Partitioner is used to control the partitioning of each RDD.
    */
-  def groupByKey(partitioner: Partitioner): DStream[(K, Seq[V])] = {
+  def groupByKey(partitioner: Partitioner): DStream[(K, Iterator[V])] = {
     val createCombiner = (v: V) => ArrayBuffer[V](v)
     val mergeValue = (c: ArrayBuffer[V], v: V) => (c += v)
     val mergeCombiner = (c1: ArrayBuffer[V], c2: ArrayBuffer[V]) => (c1 ++ c2)
     combineByKey(createCombiner, mergeValue, mergeCombiner, partitioner)
-      .asInstanceOf[DStream[(K, Seq[V])]]
+      .asInstanceOf[DStream[(K, Iterator[V])]]
   }
 
   /**
@@ -126,7 +126,7 @@ class PairDStreamFunctions[K: ClassTag, V: ClassTag](self: DStream[(K,V)])
    * @param windowDuration width of the window; must be a multiple of this DStream's
    *                       batching interval
    */
-  def groupByKeyAndWindow(windowDuration: Duration): DStream[(K, Seq[V])] = {
+  def groupByKeyAndWindow(windowDuration: Duration): DStream[(K, Iterator[V])] = {
     groupByKeyAndWindow(windowDuration, self.slideDuration, defaultPartitioner())
   }
 
@@ -140,7 +140,7 @@ class PairDStreamFunctions[K: ClassTag, V: ClassTag](self: DStream[(K,V)])
    *                       the new DStream will generate RDDs); must be a multiple of this
    *                       DStream's batching interval
    */
-  def groupByKeyAndWindow(windowDuration: Duration, slideDuration: Duration): DStream[(K, Seq[V])] =
+  def groupByKeyAndWindow(windowDuration: Duration, slideDuration: Duration): DStream[(K, Iterator[V])] =
   {
     groupByKeyAndWindow(windowDuration, slideDuration, defaultPartitioner())
   }
 
@@ -161,7 +161,7 @@ class PairDStreamFunctions[K: ClassTag, V: ClassTag](self: DStream[(K,V)])
       windowDuration: Duration,
       slideDuration: Duration,
       numPartitions: Int
-    ): DStream[(K, Seq[V])] = {
+    ): DStream[(K, Iterator[V])] = {
     groupByKeyAndWindow(windowDuration, slideDuration, defaultPartitioner(numPartitions))
   }
 
@@ -180,14 +180,14 @@ class PairDStreamFunctions[K: ClassTag, V: ClassTag](self: DStream[(K,V)])
       windowDuration: Duration,
       slideDuration: Duration,
       partitioner: Partitioner
-    ): DStream[(K, Seq[V])] = {
-    val createCombiner = (v: Seq[V]) => new ArrayBuffer[V] ++= v
-    val mergeValue = (buf: ArrayBuffer[V], v: Seq[V]) => buf ++= v
+    ): DStream[(K, Iterator[V])] = {
+    val createCombiner = (v: Iterator[V]) => new ArrayBuffer[V] ++= v
+    val mergeValue = (buf: ArrayBuffer[V], v: Iterator[V]) => buf ++= v
     val mergeCombiner = (buf1: ArrayBuffer[V], buf2: ArrayBuffer[V]) => buf1 ++= buf2
     self.groupByKey(partitioner)
         .window(windowDuration, slideDuration)
         .combineByKey[ArrayBuffer[V]](createCombiner, mergeValue, mergeCombiner, partitioner)
-        .asInstanceOf[DStream[(K, Seq[V])]]
+        .asInstanceOf[DStream[(K, Iterator[V])]]
   }
 
   /**
@@ -438,7 +438,7 @@ class PairDStreamFunctions[K: ClassTag, V: ClassTag](self: DStream[(K,V)])
    * Hash partitioning is used to generate the RDDs with Spark's default number
    * of partitions.
    */
-  def cogroup[W: ClassTag](other: DStream[(K, W)]): DStream[(K, (Seq[V], Seq[W]))] = {
+  def cogroup[W: ClassTag](other: DStream[(K, W)]): DStream[(K, (Iterator[V], Iterator[W]))] = {
     cogroup(other, defaultPartitioner())
   }
 
@@ -447,7 +447,7 @@ class PairDStreamFunctions[K: ClassTag, V: ClassTag](self: DStream[(K,V)])
    * Hash partitioning is used to generate the RDDs with `numPartitions` partitions.
    */
   def cogroup[W: ClassTag](other: DStream[(K, W)], numPartitions: Int)
-      : DStream[(K, (Seq[V], Seq[W]))] = {
+      : DStream[(K, (Iterator[V], Iterator[W]))] = {
     cogroup(other, defaultPartitioner(numPartitions))
   }
 
@@ -458,7 +458,7 @@ class PairDStreamFunctions[K: ClassTag, V: ClassTag](self: DStream[(K,V)])
   def cogroup[W: ClassTag](
       other: DStream[(K, W)],
       partitioner: Partitioner
-    ): DStream[(K, (Seq[V], Seq[W]))] = {
+    ): DStream[(K, (Iterator[V], Iterator[W]))] = {
     self.transformWith(
       other,
       (rdd1: RDD[(K, V)], rdd2: RDD[(K, W)]) => rdd1.cogroup(rdd2, partitioner)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala
index 5f7d3ba26c656..09ff4163985c9 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala
@@ -56,9 +56,13 @@ class StateDStream[K: ClassTag, V: ClassTag, S: ClassTag](
         // first map the cogrouped tuple to tuples of required type,
         // and then apply the update function
         val updateFuncLocal = updateFunc
-        val finalFunc = (iterator: Iterator[(K, (Seq[V], Seq[S]))]) => {
+        val finalFunc = (iterator: Iterator[(K, (Iterator[V], Iterator[S]))]) => {
           val i = iterator.map(t => {
-            (t._1, t._2._1, t._2._2.headOption)
+            val headOption = t._2._2.hasNext match {
+              case true => Some(t._2._2.next())
+              case false => None
+            }
+            (t._1, t._2._1.toSeq, headOption)
           })
           updateFuncLocal(i)
         }
@@ -90,8 +94,8 @@ class StateDStream[K: ClassTag, V: ClassTag, S: ClassTag](
         // first map the grouped tuple to tuples of required type,
        // and then apply the update function
         val updateFuncLocal = updateFunc
-        val finalFunc = (iterator: Iterator[(K, Seq[V])]) => {
-          updateFuncLocal(iterator.map(tuple => (tuple._1, tuple._2, None)))
+        val finalFunc = (iterator: Iterator[(K, Iterator[V])]) => {
+          updateFuncLocal(iterator.map(tuple => (tuple._1, tuple._2.toSeq, None)))
         }
 
         val groupedRDD = parentRDD.groupByKey(partitioner)
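For reviewers trying out the new API, here is a minimal caller-side sketch (not part of the patch; the local SparkContext, object name, and sample data are made up for illustration) of what the Seq[V] -> Iterator[V] change means in practice: the grouped values are now single-pass, so any caller that needs their size or a second traversal has to buffer them explicitly.

import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._ // implicit conversion to PairRDDFunctions

object GroupByKeyIteratorSketch {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "groupByKey-iterator-sketch")
    val pairs = sc.parallelize(Seq(("a", 1), ("a", 2), ("b", 3)))

    // With this patch, groupByKey returns RDD[(String, Iterator[Int])]
    // instead of RDD[(String, Seq[Int])].
    val grouped = pairs.groupByKey()

    val stats = grouped.mapValues { vs =>
      // An Iterator can be traversed only once; asking for vs.size here would
      // consume it, so buffer the values before using them more than once.
      val buffered = vs.toList
      (buffered.size, buffered.sum)
    }

    stats.collect().foreach(println)
    sc.stop()
  }
}

Code written against the old API that indexed into the grouped values (for example vs(0) or vs.length, as in the Bagel and WikipediaPageRankStandalone hunks above) needs the same kind of rewrite to hasNext/next or an explicit buffer.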