SPARK-1160: Deprecate toArray in RDD
https://spark-project.atlassian.net/browse/SPARK-1160

reported by @mateiz: "It's redundant with collect() and the name doesn't make sense in Java, where we return a List (we can't return an array due to the way Java generics work). It's also missing in Python."

In this patch, I deprecated the method and updated the source files that used it, replacing toArray with collect() directly.
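
For illustration, here is a minimal caller-side sketch of the migration this change asks for. It is not code from this commit: the sample data and variable names are made up, and sc is assumed to be an existing SparkContext.

// Illustrative Scala driver code (assumes an existing SparkContext named sc).
val rdd = sc.parallelize(Seq(1, 2, 3))

// Before: toArray() materializes the RDD on the driver. It still compiles,
// but with this patch it emits a deprecation warning ("use collect", since 1.0.0).
val before: Array[Int] = rdd.toArray()

// After: collect() is the equivalent, non-deprecated call.
val after: Array[Int] = rdd.collect()

// Note: on the Java API (JavaRDDLike), collect() returns a java.util.List<T> rather than
// an array, since an array of an erased generic type parameter T cannot be constructed.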

Author: CodingCat <[email protected]>

Closes apache#105 from CodingCat/SPARK-1060 and squashes the following commits:

286f163 [CodingCat] deprecate in JavaRDDLike
ee17b4e [CodingCat] add message and since
2ff7319 [CodingCat] deprecate toArray in RDD
CodingCat authored and aarondav committed Mar 13, 2014
1 parent b8afe30 commit 9032f7c
Showing 8 changed files with 13 additions and 11 deletions.

@@ -283,6 +283,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
/**
* Return an array that contains all of the elements in this RDD.
*/
@deprecated("use collect", "1.0.0")
def toArray(): JList[T] = collect()

/**

@@ -423,7 +423,7 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
* Return the key-value pairs in this RDD to the master as a Map.
*/
def collectAsMap(): Map[K, V] = {
- val data = self.toArray()
+ val data = self.collect()
val map = new mutable.HashMap[K, V]
map.sizeHint(data.length)
data.foreach { case (k, v) => map.put(k, v) }

1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -658,6 +658,7 @@ abstract class RDD[T: ClassTag](
/**
* Return an array that contains all of the elements in this RDD.
*/
@deprecated("use collect", "1.0.0")
def toArray(): Array[T] = collect()

/**

4 changes: 2 additions & 2 deletions core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala
@@ -26,13 +26,13 @@ import cern.jet.random.engine.DRand

import org.apache.spark.{Partition, TaskContext}

@deprecated("Replaced by PartitionwiseSampledRDDPartition", "1.0")
@deprecated("Replaced by PartitionwiseSampledRDDPartition", "1.0.0")
private[spark]
class SampledRDDPartition(val prev: Partition, val seed: Int) extends Partition with Serializable {
override val index: Int = prev.index
}

@deprecated("Replaced by PartitionwiseSampledRDD", "1.0")
@deprecated("Replaced by PartitionwiseSampledRDD", "1.0.0")
class SampledRDD[T: ClassTag](
prev: RDD[T],
withReplacement: Boolean,

@@ -128,11 +128,11 @@ object SparkALS {
println("Iteration " + iter + ":")
ms = sc.parallelize(0 until M, slices)
.map(i => update(i, msb.value(i), usb.value, Rc.value))
- .toArray
+ .collect()
msb = sc.broadcast(ms) // Re-broadcast ms because it was updated
us = sc.parallelize(0 until U, slices)
.map(i => update(i, usb.value(i), msb.value, algebra.transpose(Rc.value)))
- .toArray
+ .collect()
usb = sc.broadcast(us) // Re-broadcast us because it was updated
println("RMSE = " + rmse(R, ms, us))
println()

@@ -54,6 +54,6 @@ object SparkSVD {
val s = decomposed.S.data
val v = decomposed.V.data

println("singular values = " + s.toArray.mkString)
println("singular values = " + s.collect().mkString)
}
}

4 changes: 2 additions & 2 deletions mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala
@@ -109,7 +109,7 @@ object SVD {

// Construct jblas A^T A locally
val ata = DoubleMatrix.zeros(n, n)
- for (entry <- emits.toArray) {
+ for (entry <- emits.collect()) {
ata.put(entry._1._1, entry._1._2, entry._2)
}


@@ -178,7 +178,7 @@ object SVD {
val s = decomposed.S.data
val v = decomposed.V.data

println("Computed " + s.toArray.length + " singular values and vectors")
println("Computed " + s.collect().length + " singular values and vectors")
u.saveAsTextFile(output_u)
s.saveAsTextFile(output_s)
v.saveAsTextFile(output_v)

@@ -50,7 +50,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
val m = matrix.m
val n = matrix.n
val ret = DoubleMatrix.zeros(m, n)
- matrix.data.toArray.map(x => ret.put(x.i, x.j, x.mval))
+ matrix.data.collect().map(x => ret.put(x.i, x.j, x.mval))
ret
}


@@ -106,7 +106,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
val u = decomposed.U
val s = decomposed.S
val v = decomposed.V
- val retrank = s.data.toArray.length
+ val retrank = s.data.collect().length

assert(retrank == 1, "rank returned not one")


@@ -139,7 +139,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
val u = decomposed.U
val s = decomposed.S
val v = decomposed.V
- val retrank = s.data.toArray.length
+ val retrank = s.data.collect().length

val densea = getDenseMatrix(a)
val svd = Singular.sparseSVD(densea)