Skip to content

Commit

Permalink
[SPARK-14653][ML] Remove json4s from mllib-local
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

This PR moves Vector.toJson/fromJson to ml.linalg.JsonVectorConverter under mllib/ to keep mllib-local's dependencies minimal. The JSON encoding is used by Params, so we still need this feature in SPARK-14615, where we will switch to ml.linalg in the spark.ml APIs.

## How was this patch tested?

Copied existing unit tests over.

cc: dbtsai

Author: Xiangrui Meng <[email protected]>

Closes apache#12802 from mengxr/SPARK-14653.
  • Loading branch information
mengxr committed Apr 30, 2016
1 parent 1192fe4 commit 0847fe4
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 62 deletions.
4 changes: 0 additions & 4 deletions mllib-local/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,6 @@
<artifactId>scalacheck_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.json4s</groupId>
<artifactId>json4s-jackson_${scala.binary.version}</artifactId>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@ import scala.annotation.varargs
import scala.collection.JavaConverters._

import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV}
import org.json4s.DefaultFormats
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods.{compact, parse => parseJson, render}

/**
* Represents a numeric vector, whose index type is Int and value type is Double.
Expand Down Expand Up @@ -153,11 +150,6 @@ sealed trait Vector extends Serializable {
* Returns -1 if vector has length 0.
*/
def argmax: Int

/**
* Converts the vector to a JSON string.
*
* @return the JSON string encoding this vector
*/
def toJson: String
}

/**
Expand Down Expand Up @@ -233,26 +225,6 @@ object Vectors {
new DenseVector(new Array[Double](size))
}

/**
* Parses the JSON representation of a vector into a [[Vector]].
*
* The integer "type" field selects the encoding: 0 is read as a sparse vector (from the
* "size", "indices" and "values" fields) and 1 as a dense vector (from the "values" field).
*
* @param json JSON string to parse
* @return the vector built from the parsed fields
* @throws IllegalArgumentException if "type" is an integer other than 0 or 1
*/
def fromJson(json: String): Vector = {
implicit val formats = DefaultFormats
val jValue = parseJson(json)
(jValue \ "type").extract[Int] match {
case 0 => // sparse
val size = (jValue \ "size").extract[Int]
val indices = (jValue \ "indices").extract[Seq[Int]].toArray
val values = (jValue \ "values").extract[Seq[Double]].toArray
sparse(size, indices, values)
case 1 => // dense
val values = (jValue \ "values").extract[Seq[Double]].toArray
dense(values)
case _ =>
// Any other type tag is rejected, echoing the offending input in the message.
throw new IllegalArgumentException(s"Cannot parse $json into a vector.")
}
}

/**
* Creates a vector instance from a breeze vector.
*/
Expand Down Expand Up @@ -541,11 +513,6 @@ class DenseVector (val values: Array[Double]) extends Vector {
maxIdx
}
}

/**
* Encodes this dense vector as a JSON object with "type" set to 1 and "values" holding the
* entries, rendered to a string with `compact(render(...))`.
*/
override def toJson: String = {
val jValue = ("type" -> 1) ~ ("values" -> values.toSeq)
compact(render(jValue))
}
}

object DenseVector {
Expand Down Expand Up @@ -724,14 +691,6 @@ class SparseVector (
}.unzip
new SparseVector(selectedIndices.length, sliceInds.toArray, sliceVals.toArray)
}

/**
* Encodes this sparse vector as a JSON object with "type" set to 0 plus the "size", "indices"
* and "values" fields, rendered to a string with `compact(render(...))`.
*/
override def toJson: String = {
val jValue = ("type" -> 0) ~
("size" -> size) ~
("indices" -> indices.toSeq) ~
("values" -> values.toSeq)
compact(render(jValue))
}
}

object SparseVector {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ package org.apache.spark.ml.linalg
import scala.util.Random

import breeze.linalg.{squaredDistance => breezeSquaredDistance, DenseMatrix => BDM}
import org.json4s.jackson.JsonMethods.{parse => parseJson}

import org.apache.spark.ml.SparkMLFunSuite
import org.apache.spark.ml.util.TestingUtils._
Expand Down Expand Up @@ -339,20 +338,4 @@ class VectorsSuite extends SparkMLFunSuite {
assert(v.slice(Array(2, 0)) === new SparseVector(2, Array(0), Array(2.2)))
assert(v.slice(Array(2, 0, 3, 4)) === new SparseVector(4, Array(0, 3), Array(2.2, 4.4)))
}

// Round-trip check: each vector is encoded with toJson, decoded with Vectors.fromJson,
// and compared against the original for both class and value.
test("toJson/fromJson") {
// Sparse samples: size 0, size 1 with no stored entries, and size 2 with one stored entry.
val sv0 = Vectors.sparse(0, Array.empty, Array.empty)
val sv1 = Vectors.sparse(1, Array.empty, Array.empty)
val sv2 = Vectors.sparse(2, Array(1), Array(2.0))
// Dense samples: empty, one element, and two elements.
val dv0 = Vectors.dense(Array.empty[Double])
val dv1 = Vectors.dense(1.0)
val dv2 = Vectors.dense(0.0, 2.0)
for (v <- Seq(sv0, sv1, sv2, dv0, dv1, dv2)) {
val json = v.toJson
parseJson(json) // `json` should be a valid JSON string
val u = Vectors.fromJson(json)
assert(u.getClass === v.getClass, "toJson/fromJson should preserve vector types.")
assert(u === v, "toJson/fromJson should preserve vector values.")
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.ml.linalg

import org.json4s.DefaultFormats
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods.{compact, parse => parseJson, render}

private[ml] object JsonVectorConverter {

  /**
   * Builds a [[Vector]] from its JSON representation.
   *
   * The integer "type" field selects the encoding: 0 is read as a sparse vector (from the
   * "size", "indices" and "values" fields) and 1 as a dense vector (from the "values" field).
   *
   * @param json JSON string to parse
   * @return the vector built from the parsed fields
   * @throws IllegalArgumentException if "type" is an integer other than 0 or 1
   */
  def fromJson(json: String): Vector = {
    implicit val formats = DefaultFormats
    val root = parseJson(json)
    val typeTag = (root \ "type").extract[Int]
    if (typeTag == 0) {
      // sparse
      val length = (root \ "size").extract[Int]
      val activeIndices = (root \ "indices").extract[Seq[Int]].toArray
      val activeValues = (root \ "values").extract[Seq[Double]].toArray
      Vectors.sparse(length, activeIndices, activeValues)
    } else if (typeTag == 1) {
      // dense
      val entries = (root \ "values").extract[Seq[Double]].toArray
      Vectors.dense(entries)
    } else {
      throw new IllegalArgumentException(s"Cannot parse $json into a vector.")
    }
  }

  /**
   * Converts the vector to a JSON string.
   *
   * A dense vector is written with "type" 1 and its "values"; a sparse vector is written with
   * "type" 0 together with its "size", "indices" and "values".
   *
   * @param v vector to encode
   * @return the rendered JSON string
   */
  def toJson(v: Vector): String = {
    // Build the JSON object per vector kind, then render it once at the end.
    val encoded = v match {
      case DenseVector(entries) =>
        ("type" -> 1) ~ ("values" -> entries.toSeq)
      case SparseVector(length, activeIndices, activeValues) =>
        ("type" -> 0) ~
          ("size" -> length) ~
          ("indices" -> activeIndices.toSeq) ~
          ("values" -> activeValues.toSeq)
    }
    compact(render(encoded))
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.ml.linalg

import org.json4s.jackson.JsonMethods.parse

import org.apache.spark.SparkFunSuite

/**
 * Round-trip tests for [[JsonVectorConverter]]: vectors encoded with `toJson` must decode with
 * `fromJson` to a vector of the same class and value.
 */
class JsonVectorConverterSuite extends SparkFunSuite {

  test("toJson/fromJson") {
    // Sparse samples: size 0, size 1 with no stored entries, and size 2 with one stored entry.
    val emptySparse = Vectors.sparse(0, Array.empty, Array.empty)
    val allZeroSparse = Vectors.sparse(1, Array.empty, Array.empty)
    val singleEntrySparse = Vectors.sparse(2, Array(1), Array(2.0))
    // Dense samples: empty, one element, and two elements.
    val emptyDense = Vectors.dense(Array.empty[Double])
    val singleDense = Vectors.dense(1.0)
    val pairDense = Vectors.dense(0.0, 2.0)

    val samples = Seq(
      emptySparse, allZeroSparse, singleEntrySparse, emptyDense, singleDense, pairDense)

    samples.foreach { original =>
      val encoded = JsonVectorConverter.toJson(original)
      parse(encoded) // `encoded` should be a valid JSON string
      val decoded = JsonVectorConverter.fromJson(encoded)
      assert(decoded.getClass === original.getClass,
        "toJson/fromJson should preserve vector types.")
      assert(decoded === original, "toJson/fromJson should preserve vector values.")
    }
  }
}

0 comments on commit 0847fe4

Please sign in to comment.