Skip to content

Commit

Permalink
PDIO-267 Fixed incorrect score ranking in mahout model constructors
Browse files Browse the repository at this point in the history
  • Loading branch information
Kenneth Chan committed Jan 29, 2014
1 parent 1e027fd commit 9c90b33
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,9 @@ class ModelConstructor(args: Args) extends Job(args) {
* computation
*/

// Diff hunk: the first definition below is the line this commit removed;
// the second definition (with mapTo) is its replacement.
val seenRatings = ratingSource.read
// Maps the three rating fields onto themselves through an identity function whose
// parameter is typed (String, String, Double), so that 'ratingR is handled as a
// Double downstream instead of a String.
val seenRatings = ratingSource.read.mapTo(('uindexR, 'iindexR, 'ratingR) -> ('uindexR, 'iindexR, 'ratingR)) {
fields: (String, String, Double) => fields // convert score from String to Double
}

// convert to (uindex, iindex, rating) format
// and filter seen items from predicted
Expand Down Expand Up @@ -126,11 +128,21 @@ class ModelConstructor(args: Args) extends Job(args) {
[0:2.0]
[16:3.0]
*/
// Diff hunk: the first signature below (returning List[(String, String)]) and the
// bare tuple line `(ratingDataArray(0), ratingDataArray(1))` are the lines this
// commit removed; the remaining lines are the post-change implementation.
//
// Parses one predicted-data string into (item, rating) pairs: drops the first and
// last character of `data` (presumably the enclosing brackets, as in "[0:2.0]"),
// splits the remainder on ",", splits each piece on ":" and converts the part
// after the ":" to a Double.
def parsePredictedData(data: String): List[(String, String)] = {
def parsePredictedData(data: String): List[(String, Double)] = {
val dataLen = data.length
// take(dataLen - 1) drops the last character; tail drops the first one.
data.take(dataLen - 1).tail.split(",").toList.map { ratingData =>
val ratingDataArray = ratingData.split(":")
(ratingDataArray(0), ratingDataArray(1))
val item = ratingDataArray(0)
// The index access is inside the try, so a piece without a ":" part ends up
// in the same catch branch as a value that is not a valid number.
val rating: Double = try {
ratingDataArray(1).toDouble
} catch {
case e: Exception =>
{
// NOTE(review): ratingDataArray is an Array, so concatenating it into the message
// prints the array's default toString rather than its elements — confirm whether
// ratingData was intended here.
assert(false, s"Cannot convert rating value of item ${item} to double: " + ratingDataArray + ". Exception: " + e)
}
// Only reached when the assert above does not throw (e.g. assertions elided at
// compile time); in that case the rating silently falls back to 0.0.
0.0
}
(item, rating)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,22 @@ class ModelConstructorTest extends Specification with TupleConversions {

}

// Fixtures for the numeric-ordering regression case. The scores used here
// (123, 88, 9, 2) rank differently as numbers than as strings: compared as
// strings, "9" would sort ahead of "88" and "123".
// Items: (item index, item id, comma-separated item types).
val test2Items = List(("0", "i0", "t1,t2,t3"), ("1", "i1", "t1,t2"), ("2", "i2", "t2,t3"), ("3", "i3", "t2"))

// Users: (user index, user id).
val test2Users = List(("0", "u0"), ("1", "u1"), ("2", "u2"), ("3", "u3"))

// Predicted scores per user index, in "[itemIndex:score,...]" form.
val test2Predicted = List(("0", "[1:123,2:9]"), ("1", "[0:1]"))

// Already-seen ratings: (user index, item index, rating).
val test2Ratings = List(("0", "0", "2"), ("0", "3", "88"))

// Expected rows list each user's items in descending numeric score order; for u0
// the seen ratings (88, 2) are interleaved with the predicted scores (123, 9).
val test2Output = List(
("u0", "i1,i3,i2,i0", "123.0,88.0,9.0,2.0", "[t1,t2],[t2],[t2,t3],[t1,t2,t3]"),
("u1", "i0", "1.0", "[t1,t2,t3]"))

"mahout.itemrec.itembased ModelConstructor with unseenOnly=false and numRecommendations=100 (score should not be compared as string)" should {

// Arguments follow the description above: unseenOnly = false, numRecommendations = 100.
test(false, 100, test2Items, test2Users, test2Predicted, test2Ratings, test2Output)

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ class ModelConstructor(args: Args) extends Job(args) {
* source
*/
// Reads similarities.tsv as ('iindex, 'simiindex, 'score) and passes the fields
// through an identity function typed (String, String, Double), so that 'score is
// handled as a Double downstream instead of a String.
val similarities = Tsv(AlgoFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "similarities.tsv"), ('iindex, 'simiindex, 'score)).read
.mapTo(('iindex, 'simiindex, 'score) -> ('iindex, 'simiindex, 'score)) {
fields: (String, String, Double) => fields // convert score from String to Double
}

val itemsIndex = Tsv(DataFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "itemsIndex.tsv")).read
.mapTo((0, 1, 2) -> ('iindexI, 'iidI, 'itypesI)) { fields: (String, String, String) =>
Expand All @@ -87,7 +90,7 @@ class ModelConstructor(args: Args) extends Job(args) {
.joinWithSmaller('simiindex -> 'iindexI, itemsIndex)

// sim1 keeps each similarity pair in its original direction.
val sim1 = sim.project('iid, 'iidI, 'itypesI, 'score)
// Diff hunk: of the two sim2 definitions below, the first (score typed as String)
// is the line this commit removed; the second (score typed as Double) replaces it.
// sim2 emits the reverse direction of each pair: the input fields
// ('iidI, 'iid, 'itypes, 'score) are re-labelled as ('iid, 'iidI, 'itypesI, 'score).
val sim2 = sim.mapTo(('iidI, 'iid, 'itypes, 'score) -> ('iid, 'iidI, 'itypesI, 'score)) { fields: (String, String, List[String], String) => fields }
val sim2 = sim.mapTo(('iidI, 'iid, 'itypes, 'score) -> ('iid, 'iidI, 'itypesI, 'score)) { fields: (String, String, List[String], Double) => fields }

// Concatenate both directions so every item is listed against each of its similar items.
val combinedSimilarities = sim1 ++ sim2

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,26 @@ class ModelConstructorTest extends Specification with TupleConversions {

}

// Fixtures for the numeric-ordering regression case. The scores mix one- to
// four-digit values (4, 9, 68, 83, 200, 1000), so ranking them as strings would
// give a different order than ranking them as numbers.
// Items: (item index, item id, comma-separated item types).
val test2Items = List(("0", "i0", "t1,t2,t3"), ("1", "i1", "t1,t2"), ("2", "i2", "t2,t3"), ("3", "i3", "t2"))

// Similarities: (item index, similar item index, score); each pair is listed once.
val test2Similarities = List(
("0", "1", "83"),
("0", "2", "200"),
("0", "3", "4"),
("1", "2", "9"),
("1", "3", "68"),
("2", "3", "1000"))

// Expected rows: every item appears with its similar items from both directions of
// each pair, ordered by descending numeric score.
val test2Output = List(
("i0", "i2,i1,i3", "200.0,83.0,4.0", "[t2,t3],[t1,t2],[t2]"),
("i1", "i0,i3,i2", "83.0,68.0,9.0", "[t1,t2,t3],[t2],[t2,t3]"),
("i2", "i3,i0,i1", "1000.0,200.0,9.0", "[t2],[t1,t2,t3],[t1,t2]"),
("i3", "i2,i1,i0", "1000.0,68.0,4.0", "[t2,t3],[t1,t2],[t1,t2,t3]"))

"mahout.itemsim ModelConstructor (score should not be compared as string)" should {

test(100, test2Items, test2Similarities, test2Output)

}

}

0 comments on commit 9c90b33

Please sign in to comment.