Skip to content

Commit

Permalink
PDIO-300.
Browse files Browse the repository at this point in the history
1. Implement unittests for feature based itemrec, both batch and realtime
mode.
2. Refactor feature based code.
3. Specify some default values for appdata and settings.
  • Loading branch information
Justin Yip committed May 2, 2014
1 parent c718a3c commit 392a0a8
Show file tree
Hide file tree
Showing 11 changed files with 652 additions and 100 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ case class U2IAction(
uid: String,
iid: String,
t: DateTime,
latlng: Option[Tuple2[Double, Double]],
v: Option[Int],
price: Option[Double])
latlng: Option[Tuple2[Double, Double]] = None,
v: Option[Int] = None,
price: Option[Double] = None)

/** Base trait for implementations that interact with user-to-item actions in the backend app data store. */
trait U2IActions {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ case class User(
id: String,
appid: Int,
ct: DateTime,
latlng: Option[Tuple2[Double, Double]],
inactive: Option[Boolean],
attributes: Option[Map[String, Any]])
latlng: Option[Tuple2[Double, Double]] = None,
inactive: Option[Boolean] = None,
attributes: Option[Map[String, Any]] = None)

/** Base trait for implementations that interact with users in the backend app data store. */
trait Users {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ case class Algo(
name: String,
infoid: String,
command: String,
params: Map[String, Any],
settings: Map[String, Any],
params: Map[String, Any] = Map(),
settings: Map[String, Any] = Map(),
modelset: Boolean,
createtime: DateTime,
updatetime: DateTime,
status: String = "",
offlineevalid: Option[Int],
offlineevalid: Option[Int] = None,
offlinetuneid: Option[Int] = None,
loop: Option[Int] = None,
paramset: Option[Int] = None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ case class App(
userid: Int,
appkey: String,
display: String,
url: Option[String],
cat: Option[String],
desc: Option[String],
timezone: String)
url: Option[String] = Some(""),
cat: Option[String] = Some(""),
desc: Option[String] = Some(""),
timezone: String = "UTC")

/** Base trait for implementations that interact with apps in the backend data store. */
trait Apps extends Common {
Expand Down
18 changes: 9 additions & 9 deletions dist/conf/init.json
Original file line number Diff line number Diff line change
Expand Up @@ -5565,10 +5565,10 @@
"name": "Feature Based, Batch Mode. (Experimental)",
"description": "Item recommendation based on item's feature vector (itypes). This engine builds a user preference model based on highly rated items (rate > 3).",
"batchcommands": [
"$base$/bin/itemrec.featurebased.Batch --appid $appid$ --algoid $algoid$ --modelSet $modelset$ --numRecommendations $numRecommendations$ --whiteItypes $whiteItypes$"
"$base$/bin/itemrec.featurebased.Batch --appid $appid$ --algoid $algoid$ --modelSet $modelset$ --numRecommendations $numRecommendations$ --featureItypes $featureItypes$"
],
"offlineevalcommands": [
"$base$/bin/itemrec.featurebased.Batch --appid $appid$ --algoid $algoid$ --modelSet $modelset$ --numRecommendations $numRecommendations$ --whiteIteyps $whiteItypes$"
"$base$/bin/itemrec.featurebased.Batch --appid $appid$ --algoid $algoid$ --modelSet $modelset$ --numRecommendations $numRecommendations$ --featureItypes $featureItypes$"
],
"paramorder": [],
"engineinfoid": "itemrec",
Expand All @@ -5577,8 +5577,8 @@
"Users, Items, and U2I Actions."
],
"params": {
"whiteItypes": {
"name": "Whitelisted Itypes",
"featureItypes": {
"name": "Feature Itypes",
"description": "A comma-delimited list of itypes that is used to construct the preference model. If not specified, all itypes found in items will be used.",
"constraint": {
"paramtype": "string"
Expand All @@ -5594,7 +5594,7 @@
"name": "Parameter Settings",
"sectiontype": "normal",
"params": [
"whiteItypes"
"featureItypes"
]
}
]
Expand All @@ -5603,7 +5603,7 @@
"name": "Feature Based, Realtime Mode. (Experimental)",
"description": "Item recommendation based on item's feature vector (itypes). This engine builds a user preference model based on highly rated items (rate > 3).",
"batchcommands": [
"$base$/bin/itemrec.featurebased.Realtime --appid $appid$ --algoid $algoid$ --modelSet $modelset$ --numRecommendations $numRecommendations$ --whiteItypes $whiteItypes$"
"$base$/bin/itemrec.featurebased.Realtime --appid $appid$ --algoid $algoid$ --modelSet $modelset$ --numRecommendations $numRecommendations$ --featureItypes $featureItypes$"
],
"offlineevalcommands": [
"$base$/bin/itemrec.featurebased.Realtime --appid $appid$ --algoid $algoid$ --modelSet $modelset$ --numRecommendations $numRecommendations$ --featureItypes $featureItypes$"
Expand All @@ -5615,8 +5615,8 @@
"Users, Items, and U2I Actions."
],
"params": {
"whiteItypes": {
"name": "Whitelisted Itypes",
"featureItypes": {
"name": "Feature Itypes",
"description": "A comma-delimited list of itypes that is used to construct the preference model. If not specified, all itypes found in items will be used.",
"constraint": {
"paramtype": "string"
Expand All @@ -5632,7 +5632,7 @@
"name": "Parameter Settings",
"sectiontype": "normal",
"params": [
"whiteItypes"
"featureItypes"
]
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,8 @@ packJvmOpts := Map(
"itemrec.featurebased.Realtime" -> Common.packCommonJvmOpts
)


testOptions in Test += Tests.Argument("failtrace")

// Can exclude some paths
// testOptions in Test += Tests.Argument("tracefilter", "/io.prediction.algorithms.itemrec.featurebased.CustomMatcher")
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,23 @@ object UserProfileRecommendationBatch {
}}
}

/** Runs the batch recommendation pipeline for one app/algo pair.
  *
  * Builds the per-user feature profiles via
  * `UserProfileRecommendation.constructUserFeaturesMapFromArg`, scores items
  * with `UserProfileRecommendation.recommend`, and hands the recommendations
  * together with the item -> itypes map to `modelCon`.
  *
  * @param appid               app whose data is read
  * @param algoid              algo id forwarded to `modelCon`
  * @param modelset            model set flag forwarded to `modelCon`
  * @param numRecommendations  number of items kept per user
  * @param optFeatureItypesStr optional comma-delimited feature itypes
  * @return whatever `modelCon` returns
  */
def run(appid: Int, algoid: Int, modelset: Boolean,
    numRecommendations: Int,
    optFeatureItypesStr: Option[String]) = {
  UserProfileRecommendation.constructUserFeaturesMapFromArg(
    appid, optFeatureItypesStr) match {
    case (profiles, itypeFeatures, itemToItypes) =>
      val recommendations = UserProfileRecommendation.recommend(
        profiles, itypeFeatures, itemToItypes, numRecommendations)

      modelCon(appid, algoid, modelset, recommendations, itemToItypes)
  }
}

def main(cmdArgs: Array[String]) = {
val args = Args(cmdArgs)

Expand All @@ -72,34 +89,16 @@ object UserProfileRecommendationBatch {
val modelset = args("modelSet").toBoolean
val numRecommendations = args.optional("numRecommendations")
.getOrElse("10").toInt
val verbose = args.optional("verbose").getOrElse("false").toBoolean
val optWhiteItypesStr = args.optional("whiteItypes")

// Recommendation
val (itypes, itemTypesMap) = UserProfileRecommendation.getItems(appid)

val whiteItypes = UserProfileRecommendation.getWhiteItypes(
itypes, optWhiteItypesStr)

val whiteInvItypes = (0 until whiteItypes.length)
.map(i => (whiteItypes(i), i)).toMap

val userU2IsMap = UserProfileRecommendation.getU2I(appid)

val userFeaturesMap = UserProfileRecommendation.constructUserFeatureMap(
whiteInvItypes, itemTypesMap, userU2IsMap)

val userRecommendationsMap = UserProfileRecommendation.recommend(
userFeaturesMap, itemTypesMap, itypes, whiteInvItypes,
userFeaturesMap.keys.toSeq, numRecommendations)

// Model Construction
modelCon(appid, algoid, modelset, userRecommendationsMap, itemTypesMap)
val optFeatureItypesStr = args.optional("featureItypes")

run(appid, algoid, modelset, numRecommendations,
optFeatureItypesStr)
/*
if (verbose) {
UserProfileRecommendation.printRecommendations(
userFeaturesMap, whiteItypes,
userRecommendationsMap, itemTypesMap)
userFeaturesMap, featureItypes,
userRecommendationsMap, itemItypesMap)
}
*/
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,46 @@ package io.prediction.algorithms.itemrec.featurebased
import grizzled.slf4j.Logger
import io.prediction.commons.Config
import com.twitter.scalding.Args
import io.prediction.commons.appdata.U2IAction
import io.prediction.commons.appdata.{ Item, User, U2IAction }
import io.prediction.commons.modeldata.ItemRecScore

class UserProfileRecommendationException(msg: String = null, cause: Throwable=null)
extends RuntimeException(msg, cause)


// Only consider items rated > 3
object UserProfileRecommendation {
val logger = Logger(UserProfileRecommendation.getClass)
val commonsConfig = new Config

// Return itypes if whiteItypesStr is emtpy
// Otherwise, return only their intersection.
def getWhiteItypes(itypes: Seq[String], optWhiteItypesStr: Option[String])
// Return itypes if optFeatureItypesStr is None or holds an empty string.
// If feature itypes are specified, return their intersection with itypes,
// in the order given by the feature itypes string.
def getFeatureItypes(itypes: Seq[String], optFeatureItypesStr: Option[String])
: Seq[String] = {
if (optWhiteItypesStr.isEmpty)
if (optFeatureItypesStr.isEmpty)
return itypes

val featureItypesStr = optFeatureItypesStr.get
if (featureItypesStr == "")
return itypes

val whiteItypeSet = optWhiteItypesStr.get.split(',').toSet
itypes.filter{ itypes => whiteItypeSet.contains(itypes) }
val itypeSet = itypes.toSet
featureItypesStr.split(',').filter{
featureItype => itypeSet.contains(featureItype)
}
}

/** Fetches the users of the given app from the app-data users store.
  *
  * @param appid app whose users are read
  * @return the users returned by `getByAppid`, materialised as a `Seq`
  */
def getUsers(appid: Int): Seq[User] =
  commonsConfig.getAppdataUsers.getByAppid(appid).toSeq

// distinct itypes
// iid -> itypes
def getItems(appid: Int): (Seq[String], Map[String, Seq[String]]) = {
val itemsDb = commonsConfig.getAppdataItems

// FIXME(yipjustin) filter by startT, endT.
val itemTypesMap = itemsDb.getByAppid(appid)
.map(item => (item.id, item.itypes)).toMap

Expand All @@ -41,73 +56,114 @@ object UserProfileRecommendation {
u2iDb.getAllByAppid(appid).toSeq.groupBy(_.uid)
}

// Only whiteInvItypes is used.
// Only featureInvItypes is used.
def constructUserFeatureMap(
whiteInvItypes: Map[String, Int],
featureInvItypes: Map[String, Int],
itemTypesMap: Map[String, Seq[String]],
uidList: Seq[String],
userU2IsMap: Map[String, Seq[U2IAction]]) : Map[String, Seq[Double]] = {

val userFeatureMap = userU2IsMap.map{ case(user, u2is) => {
//val userFeatureMap = userU2IsMap.map{ case(user, u2is) => {
val userFeatureMap = uidList.map{ user => {
val u2is = userU2IsMap.getOrElse(user, Seq[U2IAction]())

// TODO. Discount early actions
val userFeatureList = u2is
.filter(_.action == "rate")
.filter(_.v.getOrElse(0) > 3)
.filter(u2i => itemTypesMap.contains(u2i.iid))
.map{ u2i => {
// Only populate the whitelisted itypes
val feature = new Array[Int](whiteInvItypes.size)
// Only populate the featurelisted itypes
//val feature = new Array[Int](featureInvItypes.size)
val feature = Array.fill[Int](featureInvItypes.size)(0)
itemTypesMap(u2i.iid)
.filter(whiteInvItypes.contains)
.foreach(e => feature(whiteInvItypes(e)) = 1)
.filter(featureInvItypes.contains)
.foreach(e => feature(featureInvItypes(e)) = 1)

feature
}}

val userFeature = (
if (userFeatureList.length > 0) {
userFeatureList.transpose.map(_.sum).toList
val sumUserFeature = userFeatureList.transpose.map(_.sum).toList
if (sumUserFeature.sum == 0) {
// This happens when none of the items rated by the user has any of the
// feature itypes. In that case, fall back to a uniform profile.
Seq.fill(featureInvItypes.size)(1)
} else {
sumUserFeature
}
} else {
// For a user with no features, assume a uniform profile.
Seq.fill(whiteInvItypes.size)(1)
Seq.fill(featureInvItypes.size)(1)
}
)

val featureSum = userFeature.sum
val normalizedUserFeature = userFeature.map(_.toDouble / featureSum)

//println(user + " : " + normalizedUserFeature)
(user, normalizedUserFeature.toList)
}}.toMap
userFeatureMap
}

/** Loads items, users and U2I actions for an app and builds the per-user
  * feature profiles.
  *
  * Note that the third element of the result maps each item to *all* of its
  * itypes, not only the feature itypes: the non-feature itypes are still
  * needed in modeldata for other pruning.
  *
  * @param appid               app whose data is read
  * @param optFeatureItypesStr optional comma-delimited feature itypes
  * @return (user -> itype scores, feature itype list, item -> all itypes)
  * @throws UserProfileRecommendationException if no feature itype remains
  */
def constructUserFeaturesMapFromArg(
    appid: Int,
    optFeatureItypesStr: Option[String]) : (
    Map[String, Seq[Double]], // User -> Itype Scores
    Seq[String], // Itype Feature List
    Map[String, Seq[String]] // Item -> All Itypes
    ) = {
  val (allItypes, itemItypesMap) = getItems(appid)
  val uids = getUsers(appid).map(_.id)

  val featureItypes = getFeatureItypes(allItypes, optFeatureItypesStr)
  if (featureItypes.isEmpty) {
    throw new UserProfileRecommendationException("No items has featurelisted types")
  }

  // Inverse index: feature itype -> its position in the feature vector.
  val featureInvItypes = featureItypes.zipWithIndex.toMap

  val userFeaturesMap = constructUserFeatureMap(
    featureInvItypes, itemItypesMap, uids, getU2I(appid))

  (userFeaturesMap, featureItypes, itemItypesMap)
}

/** Renders the strictly positive entries of a feature vector as text.
  *
  * @param feature scores, paired positionally with `itypes`
  * @param itypes  itype names; `zip` drops any unpaired trailing entries
  * @return comma-separated `itype=score` pairs with scores formatted to four
  *         decimals, or an empty string when no score is positive
  */
def printFeature(feature: Seq[Double], itypes: Seq[String]): String = {
  // mkString is used instead of reduce because reduce throws on an empty
  // sequence, which happens whenever no score is positive.
  feature.zip(itypes)
    .collect { case (score, itype) if score > 0 => f"$itype=$score%.4f" }
    .mkString(",")
}

def recommend(
userFeaturesMap: Map[String, Seq[Double]],
itemTypesMap: Map[String, Seq[String]],
allItypes: Seq[String],
whiteInvItypes: Map[String, Int],
users: Seq[String],
featureItypes: Seq[String],
itemItypes: Map[String, Seq[String]],
numRecommendations: Int
) : Map[String, Seq[(String, Double)]] = {

users.map { uid => {
val userFeatures = userFeaturesMap(uid)
val itemScoreMap = itemTypesMap.map{ case(iid, itypes) => {
val featureIdxMap = featureItypes.zip(0 until featureItypes.size).toMap

userFeaturesMap.map { case(uid, features) => {
val itemScoreMap = itemItypes.map{ case(iid, itypes) => {
val score = itypes
.filter(whiteInvItypes.contains)
.map(itype => whiteInvItypes(itype))
.map(idx => userFeatures(idx))
.sum
.filter(featureIdxMap.contains)
.map(itype => featureIdxMap(itype))
.map(idx => features(idx))
.sum
// FIXME: not decided yet. if item has too many types, need to discount
// them
// / itypes.size
(iid, score)
// them / itypes.size
(iid, score)
}}

val top = itemScoreMap.toList.sortBy(-_._2).take(numRecommendations)
(uid, top)
}}.toMap
Expand Down
Loading

0 comments on commit 392a0a8

Please sign in to comment.