Reco ranking (online-ml#832)
* expose forecasting metrics

* rename recommender to ranker

* Update content-personalization.ipynb

* activate ranker tests

* Update label_combination_hoeffding_tree.py

* Update label_combination_hoeffding_tree.py
MaxHalford authored Jan 30, 2022
1 parent 3732f70 commit d7800b8
Showing 15 changed files with 293 additions and 177 deletions.
221 changes: 184 additions & 37 deletions docs/examples/content-personalization.ipynb

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions docs/releases/unreleased.md
@@ -29,6 +29,7 @@

## reco

- Renamed the `Recommender` base class into `Ranker`.
- Added a `rank` method to each recommender.
- Removed `reco.SurpriseWrapper` as it wasn't really useful.
- Added a `recommend` method to each recommender.
- Added an `is_contextual` property to each recommender.
- Added an `is_contextual` property to each ranker to indicate if a model makes use of contextual features or not.
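
The release notes above summarize the new ranking API. As a rough illustration only (not part of this commit), here is how the renamed class might be used, assuming the `learn_one(user, item, y)` and `rank(user, items)` signatures introduced in this diff; the ratings are made up.

from river import optim, reco

# Illustrative sketch of the renamed API, assuming the post-commit
# signatures learn_one(user, item, y, x=None) and rank(user, items, x=None).
model = reco.Baseline(optimizer=optim.SGD(0.005))

model.learn_one(user="Alice", item="Superman", y=8)
model.learn_one(user="Alice", item="Terminator", y=9)

# Items come back in decreasing order of predicted preference.
print(model.rank("Alice", items={"Superman", "Terminator"}))

# The new property indicates whether the model uses contextual features.
print(model.is_contextual)  # False for Baseline
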
24 changes: 12 additions & 12 deletions river/checks/__init__.py
@@ -8,7 +8,7 @@

from river.base import Estimator
from river.model_selection import ModelSelector
from river.reco import Recommender
from river.reco import Ranker

from . import clf, common, model_selection, reco

@@ -54,18 +54,18 @@ def _yield_datasets(model: Estimator):

# Recommendation models can be regressors or classifiers, but they have requirements as to the
# structure of the data
if isinstance(utils.inspect.extract_relevant(model), Recommender):
if isinstance(utils.inspect.extract_relevant(model), Ranker):
if utils.inspect.isregressor(model):
yield _DummyDataset(
({"user": "Alice", "item": "Superman"}, 8),
({"user": "Alice", "item": "Terminator"}, 9),
({"user": "Alice", "item": "Star Wars"}, 8),
({"user": "Alice", "item": "Notting Hill"}, 2),
({"user": "Alice", "item": "Harry Potter"}, 5),
({"user": "Bob", "item": "Superman"}, 8),
({"user": "Bob", "item": "Terminator"}, 9),
({"user": "Bob", "item": "Star Wars"}, 8),
({"user": "Bob", "item": "Notting Hill"}, 2),
("Alice", "Superman", 8),
("Alice", "Terminator", 9),
("Alice", "Star Wars", 8),
("Alice", "Notting Hill", 2),
("Alice", "Harry Potter", 5),
("Bob", "Superman", 8),
("Bob", "Terminator", 9),
("Bob", "Star Wars", 8),
("Bob", "Notting Hill", 2),
)
return

@@ -162,7 +162,7 @@ def yield_checks(model: Estimator) -> Iterator[callable]:
if isinstance(utils.inspect.extract_relevant(model), ModelSelector):
checks.append(model_selection.check_model_selection_order_does_not_matter)

if isinstance(utils.inspect.extract_relevant(model), Recommender):
if isinstance(utils.inspect.extract_relevant(model), Ranker):
yield reco.check_reco_routine

for check in checks:
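
The dummy check dataset now consists of bare (user, item, reward) triples rather than ({"user": ..., "item": ...}, y) pairs. A hedged sketch of how a ranker would consume such triples; the choice of Baseline is arbitrary and purely for illustration.

from river import optim, reco

# Feeding the new triple-based dummy data to a ranker (illustration only).
dataset = (
    ("Alice", "Superman", 8),
    ("Alice", "Terminator", 9),
    ("Bob", "Notting Hill", 2),
)

model = reco.Baseline(optimizer=optim.SGD(0.005))

for user, item, y in dataset:
    model.learn_one(user, item, y)

print(model.predict_one("Bob", "Notting Hill"))
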
6 changes: 3 additions & 3 deletions river/checks/reco.py
@@ -1,7 +1,7 @@
import random


def check_reco_routine(recommender):
def check_reco_routine(ranker):

users = ["Tom", "Anna"]
items = {"politics", "sports", "music", "food", "finance", "health", "camping"}
@@ -15,8 +15,8 @@ def get_reward(user, item) -> bool:
for i in range(100):

user = random.choice(users)
item = recommender.recommend(user, k=1, items=items, strategy="best")[0]
item = ranker.rank(user, items)[0]

clicked = get_reward(user, item)

recommender.learn_one({"user": user, "item": item}, clicked)
ranker.learn_one(user, item, clicked)
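
Putting the changed lines together, the updated check roughly becomes the loop below. Only the changed lines are shown in the diff, so the `get_reward` body here is a stand-in rather than the real one.

import random

from river import optim, reco

def check_reco_routine(ranker):
    # Rough reconstruction of the updated check: rank() replaces
    # recommend(..., strategy="best"), and learn_one() takes the
    # (user, item, reward) triple directly.
    users = ["Tom", "Anna"]
    items = {"politics", "sports", "music", "food", "finance", "health", "camping"}

    def get_reward(user, item) -> bool:
        # Stand-in reward function; the real check defines its own.
        return (len(user) + len(item)) % 2 == 0

    for i in range(100):
        user = random.choice(users)
        item = ranker.rank(user, items)[0]
        clicked = get_reward(user, item)
        ranker.learn_one(user, item, clicked)

check_reco_routine(reco.Baseline(optimizer=optim.SGD(0.01)))
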
22 changes: 19 additions & 3 deletions river/reco/__init__.py
@@ -1,8 +1,24 @@
"""Recommender systems."""
from .base import Recommender
"""Recommender systems module.
Recommender systems (recsys for short) is a large topic. This module is far from comprehensive. It
simply provides models which can contribute towards building a recommender system.
A typical recommender system is made up of a retrieval phase, followed by a ranking phase. The
output of the retrieval phase is a shortlist of the catalogue of items. The items in the shortlist
are then usually ranked according to the expected preference the user will have for each item. This
module focuses on the ranking phase.
Models which inherit from the `Ranker` class have a `rank` method. This allows sorting a set of
items for a given user. Each model also has a `learn_one(user, item, y, context)` which allows
learning user preferences. The `y` parameter is a reward value, the nature of which depends is
specific to each and every recommendation task. Typically the reward is a number or a boolean
value. It is up to the user to determine how to translate a user session into training data.
"""
from .base import Ranker
from .baseline import Baseline
from .biased_mf import BiasedMF
from .funk_mf import FunkMF
from .normal import RandomNormal

__all__ = ["Baseline", "BiasedMF", "FunkMF", "RandomNormal", "Recommender"]
__all__ = ["Baseline", "BiasedMF", "FunkMF", "RandomNormal", "Ranker"]
87 changes: 18 additions & 69 deletions river/reco/base.py
@@ -3,16 +3,14 @@
import random
import typing

import numpy as np

from river import base

ID = typing.Union[str, int]
Reward = typing.Union[numbers.Number, bool]


class Recommender(base.Estimator):
"""Base class for recommendation models.
class Ranker(base.Estimator):
"""Base class for ranking models.
Parameters
----------
@@ -24,31 +22,13 @@ class Recommender(base.Estimator):
def __init__(self, seed: int = None):
self.seed = seed
self._rng = random.Random(seed)
self._numpy_rng = np.random.RandomState(seed)
self._items = set()

@property
def is_contextual(self):
return False

def learn_one(self, x, y: Reward):
x = x.copy()
user = x.pop("user")
item = x.pop("item")
self._items.add(item)
return self._learn_user_item(user, item, context=x, reward=y)

def predict_one(self, x) -> float:
x = x.copy()
user = x.pop("user")
item = x.pop("item")
self._items.add(item)
return self._predict_user_item(user, item, context=x)

@abc.abstractmethod
def _learn_user_item(
self, user: ID, item: ID, context: typing.Optional[dict], reward: Reward
) -> "Recommender":
def learn_one(self, user: ID, item: ID, y: Reward, x: dict = None):
"""Fits a `user`-`item` pair and a real-valued target `y`.
Parameters
@@ -57,17 +37,15 @@ def _learn_user_item(
A user ID.
item
An item ID.
context
Side information.
reward
Feedback from the user for this item.
y
Reward feedback from the user for the item. This may be a boolean or a number.
x
Optional context to use.
"""

@abc.abstractmethod
def _predict_user_item(
self, user: ID, item: ID, context: typing.Optional[dict]
) -> float:
def predict_one(self, user: ID, item: ID, x: dict = None) -> Reward:
"""Predicts the target value of a set of features `x`.
Parameters
@@ -76,59 +54,30 @@ def _predict_user_item(
A user ID.
item
An item ID.
context
Side information.
x
Optional context to use.
Returns
-------
The predicted rating.
The predicted preference from the user for the item.
"""

def recommend(
self,
user: ID,
k=1,
context: typing.Optional[dict] = None,
items: typing.Optional[typing.Set[ID]] = None,
strategy="best",
) -> typing.List[ID]:
"""Recommend k items to a user.
def rank(self, user: ID, items: typing.Set[ID], x: dict = None) -> typing.List[ID]:
"""Rank models by decreasing order of preference for a given user.
Parameters
----------
user
A user ID.
k
The number of items to recommend.
context
Side information.
items
An optional set of items that should be considered. Every seen item will be considered
if this isn't specified.
strategy
The strategy used to select which items to recommend once they've been scored.
A set of items to rank.
x
Optional context to use.
"""

items = list(items or self._items)
if not items:
return []

# Evaluate the preference of the user towards each item given the context
preferences = [
self._predict_user_item(user, item, context=context) for item in items
]

# Apply the selection strategy
if strategy == "best":
return [item for _, item in sorted(zip(preferences, items), reverse=True)][
:k
]

raise ValueError(
f"{strategy} is not a valid value for strategy, must be one of: best"
)
preferences = {item: self.predict_one(user, item, x) for item in items}
return sorted(preferences, key=preferences.get, reverse=True)

def _unit_test_skips(self):
return {"check_emerging_features", "check_disappearing_features"}
20 changes: 9 additions & 11 deletions river/reco/baseline.py
@@ -2,14 +2,14 @@
import copy
import typing

from river import base, optim, stats, utils
from river import optim, stats, utils

from .base import Recommender
from .base import Ranker

__all__ = ["Baseline"]


class Baseline(Recommender, base.Regressor):
class Baseline(Ranker):
"""Baseline for recommender systems.
A first-order approximation of the biases involved in the target. The model equation is defined as:
@@ -70,9 +70,9 @@ class Baseline(Recommender, base.Regressor):
>>> model = reco.Baseline(optimizer=optim.SGD(0.005))
>>> for x, y in dataset:
... _ = model.learn_one(x, y)
... _ = model.learn_one(**x, y=y)
>>> model.predict_one({'user': 'Bob', 'item': 'Harry Potter'})
>>> model.predict_one(user='Bob', item='Harry Potter')
6.538120
References
@@ -115,18 +115,16 @@ def __init__(
int, optim.initializers.Initializer
] = collections.defaultdict(initializer)

def _predict_user_item(self, user, item, context):
def predict_one(self, user, item, x=None):
return self.global_mean.get() + self.u_biases[user] + self.i_biases[item]

def _learn_user_item(self, user, item, context, reward):
def learn_one(self, user, item, y, x=None):

# Update the global mean
self.global_mean.update(reward)
self.global_mean.update(y)

# Calculate the gradient of the loss with respect to the prediction
g_loss = self.loss.gradient(
reward, self._predict_user_item(user, item, context)
)
g_loss = self.loss.gradient(y, self.predict_one(user, item))

# Clamp the gradient to avoid numerical instability
g_loss = utils.math.clamp(
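
The Baseline docstring example now unpacks each rating dict into the new keyword signature. A self-contained take on that example; the dataset literal below is abbreviated and illustrative, reusing ratings from the dummy dataset earlier in this diff.

from river import optim, reco

# Self-contained version of the updated docstring example (illustrative data).
dataset = (
    ({'user': 'Alice', 'item': 'Superman'}, 8),
    ({'user': 'Alice', 'item': 'Terminator'}, 9),
    ({'user': 'Bob', 'item': 'Superman'}, 8),
    ({'user': 'Bob', 'item': 'Notting Hill'}, 2),
)

model = reco.Baseline(optimizer=optim.SGD(0.005))

for x, y in dataset:
    # Each dict unpacks into the new learn_one(user, item, y) signature.
    _ = model.learn_one(**x, y=y)

print(model.predict_one(user='Bob', item='Superman'))
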
20 changes: 9 additions & 11 deletions river/reco/biased_mf.py
@@ -5,14 +5,14 @@

import numpy as np

from river import base, optim, stats, utils
from river import optim, stats, utils

from .base import Recommender
from .base import Ranker

__all__ = ["BiasedMF"]


class BiasedMF(Recommender, base.Regressor):
class BiasedMF(Ranker):
"""Biased Matrix Factorization for recommender systems.
The model equation is defined as:
@@ -99,9 +99,9 @@ class BiasedMF(Recommender, base.Regressor):
... )
>>> for x, y in dataset:
... _ = model.learn_one(x, y)
... _ = model.learn_one(**x, y=y)
>>> model.predict_one({'user': 'Bob', 'item': 'Harry Potter'})
>>> model.predict_one(user='Bob', item='Harry Potter')
6.489025
References
@@ -174,7 +174,7 @@ def __init__(
int, optim.initializers.Initializer
] = collections.defaultdict(random_latents)

def _predict_user_item(self, user, item, context):
def predict_one(self, user, item, x=None):

# Initialize the prediction to the mean
y_pred = self.global_mean.get()
@@ -190,15 +190,13 @@ def _predict_user_item(self, user, item, context):

return y_pred

def _learn_user_item(self, user, item, context, reward):
def learn_one(self, user, item, y, x=None):

# Update the global mean
self.global_mean.update(reward)
self.global_mean.update(y)

# Calculate the gradient of the loss with respect to the prediction
g_loss = self.loss.gradient(
reward, self._predict_user_item(user, item, context)
)
g_loss = self.loss.gradient(y, self.predict_one(user, item))

# Clamp the gradient to avoid numerical instability
g_loss = utils.math.clamp(
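
For completeness, a short hedged sketch of BiasedMF under the renamed API, where the private _learn_user_item/_predict_user_item hooks above become learn_one/predict_one. Default hyperparameters are assumed to exist and are used purely for illustration.

from river import reco

# Hedged sketch only: default hyperparameters, made-up ratings.
model = reco.BiasedMF()

model.learn_one(user='Bob', item='Harry Potter', y=5)
model.learn_one(user='Bob', item='Superman', y=8)

# Prediction is still global mean + user bias + item bias + latent dot product.
print(model.predict_one(user='Bob', item='Superman'))
print(model.rank('Bob', items={'Superman', 'Harry Potter'}))
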
