Merge pull request #9 from domainxz/feature/self_attention

Feature/self attention
domainxz · Dec 29, 2019 · 535cdbc · 535cdbc
2 parents 5dfa572 + d960146
commit 535cdbc
Show file tree

Hide file tree

Showing 12 changed files with 587 additions and 473 deletions.
diff --git a/.gitignore b/.gitignore
@@ -91,3 +91,5 @@ ENV/
 embed/*/*.dat
 # PyCharm
 .idea/
+# Test scripts
+test*.py
diff --git a/single/__init__.py b/single/__init__.py
@@ -0,0 +1,9 @@
+from .rec import REC
+from .bpr import BPR
+from .vbpr import VBPR
+from .wmf import WMF
+from .dpm import DPM
+from .cer import CER
+
+from .encoder import ENCODER
+from .mlp import MLP
diff --git a/single/bpr.py b/single/bpr.py
@@ -3,125 +3,132 @@
     Sampling Method : uniform item sampling per user
     Author          : Xingzhong Du
     E-mail          : [email protected]
-    Reference       : "BPR : Bayesian Personalized Ranking from Implicit Feedback", Ste en Rendle, Christoph Freudenthaler, Zeno Gantner and Lars Schmidt-Thieme
+    Reference       : "BPR : Bayesian Personalized Ranking from Implicit Feedback", Steffen Rendle, et al.
 """
 
-from abc import ABC, abstractmethod
 from collections import defaultdict
+from .rec import REC
 import numpy as np
-import os
 import sys
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 import time
-from utils import get_id_dict_from_file, get_data_from_file, export_embed_to_file
+from utils import tprint, get_id_dict_from_file, get_data_from_file
 
-class BPR(ABC):
-    def __init__(self, k, lambda_u=2.5e-3, lambda_i=2.5e-3, lambda_j=2.5e-4, lambda_b=0, lr=1.0e-4, mode='l2'):
-        self.k    = k;
-        self.lu   = lambda_u;
-        self.li   = lambda_i;
-        self.lj   = lambda_j;
-        self.lb   = lambda_b;
-        self.lr   = lr;
-        self.tf_config = tf.ConfigProto();
-        self.tf_config.gpu_options.allow_growth=True;
-        self.mode = mode;
-
-    def load_training_data(self, training_file, uid_file, iid_file, data_copy=False):
-        print ('Load training data from %s'%(training_file));
-        self.uids = get_id_dict_from_file(uid_file);
-        self.iids = get_id_dict_from_file(iid_file);
-        self.data = get_data_from_file(training_file, self.uids, self.iids);
-        self.epoch_sample_limit = len(self.data);
-        self.n_users = len(self.uids);
-        self.n_items = len(self.iids);
-        self.tr_data = self._data_to_training_dict(self.data, self.uids, self.iids);
-        self.tr_users = list(self.tr_data.keys());
+
+class BPR(REC):
+    def __init__(self, k: int, lambda_u: float = 2.5e-3, lambda_i: float = 2.5e-3, lambda_j: float = 2.5e-4, lambda_b: float = 0, lr: float = 1.0e-4, mode: str = 'l2') -> None:
+        self.k    = k
+        self.lu   = lambda_u
+        self.li   = lambda_i
+        self.lj   = lambda_j
+        self.lb   = lambda_b
+        self.lr   = lr
+        self.tf_config = tf.ConfigProto()
+        self.tf_config.gpu_options.allow_growth = True
+        self.mode = mode
+
+    def load_training_data(self, training_file: str, uid_file: str, iid_file: str, data_copy: bool = False) -> None:
+        tprint('Load training data from %s' % (training_file))
+        self.uids = get_id_dict_from_file(uid_file)
+        self.iids = get_id_dict_from_file(iid_file)
+        self.data = get_data_from_file(training_file, self.uids, self.iids)
+        self.epoch_sample_limit = len(self.data)
+        self.n_users = len(self.uids)
+        self.n_items = len(self.iids)
+        self.tr_data = self._data_to_training_dict(self.data, self.uids, self.iids)
+        self.tr_users = list(self.tr_data.keys())
         if not data_copy:
-            del self.data;
-        print('Loading finished!');
+            del self.data
+        tprint('Loading finished!')
 
-    def build_graph(self):
+    def build_graph(self) -> 'List[tf.placeholder[tf.int32]]':
         with tf.variable_scope('bpr', reuse=tf.AUTO_REUSE):
-            u = tf.placeholder(tf.int32, [None]);
-            i = tf.placeholder(tf.int32, [None]);
-            j = tf.placeholder(tf.int32, [None]);
+            u = tf.placeholder(tf.int32, [None])
+            i = tf.placeholder(tf.int32, [None])
+            j = tf.placeholder(tf.int32, [None])
 
-            self.__ue = tf.get_variable(name="user_embed", shape=[self.n_users, self.k], dtype=tf.float32, initializer=tf.random_normal_initializer(0, 0.01));
-            self.__ie = tf.get_variable(name="item_embed", shape=[self.n_items, self.k], dtype=tf.float32, initializer=tf.random_normal_initializer(0, 0.01));
-            self.__ib = tf.get_variable(name="item_bias",  shape=[self.n_items],         dtype=tf.float32, initializer=tf.constant_initializer(0.0));
+            self.__ue = tf.get_variable(name="user_embed", shape=[self.n_users, self.k], dtype=tf.float32, initializer=tf.random_normal_initializer(0, 0.01))
+            self.__ie = tf.get_variable(name="item_embed", shape=[self.n_items, self.k], dtype=tf.float32, initializer=tf.random_normal_initializer(0, 0.01))
+            self.__ib = tf.get_variable(name="item_bias",  shape=[self.n_items],         dtype=tf.float32, initializer=tf.constant_initializer(0.0))
 
-        ueb = tf.nn.embedding_lookup(self.__ue, u);
-        ieb = tf.nn.embedding_lookup(self.__ie, i);
-        jeb = tf.nn.embedding_lookup(self.__ie, j);
-        ib  = tf.nn.embedding_lookup(self.__ib, i);
-        jb  = tf.nn.embedding_lookup(self.__ib, j);
+        ueb = tf.nn.embedding_lookup(self.__ue, u)
+        ieb = tf.nn.embedding_lookup(self.__ie, i)
+        jeb = tf.nn.embedding_lookup(self.__ie, j)
+        ib  = tf.nn.embedding_lookup(self.__ib, i)
+        jb  = tf.nn.embedding_lookup(self.__ib, j)
 
-        x_ui  = tf.reduce_sum(tf.multiply(ueb, ieb), 1);
-        x_uj  = tf.reduce_sum(tf.multiply(ueb, jeb), 1);
-        x_uij = ib - jb + x_ui - x_uj;
+        x_ui  = tf.reduce_sum(tf.multiply(ueb, ieb), 1)
+        x_uj  = tf.reduce_sum(tf.multiply(ueb, jeb), 1)
+        x_uij = ib - jb + x_ui - x_uj
         with tf.name_scope('output'):
-            self.pred = tf.matmul(ueb, tf.transpose(ieb)) + ib;
+            self.pred = tf.matmul(ueb, tf.transpose(ieb)) + ib
             if self.mode == 'l2':
                 self.obj = tf.reduce_sum(tf.log(1+tf.exp(-x_uij)))+\
                            0.5 * tf.reduce_sum(ueb**2*self.lu+ieb**2*self.li+jeb**2*self.lj)+\
-                           0.5 * tf.reduce_sum(ib**2+jb**2)*self.lb;
+                           0.5 * tf.reduce_sum(ib**2+jb**2)*self.lb
             else:
                 self.obj = tf.reduce_sum(tf.log(1+tf.exp(-x_uij)))+\
                            tf.reduce_sum(tf.abs(ueb)*self.lu+tf.abs(ieb)*self.li+tf.abs(jeb)*self.lj)+\
-                           tf.reduce_sum(tf.abs(ib)+tf.abs(jb))*self.lb;
-        self.solver = tf.train.RMSPropOptimizer(self.lr).minimize(self.obj);
-        return u, i, j;
+                           tf.reduce_sum(tf.abs(ib)+tf.abs(jb))*self.lb
+        self.solver = tf.train.RMSPropOptimizer(self.lr).minimize(self.obj)
+        return u, i, j
 
-    def train(self, model_path, sampling='user uniform', epochs=5, batch_size=256):
+    def train(self, sampling: str = 'user uniform', epochs: int = 5, batch_size: int = 256):
         with tf.Graph().as_default():
-            u, i, j = self.build_graph();
-            batch_limit = self.epoch_sample_limit//batch_size + 1;
-            sess = tf.Session(config=self.tf_config);
-            sampler = None;
+            u, i, j = self.build_graph()
+            batch_limit = self.epoch_sample_limit//batch_size + 1
+            sess = tf.Session(config = self.tf_config)
+            sampler = None
             if sampling == 'user uniform':
-                sampler = self._uniform_user_sampling;
+                sampler = self._uniform_user_sampling
             with sess.as_default():
-                sess.run(tf.global_variables_initializer());
-                print ('Training parameters: lu=%.6f, li=%.6f, lj=%.6f, lb=%.6f'%(self.lu, self.li, self.lj, self.lb));
-                print ('Learning rate is %.6f, regularization mode is %s'%(self.lr, self.mode));
-                print ('Training for %d epochs of %d batches using %s sampler'%(epochs, batch_limit, sampling));
+                sess.run(tf.global_variables_initializer())
+                tprint('Training parameters: lu=%.6f, li=%.6f, lj=%.6f, lb=%.6f' % (self.lu, self.li, self.lj, self.lb))
+                tprint('Learning rate is %.6f, regularization mode is %s' % (self.lr, self.mode))
+                tprint('Training for %d epochs of %d batches using %s sampler' % (epochs, batch_limit, sampling))
+                if hasattr(self, 'fue'):
+                    tprint('Initialize user embeddings')
+                    sess.run(tf.assign(self.__ue, self.fue))
+                if hasattr(self, 'fie'):
+                    tprint('Initialize item embeddings')
+                    sess.run(tf.assign(self.__ie, self.fie))
+                if hasattr(self, 'fib'):
+                    tprint('Initialize item biases')
+                    sess.run(tf.assign(self.__ib, self.fib.ravel()))
                 for eid in range(epochs):
-                    total_time = 0;
-                    bno = 1;
+                    total_time = 0
+                    bno = 1
                     for ub, ib, jb in sampler(batch_size):
-                        t1 = time.time();
-                        _, loss = sess.run([self.solver, self.obj], feed_dict={u:ub, i:ib, j:jb});
-                        t2 = time.time()-t1;
-                        sys.stderr.write('\rEpoch=%3d, batch=%6d, loss=%8.4f, time=%4.4fs'%(eid+1, bno, loss, t2));
-                        total_time += t2;
-                        bno += 1;
+                        t1 = time.time()
+                        _, loss = sess.run([self.solver, self.obj], feed_dict = {u:ub, i:ib, j:jb})
+                        t2 = time.time()-t1
+                        sys.stderr.write('\rEpoch=%3d, batch=%6d, loss=%8.4f, time=%4.4fs' % (eid+1, bno, loss, t2))
+                        total_time += t2
+                        bno += 1
                         if bno == batch_limit:
-                            break;
-                    sys.stderr.write(' ... total time collapse %8.4fs'%(total_time));
-                    sys.stderr.flush();
-                    print();
-            if os.path.exists(os.path.dirname(model_path)):
-                print ('Saving model to path %s'%(model_path))
-                export_embed_to_file(os.path.join(model_path, 'final-U.dat'), sess.run(self.__ue))
-                export_embed_to_file(os.path.join(model_path, 'final-V.dat'), sess.run(self.__ie))
-                export_embed_to_file(os.path.join(model_path, 'final-B.dat'), sess.run(tf.reshape(self.__ib, (-1,1))))
+                            break
+                    sys.stderr.write(' ... total time collapse %8.4fs'%(total_time))
+                    sys.stderr.flush()
+                    print()
+            self.fue = sess.run(self.__ue)
+            self.fie = sess.run(self.__ie)
+            self.fib = sess.run(tf.reshape(self.__ib, (-1, 1)))
 
-    def _uniform_user_sampling(self, batch_size): 
-        ib = np.zeros(batch_size, dtype=np.int32);
-        jb = np.zeros(batch_size, dtype=np.int32);
+    def _uniform_user_sampling(self, batch_size: int) -> 'List[np.ndarray[np.int32]]':
+        ib = np.zeros(batch_size, dtype=np.int32)
+        jb = np.zeros(batch_size, dtype=np.int32)
         while True:
-            ub = np.random.choice(self.tr_users, batch_size);
+            ub = np.random.choice(self.tr_users, batch_size)
             for i in range(batch_size):
-                ib[i] = np.random.choice(self.tr_data[ub[i]]);
-                jb[i] = np.random.choice(self.n_items);
+                ib[i] = np.random.choice(self.tr_data[ub[i]])
+                jb[i] = np.random.choice(self.n_items)
                 while jb[i] in self.tr_data[ub[i]]:
-                    jb[i] = np.random.choice(self.n_items);
-            yield ub, ib, jb;
+                    jb[i] = np.random.choice(self.n_items)
+            yield ub, ib, jb
 
-    def _data_to_training_dict(self, data, users, items):
+    def _data_to_training_dict(self, data: list, users: dict, items: dict) -> 'Defaultdict[list]':
         data_dict = defaultdict(list)
         for (user, item) in data:
-            data_dict[users[user]].append(items[item]);
-        return data_dict;
+            data_dict[users[user]].append(items[item])
+        return data_dict
 
diff --git a/single/cer.py b/single/cer.py
@@ -1,71 +1,75 @@
+"""
+    Collaborative Embedding Regression (CER)
+    Sampling Method : uniform item sampling per user
+    Author          : Xingzhong Du
+    E-mail          : [email protected]
+    Reference       : "Personalized Video Recommendation Using Rich Contents from Videos", Xingzhong Du, et al.
+"""
+
 import numpy as np
 import os
-import pickle
-import scipy.sparse as ss
-import tensorflow as tf
 import time
 from .wmf import WMF
-from utils import get_id_dict_from_file, export_embed_to_file
+from utils import tprint, get_embed_from_file, export_embed_to_file
 
-class CER(WMF):
-    def __init__(self, k, d, lu=0.01, lv=10, le=10e3, a=1, b=0.01):
-        self.__sn = 'cer';
-        WMF.__init__(self, k, lu, lv, a, b);
-        self.d  = d;
-        self.le = le;
 
-    def load_content_data(self, content_file, iid_file):
-        print ('Load content data from %s'%(content_file));
-        fiids  = get_id_dict_from_file(iid_file);
-        self.F = np.zeros((self.n_items, self.d), dtype=np.float32);
-        F      = pickle.load(open(content_file, 'rb'), encoding='latin1');
-        if ss.issparse(F):
-            F = F.toarray();
-        for iid in self.iids:
-            if iid in fiids:
-                self.F[self.iids[iid],:]=F[fiids[iid],:];
-        print('Loading finished!');
+class CER(WMF):
+    def __init__(self, k: int, d: int, lu: float = 0.01, lv: float = 10, le: float = 10e3, a: float = 1, b: float = 0.01) -> None:
+        self.__sn = 'cer'
+        WMF.__init__(self, k, lu, lv, a, b)
+        self.d = d
+        self.le = le
 
-    def train(self, model_path, max_iter=200):
-        loss  = np.exp(50);
-        Ik    = np.eye(self.k, dtype=np.float32);
-        FF    = self.lv * np.dot(self.F.T, self.F) + self.le * np.eye(self.F.shape[1]);
-        self.E= np.random.randn(self.F.shape[1], self.k).astype(np.float32);
+    def train(self, max_iter: int = 200) -> None:
+        """Fit user factors, item factors and the projection E by alternating solves.
+
+        Each iteration projects the item features through ``self.E`` to get
+        the item factors, re-solves the rows of ``self.fue`` for users with
+        at least one entry in ``self.usm`` and every row of ``self.fie``
+        with ``np.linalg.solve``, then re-solves ``self.E`` against the
+        updated item factors. An existing ``self.E`` is reused as the
+        starting point; otherwise it is drawn at random. After the last
+        iteration, rows of ``self.fie`` for items not in ``self.i_rated``
+        are replaced by their projected features.
+
+        NOTE(review): ``self.feat``, ``self.usm``, ``self.ism``,
+        ``self.u_rated``, ``self.i_rated`` and ``self.fue`` are not set in
+        this class; presumably WMF or its loaders provide them - confirm.
+
+        :param max_iter: number of alternating iterations to run.
+        """
+        loss = np.exp(50)
+        Ik = np.eye(self.k, dtype=np.float32)
+        FF = self.lv * np.dot(self.feat.T, self.feat) + self.le * np.eye(self.feat.shape[1])
+        if not hasattr(self, 'E'):
+            self.E = np.random.randn(self.feat.shape[1], self.k).astype(np.float32)
         for it in range(max_iter):
-            t1     = time.time();
-            self.V = np.dot(self.F, self.E);
-            loss_old = loss;
-            loss     = 0;
-            Vr = self.V[np.array(self.i_rated), :];
-            XX = np.dot(Vr.T, Vr)*self.b + Ik*self.lu;
+            t1 = time.time()
+            self.fie = np.dot(self.feat, self.E)
+            loss_old = loss
+            loss = 0
+            Vr = self.fie[np.array(self.i_rated), :]
+            XX = np.dot(Vr.T, Vr) * self.b + Ik * self.lu
             for i in self.usm:
                 if len(self.usm[i]) > 0:
-                    Vi = self.V[np.array(self.usm[i]), :];
-                    self.U[i,:] = np.linalg.solve(np.dot(Vi.T, Vi)*(self.a-self.b)+XX, np.sum(Vi, axis=0)*self.a);
-                    loss += 0.5 * self.lu * np.sum(self.U[i,:]**2);
-            Ur = self.U[np.array(self.u_rated), :];
-            XX = np.dot(Ur.T, Ur)*self.b
+                    Vi = self.fie[np.array(self.usm[i]), :]
+                    self.fue[i, :] = np.linalg.solve(np.dot(Vi.T, Vi) * (self.a - self.b) + XX,
+                                                     np.sum(Vi, axis=0) * self.a)
+                    loss += 0.5 * self.lu * np.sum(self.fue[i, :] ** 2)
+            Ur = self.fue[np.array(self.u_rated), :]
+            XX = np.dot(Ur.T, Ur) * self.b
             for j in self.ism:
-                B  = XX;
-                Fe = self.V[j,:].copy();
+                # Copy: XX is shared by every item, and the in-place `B +=`
+                # below would otherwise accumulate earlier items' terms into it.
+                B = XX.copy()
+                Fe = self.fie[j, :].copy()
                 if len(self.ism[j]) > 0:
-                    Uj = self.U[np.array(self.ism[j]), :];
-                    B += np.dot(Uj.T, Uj)*(self.a-self.b); 
-                    self.V[j,:] = np.linalg.solve(B+Ik*self.lv, np.sum(Uj, axis=0)*self.a + Fe*self.lv);
-                    loss += 0.5 * np.linalg.multi_dot((self.V[j,:], B, self.V[j,:]));
-                    loss += 0.5 * len(self.ism[j])*self.a;
-                    loss -= np.sum(np.multiply(Uj, self.V[j,:]))*self.a;
+                    Uj = self.fue[np.array(self.ism[j]), :]
+                    B += np.dot(Uj.T, Uj) * (self.a - self.b)
+                    self.fie[j, :] = np.linalg.solve(B + Ik * self.lv, np.sum(Uj, axis=0) * self.a + Fe * self.lv)
+                    loss += 0.5 * np.linalg.multi_dot((self.fie[j, :], B, self.fie[j, :]))
+                    loss += 0.5 * len(self.ism[j]) * self.a
+                    loss -= np.sum(np.multiply(Uj, self.fie[j, :])) * self.a
                 else:
-                    self.V[j,:] = np.linalg.solve(B+Ik*self.lv, Fe*self.lv);
-                loss += 0.5 * self.lv * np.sum((self.V[j,:] - Fe)**2);
-            self.E = np.linalg.solve(FF, self.lv * np.dot(self.F.T, self.V));
-            loss  += 0.5 * self.le * np.sum(self.E ** 2);
-            print ('Iter %3d, loss %.6f, time %.2fs'%(it, loss, time.time()-t1));
-        if os.path.exists(os.path.dirname(model_path)):
-            print ('Saving model to path %s'%(model_path))
-            Fe = np.dot(self.F, self.E);
-            for iidx in self.ism:
-                if iidx not in self.i_rated:
-                    self.V[iidx, :] = Fe[iidx, :];
-            export_embed_to_file(os.path.join(model_path, 'final-U.dat'), self.U);
-            export_embed_to_file(os.path.join(model_path, 'final-V.dat'), self.V);
+                    self.fie[j, :] = np.linalg.solve(B + Ik * self.lv, Fe * self.lv)
+                loss += 0.5 * self.lv * np.sum((self.fie[j, :] - Fe) ** 2)
+            self.E = np.linalg.solve(FF, self.lv * np.dot(self.feat.T, self.fie))
+            loss += 0.5 * self.le * np.sum(self.E ** 2)
+            tprint('Iter %3d, loss %.6f, time %.2fs' % (it, loss, time.time() - t1))
+        # Items without ratings fall back to their projected features.
+        Fe = np.dot(self.feat, self.E)
+        for iidx in self.ism:
+            if iidx not in self.i_rated:
+                self.fie[iidx, :] = Fe[iidx, :]
+
+    def import_embeddings(self, model_path: str) -> None:
+        super().import_embeddings(model_path)
+        file_path = os.path.join(model_path, 'final-E.dat')
+        if os.path.exists(file_path):
+            self.E = get_embed_from_file(file_path)
+
+    def export_embeddings(self, model_path: str) -> None:
+        """Export the parent-class embeddings, then the projection matrix E.
+
+        After delegating to the parent implementation, writes ``self.E`` to
+        ``final-E.dat`` under ``model_path`` when ``model_path`` exists and
+        ``E`` has been set (by ``train`` or ``import_embeddings``).
+
+        :param model_path: directory that receives the exported files.
+        """
+        super().export_embeddings(model_path)
+        # Test the path itself. The previous nested os.path.exists() call
+        # tested a bool, which is treated as a file descriptor rather than
+        # a path, so a missing directory was not detected.
+        if os.path.exists(model_path):
+            if hasattr(self, 'E'):
+                export_embed_to_file(os.path.join(model_path, 'final-E.dat'), self.E)