3
3
import tensorflow as tf
4
4
import sys
5
5
import os
6
- print tf .__version__
6
+ print (tf .__version__ )
7
+ import fire
8
+ from elapsedtimer import ElapsedTimer
7
9
8
10
class recommender :
9
11
10
def __init__(self, mode, train_file, outdir, test_file=None,
             batch_size=32, epochs=500,
             learning_rate=1e-3, num_hidden=50,
             display_step=5):
    """Record the run configuration on the instance.

    Nothing is loaded here; the constructor only stores its arguments so
    that python-fire can build the object straight from command-line flags.

    Args:
        mode: ``'train'`` selects the training path; any other value is
            treated as the inference path by ``read_data`` and
            ``main_process``.
        train_file: In train mode, a ``.npy`` file loaded with ``np.load``;
            otherwise a CSV read with ``pd.read_csv``.
        outdir: Location used for the saved model and the CSV outputs.
        test_file: ``.npy`` file loaded with ``np.load`` in non-train mode.
        batch_size: Number of rows drawn per mini-batch by ``next_batch``.
        epochs: Number of passes of the training loop.
        learning_rate: Optimiser step size (consumed by the network code,
            which is outside this block -- presumably the RBM update).
        num_hidden: Hidden-layer width (consumed by the network code,
            which is outside this block).
        display_step: Progress is reported and the model checkpointed
            every ``display_step`` epochs.
    """
    # Where to read from / write to.
    self.train_file = train_file
    self.test_file = test_file
    self.outdir = outdir

    # Which pipeline to run.
    self.mode = mode

    # Hyper-parameters of the training run.
    self.batch_size = batch_size
    self.epochs = epochs
    self.learning_rate = learning_rate
    self.num_hidden = num_hidden
    self.display_step = display_step
28
+
29
def read_data(self):
    """Load the input data required by the configured mode.

    Train mode (``self.mode == 'train'``):
        ``self.train_file`` is loaded with ``np.load`` into
        ``self.train_data`` and the sizes of its first three axes are
        stored as ``self.users`` (axis 0), ``self.num_movies`` (axis 1)
        and ``self.num_ranks`` (axis 2).

    Any other mode:
        ``self.train_file`` is read as a CSV into ``self.train_df``
        (presumably the prediction CSV written by training -- confirm
        against the caller), ``self.test_file`` is loaded with ``np.load``
        into ``self.test_data`` and wrapped in ``self.test_df`` with the
        columns ``userid``, ``movieid`` and ``rating``.

    Raises:
        ValueError: If a non-train mode is requested but no ``test_file``
            was supplied (the constructor default is ``None``).
    """
    if self.mode == 'train':
        self.train_data = np.load(self.train_file)
        dims = self.train_data.shape
        self.num_ranks = dims[2]
        self.num_movies = dims[1]
        self.users = dims[0]
        return

    # Fail early with an actionable message instead of letting
    # ``np.load(None)`` raise an opaque low-level error after the
    # CSV has already been read.
    if self.test_file is None:
        raise ValueError(
            "test_file is required when mode is not 'train'"
        )

    self.train_df = pd.read_csv(self.train_file)
    self.test_data = np.load(self.test_file)
    # The three column names imply one (user, movie, rating) triple per row.
    self.test_df = pd.DataFrame(
        self.test_data, columns=['userid', 'movieid', 'rating']
    )
40
45
41
46
def next_batch(self):
    """Yield random training mini-batches forever.

    On every iteration ``self.batch_size`` row indices are drawn from
    ``self.train_data`` with ``np.random.choice`` (its default samples
    with replacement, so a batch may contain the same row twice) and the
    selected rows are yielded as one array.  The generator never
    terminates; the consumer decides how many batches to pull.
    """
    while True:
        # An integer population is sampled as if it were ``np.arange(n)``.
        num_rows = self.train_data.shape[0]
        picked = np.random.choice(num_rows, self.batch_size)
        yield self.train_data[picked, :, :]
46
51
47
52
@@ -64,7 +69,7 @@ def sample_visible(logits):
64
69
sampled_logits = tf .multinomial (logits ,1 )
65
70
sampled_logits = tf .one_hot (sampled_logits ,depth = 5 )
66
71
logits = tf .reshape (logits ,[- 1 ,self .num_movies * self .num_ranks ])
67
- print logits
72
+ print ( logits )
68
73
return logits
69
74
70
75
@@ -110,27 +115,27 @@ def _train(self):
110
115
# TensorFlow graph execution
111
116
112
117
with tf .Session () as sess :
113
- saver = tf .train .Saver (max_to_keep = 100 , write_version = 1 )
118
+ self . saver = tf .train .Saver ()
114
119
#saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
115
120
# Initialize the variables of the Model
116
121
init = tf .global_variables_initializer ()
117
122
sess .run (init )
118
123
119
- total_batches = self .data .shape [0 ]// self .batch_size
124
+ total_batches = self .train_data .shape [0 ]// self .batch_size
120
125
batch_gen = self .next_batch ()
121
126
# Start the training
122
127
for epoch in range (self .epochs ):
123
128
if epoch < 150 :
124
- k = 2
129
+ self . k = 2
125
130
126
131
if (epoch > 150 ) & (epoch < 250 ):
127
- k = 3
132
+ self . k = 3
128
133
129
134
if (epoch > 250 ) & (epoch < 350 ):
130
- k = 5
135
+ self . k = 5
131
136
132
137
if (epoch > 350 ) & (epoch < 500 ):
133
- k = 9
138
+ self . k = 9
134
139
135
140
# Loop over all batches
136
141
for i in range (total_batches ):
@@ -142,69 +147,40 @@ def _train(self):
142
147
# Display the running step
143
148
if epoch % self .display_step == 0 :
144
149
print ("Epoch:" , '%04d' % (epoch + 1 ))
145
- saver .save (sess , os .path .join (self .path_save ,'model' ), global_step = epoch )
150
+ print (self .outdir )
151
+ self .saver .save (sess ,os .path .join (self .outdir ,'model' ), global_step = epoch )
152
+ # Do the prediction for all users all items irrespective of whether they have been rated
153
+ self .logits_pred = tf .reshape (self .x_ ,[self .users ,self .num_movies ,self .num_ranks ])
154
+ self .probs = tf .nn .softmax (self .logits_pred ,axis = 2 )
155
+ out = sess .run (self .probs ,feed_dict = {self .x :self .train_data })
156
+ recs = []
157
+ for i in range (self .users ):
158
+ for j in range (self .num_movies ):
159
+ rec = [i ,j ,np .argmax (out [i ,j ,:]) + 1 ]
160
+ recs .append (rec )
161
+ recs = np .array (recs )
162
+ df_pred = pd .DataFrame (recs ,columns = ['userid' ,'movieid' ,'predicted_rating' ])
163
+ df_pred .to_csv (self .outdir + 'pred_all_recs.csv' ,index = False )
146
164
147
- print ("RBM training Completed !" )
148
-
149
-
165
+ print ("RBM training Completed !" )
150
166
151
- def _inference (self ):
152
-
153
- self .model_path = sys .argv [3 ]
154
-
155
- #self.test_data = self.data
156
- self .__network ()
157
- sess = tf .Session ()
158
-
159
- saver = tf .train .Saver (tf .all_variables (), reshape = True )
160
- saver .restore (sess ,self .model_path )
161
- x_ = tf .matmul (self .h ,tf .transpose (self .W )) + self .b_v
162
- #print x_
163
- logits = tf .reshape (x_ ,[- 1 ,self .num_ranks ])
164
- # print logits
165
- logits = tf .argmax (logits ,axis = - 1 )
166
- # print logits
167
- logits = tf .reshape (logits ,[- 1 ,self .num_movies ])
168
- out = sess .run (logits ,feed_dict = {self .x :self .test_data })
169
- ratings_pred = []
170
- i = 0
171
- for x in self .movie_index :
172
- pred = out [i ,x ] + 1
173
- ratings_pred .append (pred )
174
- i += 1
175
-
176
- ratings_pred = np .array (ratings_pred )
177
- ratings_pred = np .reshape (ratings_pred ,(- 1 ,1 ))
178
- print ratings_pred .shape
179
- print self .data .shape
180
- out = np .hstack ((self .data ,ratings_pred ))
181
- out = pd .DataFrame (out )
182
- print out
183
- out .columns = ['User' ,'Movie' ,'Actual Rating' ,'Predicted Rating' ]
184
- return out
185
-
167
def inference(self):
    """Score stored predictions against the held-out test ratings.

    ``self.test_df`` is inner-joined with ``self.train_df`` on
    ``userid`` / ``movieid`` (``DataFrame.merge`` default join), the joined
    table is kept as ``self.df_result`` and written to
    ``<outdir>/test_results.csv``, and the root-mean-square error between
    the ``rating`` and ``predicted_rating`` columns is printed.

    The output path is built with ``os.path.join`` so the file lands inside
    ``self.outdir`` whether or not the directory was given with a trailing
    separator; the directory is created when it does not exist yet.
    """
    self.df_result = self.test_df.merge(self.train_df, on=['userid', 'movieid'])

    # An empty ``outdir`` means "current directory"; ``os.makedirs('')``
    # would raise, so only create a directory when one is actually named.
    if self.outdir:
        os.makedirs(self.outdir, exist_ok=True)
    result_path = os.path.join(self.outdir, 'test_results.csv')
    self.df_result.to_csv(result_path, index=False)
    print(f'output written to {result_path}')

    errors = (self.df_result['rating'].values
              - self.df_result['predicted_rating'].values)
    test_rmse = (np.mean(errors ** 2)) ** 0.5
    print(f'test RMSE : {test_rmse} ')
206
174
207
175
208
-
176
def main_process(self):
    """Run the whole pipeline for the configured mode.

    Data is loaded first via ``read_data``; afterwards ``_train`` is run
    when ``self.mode`` equals ``'train'`` and ``inference`` is run for any
    other mode.
    """
    self.read_data()

    # Pick the stage after loading, then run it.
    stage = self._train if self.mode == 'train' else self.inference
    stage()
183
+
184
if __name__ == '__main__':
    # Script entry point: python-fire turns the ``recommender`` class into
    # a command-line interface (constructor arguments become flags, methods
    # become commands).  The call runs inside an ``ElapsedTimer`` context
    # labelled 'process RBM' -- presumably to report wall-clock time.
    timer = ElapsedTimer('process RBM')
    with timer:
        fire.Fire(recommender)
0 commit comments