Skip to content

Commit bf210d7

Browse files
author
浅梦
authored
Update run_dsin.py
1 parent be65ce9 commit bf210d7

15 files changed

+25
-165
lines changed

deepctr/inputs.py

+1-24
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
from itertools import chain
1111

1212
from tensorflow.python.keras.initializers import RandomNormal
13-
from tensorflow.python.keras.layers import Concatenate, Dense, Embedding, Input, Reshape, add,Flatten
13+
from tensorflow.python.keras.layers import Concatenate, Dense, Embedding, Input, add,Flatten
1414
from tensorflow.python.keras.regularizers import l2
1515

1616
from .layers.sequence import SequencePoolingLayer
@@ -152,14 +152,6 @@ def get_varlen_pooling_list(embedding_dict, features, varlen_sparse_feature_colu
152152
pooling_vec_list.append(vec)
153153
return pooling_vec_list
154154

155-
# def get_pooling_vec_list(sequence_embed_dict, sequence_len_dict, sequence_max_len_dict, sequence_fd_list):
156-
# if sequence_max_len_dict is None or sequence_len_dict is None:
157-
# return [SequencePoolingLayer(feat.combiner, supports_masking=True)(sequence_embed_dict[feat.name]) for feat in
158-
# sequence_fd_list]
159-
# else:
160-
# return [SequencePoolingLayer(feat.combiner, supports_masking=False)(
161-
# [sequence_embed_dict[feat.name], sequence_len_dict[feat.name]]) for feat in sequence_fd_list]
162-
163155

164156
def get_inputs_list(inputs):
165157
return list(chain(*list(map(lambda x: x.values(), filter(lambda x: x is not None, inputs)))))
@@ -233,21 +225,6 @@ def get_dense_input(features,feature_columns):
233225
return dense_input_list
234226

235227

236-
# def get_varlen_vec_list(embedding_dict, features, varlen_sparse_feature_columns):
237-
# vec_list = []
238-
# for fc in varlen_sparse_feature_columns:
239-
# feature_name = fc.name
240-
# feature_length_name = feature_name + "_seq_length"
241-
# if feature_length_name in features:
242-
# vector = SequencePoolingLayer(fc.combiner, supports_masking=False)(
243-
# [embedding_dict[feature_name], features[feature_length_name]])
244-
# else:
245-
# vector = SequencePoolingLayer(fc.combiner, supports_masking=True)(embedding_dict[feature_name])
246-
# vec_list.append(vector)
247-
# return vec_list
248-
249-
250-
251228
def input_from_feature_columns(features,feature_columns, embedding_size, l2_reg, init_std, seed,prefix='',seq_mask_zero=True):
252229

253230

deepctr/models/dcn.py

+2-4
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
"""
99
import tensorflow as tf
1010

11-
from ..inputs import input_from_feature_columns,build_input_features
11+
from ..inputs import input_from_feature_columns,build_input_features,combined_dnn_input
1212
from ..layers.core import PredictionLayer, DNN
1313
from ..layers.interaction import CrossNet
1414
from ..layers.utils import concat_fun
@@ -38,16 +38,14 @@ def DCN(dnn_feature_columns, embedding_size='auto', cross_num=2, dnn_hidden_unit
3838
if len(dnn_hidden_units) == 0 and cross_num == 0:
3939
raise ValueError("Either hidden_layer or cross layer must > 0")
4040

41-
#check_feature_config_dict(feature_dim_dict)
4241
features = build_input_features(dnn_feature_columns)
4342
inputs_list = list(features.values())
4443

4544
sparse_embedding_list, dense_value_list = input_from_feature_columns(features,dnn_feature_columns,
4645
embedding_size,
4746
l2_reg_embedding, init_std,
4847
seed)
49-
#todo not support dense?
50-
dnn_input = tf.keras.layers.Flatten()(concat_fun(sparse_embedding_list))
48+
dnn_input = combined_dnn_input(sparse_embedding_list,dense_value_list)
5149

5250
if len(dnn_hidden_units) > 0 and cross_num > 0: # Deep & Cross
5351
deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,

deepctr/models/din.py

+1-4
Original file line number | Diff line number | Diff line change
@@ -7,12 +7,9 @@
77
[1] Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068. (https://arxiv.org/pdf/1706.06978.pdf)
88
"""
99

10-
from collections import OrderedDict
1110

12-
from tensorflow.python.keras.initializers import RandomNormal
13-
from tensorflow.python.keras.layers import Input, Dense, Embedding, Concatenate, Flatten
11+
from tensorflow.python.keras.layers import Dense,Concatenate, Flatten
1412
from tensorflow.python.keras.models import Model
15-
from tensorflow.python.keras.regularizers import l2
1613

1714
from ..inputs import build_input_features,create_embedding_matrix,SparseFeat,VarLenSparseFeat,DenseFeat,embedding_lookup,get_dense_input,varlen_embedding_lookup,get_varlen_pooling_list,combined_dnn_input
1815
from ..layers.core import DNN, PredictionLayer

deepctr/models/dsin.py

+5-43
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
from ..layers.utils import NoMask, concat_fun
2525

2626

27-
def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_count=5, sess_len_max=10, bias_encoding=False,
27+
def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_count=5, bias_encoding=False,
2828
att_embedding_size=1, att_head_num=8, dnn_hidden_units=(200, 80), dnn_activation='sigmoid', dnn_dropout=0,
2929
dnn_use_bn=False, l2_reg_dnn=0, l2_reg_embedding=1e-6, init_std=0.0001, seed=1024, task='binary',
3030
):
@@ -49,33 +49,12 @@ def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_coun
4949
:return: A Keras model instance.
5050
5151
"""
52-
#check_feature_config_dict(dnn_feature_columns)
5352

5453
if (att_embedding_size * att_head_num != len(sess_feature_list) * embedding_size):
5554
raise ValueError(
5655
"len(session_feature_lsit) * embedding_size must equal to att_embedding_size * att_head_num ,got %d * %d != %d *%d" % (
5756
len(sess_feature_list), embedding_size, att_embedding_size, att_head_num))
5857

59-
# sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input(
60-
# dnn_feature_columns, sess_feature_list, sess_max_count, sess_len_max)
61-
62-
# def get_input(feature_dim_dict, seq_feature_list, sess_max_count, seq_max_len):
63-
# sparse_input, dense_input = build_input_features(feature_dim_dict)
64-
# user_behavior_input = {}
65-
# for idx in range(sess_max_count):
66-
# sess_input = OrderedDict()
67-
# for i, feat in enumerate(seq_feature_list):
68-
# sess_input[feat] = Input(
69-
# shape=(seq_max_len,), name='seq_' + str(idx) + str(i) + '-' + feat)
70-
#
71-
# user_behavior_input["sess_" + str(idx)] = sess_input
72-
#
73-
# user_behavior_length = {"sess_" + str(idx): Input(shape=(1,), name='seq_length' + str(idx)) for idx in
74-
# range(sess_max_count)}
75-
# user_sess_length = Input(shape=(1,), name='sess_length')
76-
#
77-
# return sparse_input, dense_input, user_behavior_input, user_behavior_length, user_sess_length
78-
7958

8059
features = build_input_features(dnn_feature_columns)
8160

@@ -85,15 +64,13 @@ def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_coun
8564
varlen_sparse_feature_columns = list(filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
8665

8766

88-
history_feature_columns = []
67+
8968
sparse_varlen_feature_columns = []
9069
history_fc_names = list(map(lambda x: "sess" + x, sess_feature_list))
91-
#user_behavior_input_dict = {"sess_"+str(i):{} for i in range(sess_max_count)}
9270
for fc in varlen_sparse_feature_columns:
9371
feature_name = fc.name
9472
if feature_name in history_fc_names:
9573
continue
96-
#history_feature_columns.append(fc)
9774
else:
9875
sparse_varlen_feature_columns.append(fc)
9976

@@ -106,13 +83,11 @@ def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_coun
10683
sess_input = OrderedDict()
10784
for i, feat in enumerate(sess_feature_list):
10885
sess_input[feat] = features["sess_"+str(idx)+"_"+feat]
109-
#Input(shape=(seq_max_len,), name='seq_' + str(idx) + str(i) + '-' + feat)
86+
11087

11188
user_behavior_input_dict["sess_" + str(idx)] = sess_input
11289

11390

114-
#user_behavior_length = {"sess_" + str(idx): Input(shape=(1,), name='seq_length' + str(idx)) for idx in
115-
# range(sess_max_count)}
11691
user_sess_length = Input(shape=(1,), name='sess_length')
11792

11893

@@ -130,20 +105,12 @@ def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_coun
130105

131106

132107
query_emb_list = embedding_lookup(embedding_dict,features,sparse_feature_columns,sess_feature_list,sess_feature_list)#query是单独的
133-
keys_emb_list = embedding_lookup(embedding_dict, features, history_feature_columns, history_fc_names, history_fc_names)
134108
dnn_input_emb_list = embedding_lookup(embedding_dict,features,sparse_feature_columns,mask_feat_list=sess_feature_list)
135109
dense_value_list = get_dense_input(features, dense_feature_columns)
136110

137-
138-
139-
140-
#query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, dnn_feature_columns["sparse"],
141-
# sess_feature_list, sess_feature_list)
142-
143111
query_emb = concat_fun(query_emb_list)
144112

145-
#dnn_input_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, dnn_feature_columns["sparse"],
146-
# mask_feat_list=sess_feature_list)
113+
147114
dnn_input_emb = concat_fun(dnn_input_emb_list)
148115
dnn_input_emb = Flatten()(NoMask()(dnn_input_emb))
149116

@@ -167,9 +134,7 @@ def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_coun
167134

168135
dnn_input_emb = Concatenate()(
169136
[dnn_input_emb, Flatten()(interest_attention_layer), Flatten()(lstm_attention_layer)])
170-
# if len(dense_input) > 0:
171-
# deep_input_emb = Concatenate()(
172-
# [deep_input_emb] + list(dense_input.values()))
137+
173138
dnn_input_emb = combined_dnn_input([dnn_input_emb],dense_value_list)
174139
output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
175140
dnn_dropout, dnn_use_bn, seed)(dnn_input_emb)
@@ -184,9 +149,6 @@ def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_coun
184149
[user_behavior_input_dict[sess_name]]))
185150
# sess_input_length_list.append(user_behavior_length_dict[sess_name])
186151

187-
# model_input_list = get_inputs_list([sparse_input, dense_input]) + sess_input_list + [
188-
# user_sess_length]
189-
#
190152

191153
model = Model(inputs=inputs_list+[user_sess_length], outputs=output)
192154

deepctr/models/fgcnn.py

+2-52
Original file line number | Diff line number | Diff line change
@@ -11,33 +11,12 @@
1111
"""
1212
import tensorflow as tf
1313

14-
from ..inputs import build_input_features, get_linear_logit,input_from_feature_columns
14+
from ..inputs import build_input_features, input_from_feature_columns
1515
from ..layers.core import PredictionLayer, DNN
1616
from ..layers.interaction import InnerProductLayer, FGCNNLayer
1717
from ..layers.utils import concat_fun
1818

1919

20-
# def preprocess_input_embedding(feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear, init_std, seed,
21-
# return_linear_logit=True, ):
22-
# sparse_input_dict, dense_input_dict = build_input_features(feature_dim_dict)
23-
# sequence_input_dict, sequence_input_len_dict, sequence_max_len_dict = create_varlenfeat_inputdict(
24-
# feature_dim_dict)
25-
# inputs_list, deep_emb_list, linear_emb_list = get_inputs_embedding(None, feature_dim_dict, l2_reg_embedding,
26-
# l2_reg_linear, init_std, seed, sparse_input_dict,
27-
# dense_input_dict, sequence_input_dict,
28-
# sequence_input_len_dict, sequence_max_len_dict,
29-
# return_linear_logit, embedding_size, prefix='')
30-
# _, fg_deep_emb_list, _ = get_inputs_embedding(None, feature_dim_dict, l2_reg_embedding, l2_reg_linear, init_std,
31-
# seed, sparse_input_dict, dense_input_dict, sequence_input_dict,
32-
# sequence_input_len_dict, sequence_max_len_dict, False, embedding_size,
33-
# prefix='fg')
34-
# if return_linear_logit:
35-
# linear_logit = get_linear_logit(
36-
# linear_emb_list, dense_input_dict, l2_reg_linear)
37-
# else:
38-
# linear_logit = None
39-
# return deep_emb_list, fg_deep_emb_list, linear_logit, inputs_list
40-
4120

4221
def unstack(input_tensor):
4322
input_ = tf.expand_dims(input_tensor, axis=2)
@@ -65,8 +44,7 @@ def FGCNN(dnn_feature_columns, embedding_size=8, conv_kernel_width=(7, 7, 7, 7),
6544
:param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
6645
:return: A Keras model instance.
6746
"""
68-
#todo 这个还没修改
69-
#check_feature_config_dict(feature_dim_dict)
47+
7048
if not (len(conv_kernel_width) == len(conv_filters) == len(new_maps) == len(pooling_width)):
7149
raise ValueError(
7250
"conv_kernel_width,conv_filters,new_maps and pooling_width must have same length")
@@ -85,34 +63,6 @@ def FGCNN(dnn_feature_columns, embedding_size=8, conv_kernel_width=(7, 7, 7, 7),
8563
seed,prefix='fg')
8664

8765

88-
# sequence_input_dict, sequence_input_len_dict, sequence_max_len_dict = create_varlenfeat_inputdict(
89-
# feature_dim_dict)
90-
# inputs_list, deep_emb_list, linear_emb_list = get_inputs_embedding(None, feature_dim_dict, l2_reg_embedding,
91-
# l2_reg_linear, init_std, seed, sparse_input_dict,
92-
# dense_input_dict, sequence_input_dict,
93-
# sequence_input_len_dict, sequence_max_len_dict,
94-
# return_linear_logit, embedding_size, prefix='')
95-
# _, fg_deep_emb_list, _ = get_inputs_embedding(None, feature_dim_dict, l2_reg_embedding, l2_reg_linear, init_std,
96-
# seed, sparse_input_dict, dense_input_dict, sequence_input_dict,
97-
# sequence_input_len_dict, sequence_max_len_dict, False, embedding_size,
98-
# prefix='fg')
99-
# if return_linear_logit:
100-
# linear_logit = get_linear_logit(
101-
# linear_emb_list, dense_input_dict, l2_reg_linear)
102-
# else:
103-
# linear_logit = None
104-
# return deep_emb_list, fg_deep_emb_list, linear_logit, inputs_list
105-
106-
107-
108-
109-
110-
111-
# deep_emb_list, fg_deep_emb_list, _, inputs_list = preprocess_input_embedding(dnn_feature_columns,
112-
# embedding_size,
113-
# l2_reg_embedding,
114-
# 0, init_std,
115-
# seed, False)
11666
fg_input = concat_fun(fg_deep_emb_list, axis=1)
11767
origin_input = concat_fun(deep_emb_list, axis=1)
11868

deepctr/models/mlr.py

+2-6
Original file line number | Diff line number | Diff line change
@@ -38,16 +38,12 @@ def MLR(region_feature_columns, base_feature_columns=None, region_num=4,
3838
# raise ValueError(
3939
# "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}")
4040

41-
same_flag = False
41+
4242
if base_feature_columns is None or len(base_feature_columns) == 0:
4343
base_feature_columns = region_feature_columns
44-
same_flag = True
44+
4545
if bias_feature_columns is None:
4646
bias_feature_columns = []
47-
#for feat in region_feature_columns['sparse'] + base_feature_columns['sparse'] + bias_feature_columns['sparse']:
48-
# if feat.hash_flag:
49-
# raise ValueError("Feature Hashing on the fly is no supported in MLR") #TODO:support feature hashing on the MLR
50-
5147

5248
features = build_input_features(region_feature_columns + base_feature_columns+bias_feature_columns)
5349

deepctr/models/nfm.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,6 @@ def NFM(linear_feature_columns, dnn_feature_columns, embedding_size=8, dnn_hidde
3434
:param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
3535
:return: A Keras model instance.
3636
"""
37-
#check_feature_config_dict(linear_feature_columns)
3837

3938
features = build_input_features(linear_feature_columns + dnn_feature_columns)
4039

@@ -44,16 +43,17 @@ def NFM(linear_feature_columns, dnn_feature_columns, embedding_size=8, dnn_hidde
4443
embedding_size,
4544
l2_reg_embedding,init_std,
4645
seed)
47-
#todo not support dense
46+
4847
linear_logit = get_linear_logit(features, linear_feature_columns, l2_reg=l2_reg_linear, init_std=init_std,
4948
seed=seed, prefix='linear')
5049

5150
fm_input = concat_fun(sparse_embedding_list, axis=1)
5251
bi_out = BiInteractionPooling()(fm_input)
5352
if bi_dropout:
5453
bi_out = tf.keras.layers.Dropout(bi_dropout)(bi_out, training=None)
54+
dnn_input = combined_dnn_input([bi_out],dense_value_list)
5555
deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
56-
False, seed)(bi_out)
56+
False, seed)(dnn_input)
5757
deep_logit = tf.keras.layers.Dense(
5858
1, use_bias=False, activation=None)(deep_out)
5959

deepctr/models/pnn.py

+3-4
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99

1010
import tensorflow as tf
1111

12-
from ..inputs import input_from_feature_columns,build_input_features
12+
from ..inputs import input_from_feature_columns,build_input_features,combined_dnn_input
1313
from ..layers.core import PredictionLayer, DNN
1414
from ..layers.interaction import InnerProductLayer, OutterProductLayer
1515
from ..layers.utils import concat_fun
@@ -36,7 +36,6 @@ def PNN(dnn_feature_columns, embedding_size=8, dnn_hidden_units=(128, 128), l2_r
3636
:param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
3737
:return: A Keras model instance.
3838
"""
39-
#check_feature_config_dict(dnn_feature_columns)
4039

4140
if kernel_type not in ['mat', 'vec', 'num']:
4241
raise ValueError("kernel_type must be mat,vec or num")
@@ -49,7 +48,6 @@ def PNN(dnn_feature_columns, embedding_size=8, dnn_hidden_units=(128, 128), l2_r
4948
embedding_size,
5049
l2_reg_embedding,init_std,
5150
seed)
52-
# todo note support dense
5351
inner_product = tf.keras.layers.Flatten()(InnerProductLayer()(sparse_embedding_list))
5452
outter_product = OutterProductLayer(kernel_type)(sparse_embedding_list)
5553

@@ -69,8 +67,9 @@ def PNN(dnn_feature_columns, embedding_size=8, dnn_hidden_units=(128, 128), l2_r
6967
else:
7068
deep_input = linear_signal
7169

70+
dnn_input = combined_dnn_input([deep_input],dense_value_list)
7271
deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
73-
False, seed)(deep_input)
72+
False, seed)(dnn_input)
7473
deep_logit = tf.keras.layers.Dense(
7574
1, use_bias=False, activation=None)(deep_out)
7675

examples/run_dsin.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ def get_xy_fd(hash_flag=False):
4646
if __name__ == "__main__":
4747
x, y, feature_dim_dict, behavior_feature_list = get_xy_fd(True)
4848

49-
model = DSIN(feature_dim_dict, behavior_feature_list, sess_max_count=2, sess_len_max=4, embedding_size=4,
49+
model = DSIN(feature_dim_dict, behavior_feature_list, sess_max_count=2, embedding_size=4,
5050
dnn_hidden_units=[4, 4, 4], dnn_dropout=0.5, )
5151

5252
model.compile('adam', 'binary_crossentropy',

tests/models/DCN_test.py

-1
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
import pytest
22

3-
from deepctr.inputs import SparseFeat
43
from deepctr.models import DCN
54
from ..utils import check_model, get_test_data,SAMPLE_SIZE
65

0 commit comments

Comments
 (0)