
Commit d524c86

Author: Weichen Shen

Refactor & Add sequence input support

* Refactor Input & Embedding
* Support sequence (multi-value) input for the AFM, AutoInt, DCN, DeepFM, FNN, NFM, PNN and xDeepFM models
1 parent cc844f3 commit d524c86

33 files changed: +722 −767 lines

deepctr/__init__.py

+1-1
@@ -3,5 +3,5 @@
 from . import sequence
 from . import models
 from .utils import check_version
-__version__ = '0.2.1'
+__version__ = '0.2.2'
 check_version(__version__)

deepctr/input_embedding.py

+164
@@ -0,0 +1,164 @@
from itertools import chain

from tensorflow.python.keras import Input
from tensorflow.python.keras.initializers import RandomNormal
from tensorflow.python.keras.layers import Embedding, Dense, Reshape, Concatenate
from tensorflow.python.keras.regularizers import l2

from .sequence import SequencePoolingLayer
from .utils import get_linear_logit


def create_input_dict(feature_dim_dict, prefix=''):
    # One (batch, 1) Input per sparse and per dense feature.
    sparse_input = {feat: Input(shape=(1,), name=prefix + 'sparse_' + str(i) + '-' + feat)
                    for i, feat in enumerate(feature_dim_dict["sparse"])}
    dense_input = {feat: Input(shape=(1,), name=prefix + 'dense_' + str(i) + '-' + feat)
                   for i, feat in enumerate(feature_dim_dict["dense"])}
    return sparse_input, dense_input


def create_sequence_input_dict(feature_dim_dict):
    # For each multi-value feature: a padded (batch, maxlen) Input, a
    # (batch, 1) Input carrying the true sequence length, plus lookup dicts
    # for the pooling mode (combiner) and the maximum length.
    sequence_dim_dict = feature_dim_dict.get('sequence', [])
    sequence_input_dict = {feat.name: Input(shape=(feat.maxlen,), name='seq_' + str(i) + '-' + feat.name)
                           for i, feat in enumerate(sequence_dim_dict)}
    sequence_pooling_dict = {feat.name: feat.combiner
                             for feat in sequence_dim_dict}
    sequence_len_dict = {feat.name: Input(shape=(1,), name='seq_length' + str(i) + '-' + feat.name)
                         for i, feat in enumerate(sequence_dim_dict)}
    sequence_max_len_dict = {feat.name: feat.maxlen
                             for feat in sequence_dim_dict}
    return sequence_input_dict, sequence_pooling_dict, sequence_len_dict, sequence_max_len_dict


def create_embedding_dict(feature_dim_dict, embedding_size, init_std, seed, l2_reg, prefix='sparse'):
    # embedding_size == 'auto' applies the 6 * vocab_size**0.25 heuristic;
    # otherwise every feature gets the same fixed embedding dimension.
    if embedding_size == 'auto':
        sparse_embedding = {feat: Embedding(feature_dim_dict["sparse"][feat],
                                            6 * int(pow(feature_dim_dict["sparse"][feat], 0.25)),
                                            embeddings_initializer=RandomNormal(
                                                mean=0.0, stddev=init_std, seed=seed),
                                            embeddings_regularizer=l2(l2_reg),
                                            name=prefix + '_emb_' + str(i) + '-' + feat)
                            for i, feat in enumerate(feature_dim_dict["sparse"])}
    else:
        sparse_embedding = {feat: Embedding(feature_dim_dict["sparse"][feat], embedding_size,
                                            embeddings_initializer=RandomNormal(
                                                mean=0.0, stddev=init_std, seed=seed),
                                            embeddings_regularizer=l2(l2_reg),
                                            name=prefix + '_emb_' + str(i) + '-' + feat)
                            for i, feat in enumerate(feature_dim_dict["sparse"])}

    if 'sequence' in feature_dim_dict:
        # A sequence feature reuses an existing table when it shares a name
        # with a sparse feature; otherwise it gets its own embedding.
        count = len(sparse_embedding)
        for feat in feature_dim_dict['sequence']:
            if feat.name not in sparse_embedding:
                if embedding_size == "auto":
                    sparse_embedding[feat.name] = Embedding(feat.dimension, 6 * int(pow(feat.dimension, 0.25)),
                                                            embeddings_initializer=RandomNormal(
                                                                mean=0.0, stddev=init_std, seed=seed),
                                                            embeddings_regularizer=l2(l2_reg),
                                                            name=prefix + '_emb_' + str(count) + '-' + feat.name)
                else:
                    sparse_embedding[feat.name] = Embedding(feat.dimension, embedding_size,
                                                            embeddings_initializer=RandomNormal(
                                                                mean=0.0, stddev=init_std, seed=seed),
                                                            embeddings_regularizer=l2(l2_reg),
                                                            name=prefix + '_emb_' + str(count) + '-' + feat.name)
                count += 1

    return sparse_embedding


def merge_dense_input(dense_input_, embed_list, embedding_size, l2_reg):
    # In 'auto' mode the dense values are concatenated raw as a (1, n) block;
    # otherwise each one is projected to embedding_size by a bias-free Dense.
    dense_input = list(dense_input_.values())
    if len(dense_input) > 0:
        if embedding_size == "auto":
            if len(dense_input) == 1:
                continuous_embedding_list = dense_input[0]
            else:
                continuous_embedding_list = Concatenate()(dense_input)
            continuous_embedding_list = Reshape(
                [1, len(dense_input)])(continuous_embedding_list)
            embed_list.append(continuous_embedding_list)
        else:
            continuous_embedding_list = list(
                map(Dense(embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg)),
                    dense_input))
            continuous_embedding_list = list(
                map(Reshape((1, embedding_size)), continuous_embedding_list))
            embed_list += continuous_embedding_list

    return embed_list


def merge_sequence_input(embedding_dict, embed_list, sequence_input_dict,
                         sequence_len_dict, sequence_max_len_dict, sequence_pooling_dict):
    # Embed each padded sequence, then pool it to a fixed-length vector so it
    # can sit alongside the ordinary sparse embeddings.
    if len(sequence_input_dict) > 0:
        sequence_embed_dict = get_varlen_embedding_vec_dict(
            embedding_dict, sequence_input_dict)
        sequence_embed_list = get_pooling_vec_list(
            sequence_embed_dict, sequence_len_dict, sequence_max_len_dict, sequence_pooling_dict)
        embed_list += sequence_embed_list

    return embed_list


def get_embedding_vec_list(embedding_dict, input_dict):
    return [embedding_dict[feat](v) for feat, v in input_dict.items()]


def get_varlen_embedding_vec_dict(embedding_dict, input_dict):
    return {feat: embedding_dict[feat](v) for feat, v in input_dict.items()}


def get_pooling_vec_list(sequence_embed_dict, sequence_len_dict, sequence_max_len_dict, sequence_pooling_dict):
    # The true sequence length masks out the padded positions during pooling.
    return [SequencePoolingLayer(sequence_max_len_dict[feat], sequence_pooling_dict[feat])(
        [v, sequence_len_dict[feat]]) for feat, v in sequence_embed_dict.items()]


def get_inputs_list(inputs):
    # Flatten the per-group input dicts into a single ordered list of tensors.
    return list(chain(*list(map(lambda x: x.values(), inputs))))


def get_inputs_embedding(feature_dim_dict, embedding_size, l2_reg_embedding,
                         l2_reg_linear, init_std, seed, include_linear=True):
    # Shared entry point for all models: builds the Inputs, the deep embedding
    # list (sparse + pooled sequence + dense) and, optionally, the linear logit.
    sparse_input_dict, dense_input_dict = create_input_dict(feature_dim_dict)
    sequence_input_dict, sequence_pooling_dict, sequence_input_len_dict, sequence_max_len_dict = \
        create_sequence_input_dict(feature_dim_dict)

    deep_sparse_emb_dict = create_embedding_dict(
        feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding)

    deep_emb_list = get_embedding_vec_list(
        deep_sparse_emb_dict, sparse_input_dict)
    deep_emb_list = merge_sequence_input(deep_sparse_emb_dict, deep_emb_list, sequence_input_dict,
                                         sequence_input_len_dict, sequence_max_len_dict, sequence_pooling_dict)
    deep_emb_list = merge_dense_input(
        dense_input_dict, deep_emb_list, embedding_size, l2_reg_embedding)

    if include_linear:
        # The linear (order-1) part uses its own 1-dimensional embeddings.
        linear_sparse_emb_dict = create_embedding_dict(
            feature_dim_dict, 1, init_std, seed, l2_reg_linear, 'linear')
        linear_emb_list = get_embedding_vec_list(
            linear_sparse_emb_dict, sparse_input_dict)
        linear_emb_list = merge_sequence_input(linear_sparse_emb_dict, linear_emb_list, sequence_input_dict,
                                               sequence_input_len_dict, sequence_max_len_dict, sequence_pooling_dict)
        linear_logit = get_linear_logit(
            linear_emb_list, dense_input_dict, l2_reg_linear)
    else:
        linear_logit = None

    inputs_list = get_inputs_list(
        [sparse_input_dict, dense_input_dict, sequence_input_dict, sequence_input_len_dict])
    return deep_emb_list, linear_logit, inputs_list
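
The whole module is driven by a feature_dim_dict whose optional 'sequence' entries are feature specs exposing name, dimension, maxlen and combiner attributes. A minimal sketch of how get_inputs_embedding consumes one — the SeqFeat namedtuple here is a hypothetical stand-in for the library's actual sequence-feature spec, which this commit does not show:

from collections import namedtuple

from deepctr.input_embedding import get_inputs_embedding

# Hypothetical stand-in: the code above only touches .name, .dimension,
# .maxlen and .combiner on each sequence feature spec.
SeqFeat = namedtuple('SeqFeat', ['name', 'dimension', 'maxlen', 'combiner'])

feature_dim_dict = {
    'sparse': {'user_id': 1000, 'item_id': 5000},            # feature -> vocab size
    'dense': ['price'],
    'sequence': [SeqFeat('hist_item_id', 5000, 5, 'mean')],  # padded to 5, mean-pooled
}

# deep_emb_list holds one (batch, 1, embedding_size) tensor per feature group
# (each sparse embedding, each pooled sequence, the projected dense block);
# inputs_list is ordered sparse, dense, sequence, sequence lengths.
deep_emb_list, linear_logit, inputs_list = get_inputs_embedding(
    feature_dim_dict, embedding_size=8, l2_reg_embedding=1e-5,
    l2_reg_linear=1e-5, init_std=0.0001, seed=1024)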

deepctr/models/afm.py

+12-38
@@ -9,13 +9,10 @@
 (https://arxiv.org/abs/1708.04617)

 """
-
-from tensorflow.python.keras.layers import Dense, Concatenate, Reshape, add
-from tensorflow.python.keras.models import Model
-from tensorflow.python.keras.regularizers import l2
-
-from ..utils import get_input, get_share_embeddings
+import tensorflow as tf
+from ..input_embedding import get_inputs_embedding
 from ..layers import PredictionLayer, AFMLayer, FM
+from ..utils import concat_fun


 def AFM(feature_dim_dict, embedding_size=8, use_attention=True, attention_factor=8,
@@ -48,41 +45,18 @@ def AFM(feature_dim_dict, embedding_size=8, use_attention=True, attention_factor
         raise ValueError("feature_dim_dict['dense'] must be a list,cur is", type(
             feature_dim_dict['dense']))

-    sparse_input, dense_input = get_input(feature_dim_dict, None)
-    sparse_embedding, linear_embedding, = get_share_embeddings(
-        feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, l2_reg_linear)
-
-    embed_list = [sparse_embedding[i](sparse_input[i])
-                  for i in range(len(sparse_input))]
-    linear_term = [linear_embedding[i](sparse_input[i])
-                   for i in range(len(sparse_input))]
-    if len(linear_term) > 1:
-        linear_term = add(linear_term)
-    elif len(linear_term) == 1:
-        linear_term = linear_term[0]
+    deep_emb_list, linear_logit, inputs_list = get_inputs_embedding(
+        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear, init_std, seed)

-    if len(dense_input) > 0:
-        continuous_embedding_list = list(
-            map(Dense(embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg_embedding), ),
-                dense_input))
-        continuous_embedding_list = list(
-            map(Reshape((1, embedding_size)), continuous_embedding_list))
-        embed_list += continuous_embedding_list
-
-        dense_input_ = dense_input[0] if len(
-            dense_input) == 1 else Concatenate()(dense_input)
-        linear_dense_logit = Dense(
-            1, activation=None, use_bias=False, kernel_regularizer=l2(l2_reg_linear))(dense_input_)
-        linear_term = add([linear_dense_logit, linear_term])
-
-    fm_input = Concatenate(axis=1)(embed_list)
+    fm_input = concat_fun(deep_emb_list, axis=1)
     if use_attention:
-        fm_out = AFMLayer(attention_factor, l2_reg_att,
-                          keep_prob, seed)(embed_list)
+        fm_logit = AFMLayer(attention_factor, l2_reg_att,
+                            keep_prob, seed)(deep_emb_list)
     else:
-        fm_out = FM()(fm_input)
+        fm_logit = FM()(fm_input)

-    final_logit = add([linear_term, fm_out])
+    final_logit = tf.keras.layers.add([linear_logit, fm_logit])
     output = PredictionLayer(final_activation)(final_logit)
-    model = Model(inputs=sparse_input + dense_input, outputs=output)
+
+    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
     return model
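
For reference, a hedged end-to-end sketch of training the refactored AFM on a multi-value feature. The SeqFeat spec and all data are made up for illustration; only the input ordering (sparse, dense, sequence, sequence lengths) follows get_inputs_list above:

from collections import namedtuple

import numpy as np
from deepctr.models import AFM

SeqFeat = namedtuple('SeqFeat', ['name', 'dimension', 'maxlen', 'combiner'])  # hypothetical spec

feature_dim_dict = {'sparse': {'user_id': 1000, 'item_id': 5000},
                    'dense': ['price'],
                    'sequence': [SeqFeat('hist_item_id', 5000, 5, 'mean')]}
model = AFM(feature_dim_dict, embedding_size=8)
model.compile('adam', 'binary_crossentropy')

n = 32  # toy batch
model_input = [np.random.randint(0, 1000, n),       # user_id (sparse)
               np.random.randint(0, 5000, n),       # item_id (sparse)
               np.random.rand(n),                   # price (dense)
               np.random.randint(0, 5000, (n, 5)),  # hist_item_id, padded to maxlen
               np.random.randint(1, 6, (n, 1))]     # true lengths, used for masking
model.fit(model_input, np.random.randint(0, 2, n), epochs=1, verbose=0)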

deepctr/models/autoint.py

+16-39
@@ -9,14 +9,10 @@


 """

-from tensorflow.python.keras.layers import Dense, Embedding, Concatenate
-from tensorflow.python.keras.models import Model
-from tensorflow.python.keras.initializers import RandomNormal
-from tensorflow.python.keras.regularizers import l2
 import tensorflow as tf
-
-from ..utils import get_input
+from ..input_embedding import get_inputs_embedding
 from ..layers import PredictionLayer, MLP, InteractingLayer
+from ..utils import concat_fun


 def AutoInt(feature_dim_dict, embedding_size=8, att_layer_num=3, att_embedding_size=8, att_head_num=2, att_res=True, hidden_size=(256, 256), activation='relu',
@@ -48,56 +44,37 @@ def AutoInt(feature_dim_dict, embedding_size=8, att_layer_num=3, att_embedding_s
         raise ValueError(
             "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}")

-    sparse_input, dense_input = get_input(feature_dim_dict, None,)
-    sparse_embedding = get_embeddings(
-        feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding)
-    embed_list = [sparse_embedding[i](sparse_input[i])
-                  for i in range(len(sparse_input))]
+    deep_emb_list, _, inputs_list = get_inputs_embedding(
+        feature_dim_dict, embedding_size, l2_reg_embedding, 0, init_std, seed, False)

-    att_input = Concatenate(axis=1)(embed_list) if len(
-        embed_list) > 1 else embed_list[0]
+    att_input = concat_fun(deep_emb_list, axis=1)

-    for i in range(att_layer_num):
+    for _ in range(att_layer_num):
         att_input = InteractingLayer(
             att_embedding_size, att_head_num, att_res)(att_input)
     att_output = tf.keras.layers.Flatten()(att_input)

-    deep_input = tf.keras.layers.Flatten()(Concatenate()(embed_list)
-                                           if len(embed_list) > 1 else embed_list[0])
-    if len(dense_input) > 0:
-        if len(dense_input) == 1:
-            continuous_list = dense_input[0]
-        else:
-            continuous_list = Concatenate()(dense_input)
-
-        deep_input = Concatenate()([deep_input, continuous_list])
+    deep_input = tf.keras.layers.Flatten()(concat_fun(deep_emb_list))

     if len(hidden_size) > 0 and att_layer_num > 0:  # Deep & Interacting Layer
         deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob,
                        use_bn, seed)(deep_input)
-        stack_out = Concatenate()([att_output, deep_out])
-        final_logit = Dense(1, use_bias=False, activation=None)(stack_out)
+        stack_out = tf.keras.layers.Concatenate()([att_output, deep_out])
+        final_logit = tf.keras.layers.Dense(
+            1, use_bias=False, activation=None)(stack_out)
     elif len(hidden_size) > 0:  # Only Deep
         deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob,
                        use_bn, seed)(deep_input)
-        final_logit = Dense(1, use_bias=False, activation=None)(deep_out)
+        final_logit = tf.keras.layers.Dense(
+            1, use_bias=False, activation=None)(deep_out)
     elif att_layer_num > 0:  # Only Interacting Layer
-        final_logit = Dense(1, use_bias=False, activation=None)(att_output)
+        final_logit = tf.keras.layers.Dense(
+            1, use_bias=False, activation=None)(att_output)
     else:  # Error
         raise NotImplementedError

     output = PredictionLayer(final_activation)(final_logit)
-    model = Model(inputs=sparse_input + dense_input, outputs=output)
-
-    return model
-

-def get_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_rev_V):
-    sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size,
-                                  embeddings_initializer=RandomNormal(
-                                      mean=0.0, stddev=init_std, seed=seed),
-                                  embeddings_regularizer=l2(l2_rev_V),
-                                  name='sparse_emb_' + str(i) + '-' + feat) for i, feat in
-                        enumerate(feature_dim_dict["sparse"])]
+    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)

-    return sparse_embedding
+    return model
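
The branch logic above selects between three architectures, driven entirely by att_layer_num and hidden_size. A hedged sketch, assuming the same hypothetical feature_dim_dict as in the earlier examples:

from deepctr.models import AutoInt

# Deep tower + stacked InteractingLayers (default wiring).
both = AutoInt(feature_dim_dict, att_layer_num=3, hidden_size=(256, 256))

# MLP tower only: skip the multi-head self-attention blocks.
deep_only = AutoInt(feature_dim_dict, att_layer_num=0, hidden_size=(256, 256))

# Interacting layers only: no deep tower.
att_only = AutoInt(feature_dim_dict, att_layer_num=3, hidden_size=())

# att_layer_num=0 together with hidden_size=() raises NotImplementedError.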
