1
+ from collections import OrderedDict
1
2
from itertools import chain
2
3
3
- from tensorflow .python .keras import Input
4
4
from tensorflow .python .keras .initializers import RandomNormal
5
- from tensorflow .python .keras .layers import Embedding , Dense , Reshape , Concatenate
5
+ from tensorflow .python .keras .layers import Embedding , Dense , Reshape , Concatenate , Input , add
6
6
from tensorflow .python .keras .regularizers import l2
7
7
from .sequence import SequencePoolingLayer
8
- from .utils import get_linear_logit
9
8
10
9
11
10
def create_input_dict(feature_dim_dict, prefix=''):
    """Build Keras ``Input`` layers for the sparse and dense features.

    :param feature_dim_dict: dict with ``"sparse"`` and ``"dense"`` lists of
        feature descriptors; each descriptor exposes a ``name`` attribute.
    :param prefix: optional string prepended to every layer name.
    :return: ``(sparse_input, dense_input)`` — OrderedDicts mapping feature
        name to its ``Input`` tensor, preserving declaration order.
    """
    sparse_input = OrderedDict()
    for i, feat in enumerate(feature_dim_dict["sparse"]):
        sparse_input[feat.name] = Input(
            shape=(1,), name=prefix + 'sparse_' + str(i) + '-' + feat.name)

    dense_input = OrderedDict()
    for i, feat in enumerate(feature_dim_dict["dense"]):
        # Key by feat.name (not the feature object) so both dicts are
        # addressed the same way; visible consumers only read .values(),
        # so this only fixes the keying inconsistency.
        dense_input[feat.name] = Input(
            shape=(1,), name=prefix + 'dense_' + str(i) + '-' + feat.name)

    return sparse_input, dense_input
17
23
18
24
19
def create_sequence_input_dict(feature_dim_dict, mask_zero=True):
    """Build ``Input`` layers and bookkeeping dicts for sequence features.

    :param feature_dim_dict: dict whose optional ``'sequence'`` entry lists
        feature descriptors exposing ``name``, ``maxlen`` and ``combiner``.
    :param mask_zero: when True, variable lengths are handled by embedding
        masking and no explicit length inputs are created.
    :return: ``(sequence_input_dict, sequence_pooling_dict,
        sequence_len_dict, sequence_max_len_dict)``; the last two are None
        in mask-zero mode.
    """
    seq_feats = feature_dim_dict.get('sequence', [])

    sequence_input_dict = {}
    for idx, feat in enumerate(seq_feats):
        sequence_input_dict[feat.name] = Input(
            shape=(feat.maxlen,), name='seq_' + str(idx) + '-' + feat.name)

    sequence_pooling_dict = {feat.name: feat.combiner for feat in seq_feats}

    if mask_zero:
        # Masked embeddings carry the length info, so skip explicit inputs.
        sequence_len_dict = None
        sequence_max_len_dict = None
    else:
        sequence_len_dict = {}
        sequence_max_len_dict = {}
        for idx, feat in enumerate(seq_feats):
            sequence_len_dict[feat.name] = Input(
                shape=(1,), name='seq_length' + str(idx) + '-' + feat.name)
            sequence_max_len_dict[feat.name] = feat.maxlen

    return sequence_input_dict, sequence_pooling_dict, sequence_len_dict, sequence_max_len_dict
31
40
32
41
33
def create_embedding_dict(feature_dim_dict, embedding_size, init_std, seed,
                          l2_reg, prefix='sparse', seq_mask_zero=True):
    """Create one ``Embedding`` layer per sparse and per sequence feature.

    :param feature_dim_dict: dict with a ``"sparse"`` list (and optionally a
        ``'sequence'`` list) of feature descriptors exposing ``name`` and
        ``dimension``.
    :param embedding_size: int embedding width, or ``'auto'`` to use the
        ``6 * dimension ** 0.25`` heuristic per feature.
    :param init_std: stddev of the RandomNormal initializer.
    :param seed: initializer seed.
    :param l2_reg: L2 regularization strength on embedding weights.
    :param prefix: layer-name prefix.
    :param seq_mask_zero: mask_zero flag for sequence-feature embeddings only.
    :return: dict mapping feature name -> ``Embedding`` layer.
    """
    def _embed_dim(feat):
        # 'auto' heuristic: 6 * dimension**0.25, floored to int.
        if embedding_size == 'auto':
            return 6 * int(pow(feat.dimension, 0.25))
        return embedding_size

    def _make_embedding(feat, index, mask_zero=False):
        # Single construction point replaces the four duplicated branches.
        return Embedding(feat.dimension, _embed_dim(feat),
                         embeddings_initializer=RandomNormal(
                             mean=0.0, stddev=init_std, seed=seed),
                         embeddings_regularizer=l2(l2_reg),
                         name=prefix + '_emb_' + str(index) + '-' + feat.name,
                         mask_zero=mask_zero)

    sparse_embedding = {feat.name: _make_embedding(feat, i)
                        for i, feat in enumerate(feature_dim_dict["sparse"])}

    if 'sequence' in feature_dim_dict:
        # Sequence entries are added unconditionally (a same-named sparse
        # entry is overwritten, matching the original behavior) and use the
        # mask_zero flag so padding positions are masked downstream.
        count = len(sparse_embedding)
        for feat in feature_dim_dict['sequence']:
            sparse_embedding[feat.name] = _make_embedding(
                feat, count, mask_zero=seq_mask_zero)
            count += 1

    return sparse_embedding
75
85
@@ -109,7 +119,6 @@ def merge_sequence_input(embedding_dict, embed_list, sequence_input_dict, sequen
109
119
110
120
111
121
def get_embedding_vec_list(embedding_dict, input_dict):
    """Run each input tensor through its feature's embedding layer.

    :param embedding_dict: feature name -> callable embedding layer.
    :param input_dict: feature name -> input tensor.
    :return: list of embedded tensors in ``input_dict`` iteration order.
    """
    vec_list = []
    for feat_name, input_tensor in input_dict.items():
        vec_list.append(embedding_dict[feat_name](input_tensor))
    return vec_list
115
124
@@ -121,12 +130,15 @@ def get_varlen_embedding_vec_dict(embedding_dict, input_dict):
121
130
122
131
123
132
def get_pooling_vec_list(sequence_embed_dict, sequence_len_dict,
                         sequence_max_len_dict, sequence_pooling_dict):
    """Apply the configured pooling to every sequence embedding.

    When length info is absent (mask-zero mode, both length dicts None) the
    pooling layer is called on the embedding alone and relies on its mask;
    otherwise the explicit length tensor is passed alongside.
    """
    use_mask = sequence_len_dict is None or sequence_max_len_dict is None
    pooled = []
    for feat_name, embed_tensor in sequence_embed_dict.items():
        combiner = sequence_pooling_dict[feat_name]
        if use_mask:
            pooled.append(SequencePoolingLayer(-1, combiner)(embed_tensor))
        else:
            layer = SequencePoolingLayer(
                sequence_max_len_dict[feat_name], combiner)
            pooled.append(layer([embed_tensor, sequence_len_dict[feat_name]]))
    return pooled
126
138
127
139
128
140
def get_inputs_list(inputs):
    """Flatten a sequence of ``{name: tensor}`` dicts into one tensor list.

    ``None`` entries (e.g. absent length dicts in mask-zero mode) are skipped;
    value order within each dict is preserved.
    """
    return [tensor
            for mapping in inputs
            if mapping is not None
            for tensor in mapping.values()]
130
142
131
143
132
144
def get_inputs_embedding (feature_dim_dict , embedding_size , l2_reg_embedding , l2_reg_linear , init_std , seed , include_linear = True ):
@@ -162,3 +174,25 @@ def get_inputs_embedding(feature_dim_dict, embedding_size, l2_reg_embedding, l2_
162
174
inputs_list = get_inputs_list (
163
175
[sparse_input_dict , dense_input_dict , sequence_input_dict , sequence_input_len_dict ])
164
176
return deep_emb_list , linear_logit , inputs_list
177
+
178
+
179
def get_linear_logit(linear_term, dense_input_, l2_reg):
    """Combine sparse-side linear terms with a dense-feature linear projection.

    :param linear_term: list of logit tensors contributed by sparse features.
    :param dense_input_: dict mapping dense feature name -> Input tensor.
    :param l2_reg: L2 strength for the dense projection weights.
    :return: a single combined logit tensor, or None when both sides are empty.
    """
    # Collapse the sparse-side terms: sum many, pass one through, else None.
    if len(linear_term) > 1:
        sparse_logit = add(linear_term)
    elif len(linear_term) == 1:
        sparse_logit = linear_term[0]
    else:
        sparse_logit = None

    dense_tensors = list(dense_input_.values())
    if not dense_tensors:
        return sparse_logit

    if len(dense_tensors) == 1:
        dense_concat = dense_tensors[0]
    else:
        dense_concat = Concatenate()(dense_tensors)

    # Bias-free linear projection of the concatenated dense features.
    dense_logit = Dense(1, activation=None, use_bias=False,
                        kernel_regularizer=l2(l2_reg))(dense_concat)

    if sparse_logit is None:
        return dense_logit
    return add([dense_logit, sparse_logit])
0 commit comments