
Commit 353aa06

Add IFM and DIFM model (shenweichen#353)
1 parent a94a8ec commit 353aa06

File tree

9 files changed: +231 -13 lines changed

README.md (+2)

@@ -55,7 +55,9 @@ Let's [**Get Started!**](https://deepctr-doc.readthedocs.io/en/latest/Quick-Star
 | FiBiNET | [RecSys 2019][FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf) |
 | FLEN | [arxiv 2019][FLEN: Leveraging Field for Scalable CTR Prediction](https://arxiv.org/pdf/1911.04690.pdf) |
 | BST | [DLP-KDD 2019][Behavior sequence transformer for e-commerce recommendation in Alibaba](https://arxiv.org/pdf/1905.06874.pdf) |
+| IFM | [IJCAI 2019][An Input-aware Factorization Machine for Sparse Prediction](https://www.ijcai.org/Proceedings/2019/0203.pdf) |
 | DCN V2 | [arxiv 2020][DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems](https://arxiv.org/abs/2008.13535) |
+| DIFM | [IJCAI 2020][A Dual Input-aware Factorization Machine for CTR Prediction](https://www.ijcai.org/Proceedings/2020/0434.pdf) |

 ## Citation

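For context, both new models are exposed through the same deepctr.models API as the rest of the library. A minimal, hypothetical usage sketch (the feature columns, field names, and toy data below are illustrative, not part of this commit):

import numpy as np
from deepctr.models import DIFM  # or IFM
from deepctr.feature_column import SparseFeat, DenseFeat

# Hypothetical toy data: two sparse fields and one dense field.
feature_columns = [SparseFeat('user_id', vocabulary_size=100, embedding_dim=4),
                   SparseFeat('item_id', vocabulary_size=200, embedding_dim=4),
                   DenseFeat('price', 1)]
x = {'user_id': np.random.randint(0, 100, 256),
     'item_id': np.random.randint(0, 200, 256),
     'price': np.random.random(256).astype('float32')}
y = np.random.randint(0, 2, 256)

model = DIFM(feature_columns, feature_columns, task='binary')
model.compile(optimizer='adam', loss='binary_crossentropy')
model.fit(x, y, batch_size=64, epochs=1)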
deepctr/feature_column.py (+9 -2)

@@ -1,9 +1,10 @@
+import tensorflow as tf
 from collections import namedtuple, OrderedDict
 from copy import copy
 from itertools import chain

 from tensorflow.python.keras.initializers import RandomNormal, Zeros
-from tensorflow.python.keras.layers import Input
+from tensorflow.python.keras.layers import Input, Lambda

 from .inputs import create_embedding_matrix, embedding_lookup, get_dense_input, varlen_embedding_lookup, \
     get_varlen_pooling_list, mergeDict
@@ -145,7 +146,7 @@ def build_input_features(feature_columns, prefix=''):


 def get_linear_logit(features, feature_columns, units=1, use_bias=False, seed=1024, prefix='linear',
-                     l2_reg=0):
+                     l2_reg=0, sparse_feat_refine_weight=None):
     linear_feature_columns = copy(feature_columns)
     for i in range(len(linear_feature_columns)):
         if isinstance(linear_feature_columns[i], SparseFeat):
@@ -166,9 +167,15 @@ def get_linear_logit(features, feature_columns, units=1, use_bias=False, seed=10
         if len(linear_emb_list[i]) > 0 and len(dense_input_list) > 0:
             sparse_input = concat_func(linear_emb_list[i])
             dense_input = concat_func(dense_input_list)
+            if sparse_feat_refine_weight is not None:
+                sparse_input = Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=1))(
+                    [sparse_input, sparse_feat_refine_weight])
             linear_logit = Linear(l2_reg, mode=2, use_bias=use_bias, seed=seed)([sparse_input, dense_input])
         elif len(linear_emb_list[i]) > 0:
             sparse_input = concat_func(linear_emb_list[i])
+            if sparse_feat_refine_weight is not None:
+                sparse_input = Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=1))(
+                    [sparse_input, sparse_feat_refine_weight])
             linear_logit = Linear(l2_reg, mode=0, use_bias=use_bias, seed=seed)(sparse_input)
         elif len(dense_input_list) > 0:
             dense_input = concat_func(dense_input_list)

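The new sparse_feat_refine_weight hook is what IFM and DIFM use to reweight the first-order (linear) term: each field's 1-dimensional linear embedding is multiplied by that field's input-aware factor before the Linear layer. A standalone sketch of the same broadcast, assuming the linear embeddings are concatenated along the last axis as concat_func does by default (shapes and values are illustrative):

import tensorflow as tf

# (batch, 1, field_num): concatenated 1-d linear embeddings for 3 fields
sparse_input = tf.random.normal([2, 1, 3])
# (batch, field_num): per-field input-aware factor m_x
refine_weight = tf.constant([[0.5, 1.0, 2.0],
                             [1.0, 1.0, 1.0]])

# Same computation as the Lambda added to get_linear_logit:
refined = sparse_input * tf.expand_dims(refine_weight, axis=1)   # (2, 1, 3)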
deepctr/layers/__init__.py (+10 -9)

@@ -8,9 +8,9 @@
                           FieldWiseBiInteraction, FwFMLayer)
 from .normalization import LayerNormalization
 from .sequence import (AttentionSequencePoolingLayer, BiasEncoding, BiLSTM,
-                       KMaxPooling, SequencePoolingLayer,WeightedSequenceLayer,
+                       KMaxPooling, SequencePoolingLayer, WeightedSequenceLayer,
                        Transformer, DynamicGRU)
-from .utils import NoMask, Hash,Linear,Add,combined_dnn_input
+from .utils import NoMask, Hash, Linear, Add, combined_dnn_input, softmax

 custom_objects = {'tf': tf,
                   'InnerProductLayer': InnerProductLayer,
@@ -36,12 +36,13 @@
                   'KMaxPooling': KMaxPooling,
                   'FGCNNLayer': FGCNNLayer,
                   'Hash': Hash,
-                  'Linear':Linear,
+                  'Linear': Linear,
                   'DynamicGRU': DynamicGRU,
-                  'SENETLayer':SENETLayer,
-                  'BilinearInteraction':BilinearInteraction,
-                  'WeightedSequenceLayer':WeightedSequenceLayer,
-                  'Add':Add,
-                  'FieldWiseBiInteraction':FieldWiseBiInteraction,
-                  'FwFMLayer': FwFMLayer
+                  'SENETLayer': SENETLayer,
+                  'BilinearInteraction': BilinearInteraction,
+                  'WeightedSequenceLayer': WeightedSequenceLayer,
+                  'Add': Add,
+                  'FieldWiseBiInteraction': FieldWiseBiInteraction,
+                  'FwFMLayer': FwFMLayer,
+                  'softmax': softmax,
                   }

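The softmax helper is now registered in custom_objects alongside the layers, following the dict's existing pattern; this dict is what DeepCTR passes to Keras when reloading a saved model. A short sketch of that load pattern (the file name is illustrative and assumes a model saved earlier with model.save):

import tensorflow as tf
from deepctr.layers import custom_objects

# Assuming a trained IFM/DIFM model was saved earlier as 'IFM.h5':
model = tf.keras.models.load_model('IFM.h5', custom_objects)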
deepctr/layers/interaction.py (+4 -1)

@@ -700,13 +700,14 @@ class InteractingLayer(Layer):
             - [Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018.](https://arxiv.org/abs/1810.11921)
     """

-    def __init__(self, att_embedding_size=8, head_num=2, use_res=True, seed=1024, **kwargs):
+    def __init__(self, att_embedding_size=8, head_num=2, use_res=True, scaling=False, seed=1024, **kwargs):
         if head_num <= 0:
             raise ValueError('head_num must be a int > 0')
         self.att_embedding_size = att_embedding_size
         self.head_num = head_num
         self.use_res = use_res
         self.seed = seed
+        self.scaling = scaling
         super(InteractingLayer, self).__init__(**kwargs)

     def build(self, input_shape):
@@ -748,6 +749,8 @@ def call(self, inputs, **kwargs):

         inner_product = tf.matmul(
             querys, keys, transpose_b=True)  # head_num None F F
+        if self.scaling:
+            inner_product /= self.att_embedding_size ** 0.5
         self.normalized_att_scores = softmax(inner_product)

         result = tf.matmul(self.normalized_att_scores,

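The new scaling flag applies the standard scaled dot-product trick from the Transformer: attention logits are divided by the square root of the per-head embedding size before the softmax, keeping their magnitude stable as att_embedding_size grows (DIFM constructs its InteractingLayer with scaling=True). A standalone sketch of the same step, with illustrative shapes rather than the layer's actual internals:

import tensorflow as tf

d = 8                                     # att_embedding_size per head
querys = tf.random.normal([2, 4, 3, d])   # (head_num, batch, fields, d), illustrative
keys = tf.random.normal([2, 4, 3, d])

logits = tf.matmul(querys, keys, transpose_b=True)   # (head_num, batch, fields, fields)
logits /= d ** 0.5                                   # the scaling added in this commit
scores = tf.nn.softmax(logits, axis=-1)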
deepctr/models/__init__.py (+3 -1)

@@ -4,6 +4,8 @@
 from .dcn import DCN
 from .dcnmix import DCNMix
 from .deepfm import DeepFM
+from .ifm import IFM
+from .difm import DIFM
 from .dien import DIEN
 from .din import DIN
 from .fnn import FNN
@@ -20,5 +22,5 @@
 from .fwfm import FwFM
 from .bst import BST

-__all__ = ["AFM", "CCPM", "DCN", "DCNMix", "MLR", "DeepFM", "MLR", "NFM", "DIN", "DIEN", "FNN", "PNN",
+__all__ = ["AFM", "CCPM", "DCN", "IFM", "DIFM", "DCNMix", "MLR", "DeepFM", "MLR", "NFM", "DIN", "DIEN", "FNN", "PNN",
            "WDL", "xDeepFM", "AutoInt", "ONN", "FGCNN", "DSIN", "FiBiNET", 'FLEN', "FwFM", "BST"]

deepctr/models/difm.py (new file, +83 lines)

# -*- coding:utf-8 -*-
"""
Author:
    zanshuxun, [email protected]
Reference:
    [1] Lu W, Yu Y, Chang Y, et al. A Dual Input-aware Factorization Machine for CTR Prediction[C]
    //IJCAI. 2020: 3139-3145.(https://www.ijcai.org/Proceedings/2020/0434.pdf)
"""
import tensorflow as tf

from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns, SparseFeat, \
    VarLenSparseFeat
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import FM, InteractingLayer
from ..layers.utils import concat_func, add_func, combined_dnn_input


def DIFM(linear_feature_columns, dnn_feature_columns,
         att_embedding_size=8, att_head_num=8, att_res=True, dnn_hidden_units=(128, 128),
         l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
         dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Instantiates the DIFM Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by the linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param att_embedding_size: integer, the embedding size in the multi-head self-attention network.
    :param att_head_num: int. The head number in the multi-head self-attention network.
    :param att_res: bool. Whether or not to use standard residual connections before output.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part.
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate.
    :param dnn_activation: Activation function to use in the DNN.
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
    """

    if not len(dnn_hidden_units) > 0:
        raise ValueError("dnn_hidden_units is null!")

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)

    sparse_feat_num = len(list(filter(lambda x: isinstance(x, SparseFeat) or isinstance(x, VarLenSparseFeat),
                                      dnn_feature_columns)))
    inputs_list = list(features.values())

    sparse_embedding_list, _ = input_from_feature_columns(features, dnn_feature_columns,
                                                          l2_reg_embedding, seed)

    if not len(sparse_embedding_list) > 0:
        raise ValueError("there are no sparse features")

    att_input = concat_func(sparse_embedding_list, axis=1)
    att_out = InteractingLayer(att_embedding_size, att_head_num, att_res, scaling=True)(att_input)
    att_out = tf.keras.layers.Flatten()(att_out)
    m_vec = tf.keras.layers.Dense(
        sparse_feat_num, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))(att_out)

    dnn_input = combined_dnn_input(sparse_embedding_list, [])
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
    m_bit = tf.keras.layers.Dense(
        sparse_feat_num, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))(dnn_output)

    input_aware_factor = add_func([m_vec, m_bit])  # the complete input-aware factor m_x

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear, sparse_feat_refine_weight=input_aware_factor)

    fm_input = concat_func(sparse_embedding_list, axis=1)
    refined_fm_input = tf.keras.layers.Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=-1))(
        [fm_input, input_aware_factor])
    fm_logit = FM()(refined_fm_input)

    final_logit = add_func([linear_logit, fm_logit])

    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model

deepctr/models/ifm.py (new file, +74 lines)

# -*- coding:utf-8 -*-
"""
Author:
    zanshuxun, [email protected]
Reference:
    [1] Yu Y, Wang Z, Yuan B. An Input-aware Factorization Machine for Sparse Prediction[C]//IJCAI. 2019: 1466-1472.
    (https://www.ijcai.org/Proceedings/2019/0203.pdf)
"""
import tensorflow as tf
from tensorflow.python.keras.layers import Lambda

from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns, SparseFeat, \
    VarLenSparseFeat
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import FM
from ..layers.utils import concat_func, add_func, combined_dnn_input, softmax


def IFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128),
        l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
        dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Instantiates the IFM Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by the linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part.
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate.
    :param dnn_activation: Activation function to use in the DNN.
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
    """

    if not len(dnn_hidden_units) > 0:
        raise ValueError("dnn_hidden_units is null!")

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)

    sparse_feat_num = len(list(filter(lambda x: isinstance(x, SparseFeat) or isinstance(x, VarLenSparseFeat),
                                      dnn_feature_columns)))
    inputs_list = list(features.values())

    sparse_embedding_list, _ = input_from_feature_columns(features, dnn_feature_columns,
                                                          l2_reg_embedding, seed)
    if not len(sparse_embedding_list) > 0:
        raise ValueError("there are no sparse features")

    dnn_input = combined_dnn_input(sparse_embedding_list, [])
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
    # here, dnn_output is m'_{x}
    dnn_output = tf.keras.layers.Dense(
        sparse_feat_num, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))(dnn_output)
    # input-aware factor m_{x,i}
    input_aware_factor = Lambda(lambda x: tf.cast(tf.shape(x)[-1], tf.float32) * softmax(x, dim=1))(dnn_output)

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear, sparse_feat_refine_weight=input_aware_factor)

    fm_input = concat_func(sparse_embedding_list, axis=1)
    refined_fm_input = Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=-1))(
        [fm_input, input_aware_factor])
    fm_logit = FM()(refined_fm_input)

    final_logit = add_func([linear_logit, fm_logit])

    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model

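In ifm.py the input-aware factor is the field-axis softmax of the DNN output, scaled by the number of sparse fields, so the per-sample factors always sum to that field count: fields are reweighted relative to each other without changing the overall scale. A tiny numeric sketch of the same transform, with illustrative values (tf.nn.softmax over the last axis corresponds to the dim=1 softmax on a (batch, field_num) tensor above):

import tensorflow as tf

m_prime = tf.constant([[2.0, 0.0, -2.0]])        # DNN output m'_x for one sample, 3 fields
h = tf.cast(tf.shape(m_prime)[-1], tf.float32)   # number of sparse fields (3.0)
m_x = h * tf.nn.softmax(m_prime, axis=-1)        # input-aware factors; tf.reduce_sum(m_x) == 3.0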
tests/models/DIFM_test.py (new file, +22 lines)

import pytest

from deepctr.models import DIFM
from ..utils import check_model, get_test_data, SAMPLE_SIZE


@pytest.mark.parametrize(
    'att_head_num,dnn_hidden_units,sparse_feature_num',
    [(1, (4,), 2), (2, (4, 4,), 2), (1, (4,), 1)]
)
def test_DIFM(att_head_num, dnn_hidden_units, sparse_feature_num):
    model_name = "DIFM"
    sample_size = SAMPLE_SIZE
    x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=sparse_feature_num)

    model = DIFM(feature_columns, feature_columns, att_head_num=att_head_num, dnn_hidden_units=dnn_hidden_units, dnn_dropout=0.5)
    check_model(model, model_name, x, y)


if __name__ == "__main__":
    pass

tests/models/IFM_test.py (new file, +24 lines)

import pytest

from deepctr.models import IFM
from ..utils import check_model, get_test_data, SAMPLE_SIZE


@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [((2,), 1),
     ((3,), 2)
     ]
)
def test_IFM(hidden_size, sparse_feature_num):
    model_name = "IFM"
    sample_size = SAMPLE_SIZE
    x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=sparse_feature_num)

    model = IFM(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
    check_model(model, model_name, x, y)


if __name__ == "__main__":
    pass
