# -*- coding:utf-8 -*-
"""
Author:

Reference:
    [1] Lu W, Yu Y, Chang Y, et al. A Dual Input-aware Factorization Machine for CTR Prediction[C]
    //IJCAI. 2020: 3139-3145. (https://www.ijcai.org/Proceedings/2020/0434.pdf)
"""

import tensorflow as tf

from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns, SparseFeat, \
    VarLenSparseFeat
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import FM, InteractingLayer
from ..layers.utils import concat_func, add_func, combined_dnn_input


def DIFM(linear_feature_columns, dnn_feature_columns,
         att_embedding_size=8, att_head_num=8, att_res=True, dnn_hidden_units=(128, 128),
         l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
         dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Instantiates the DIFM Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by the linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param att_embedding_size: integer, the embedding size in the multi-head self-attention network.
    :param att_head_num: int. The number of heads in the multi-head self-attention network.
    :param att_res: bool. Whether or not to use standard residual connections before the output.
    :param dnn_hidden_units: list of positive integers, the layer number and units in each layer of the DNN; must be non-empty for DIFM.
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part.
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability of dropping a given DNN coordinate.
    :param dnn_activation: Activation function to use in the DNN.
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
    """

    if not len(dnn_hidden_units) > 0:
        raise ValueError("dnn_hidden_units is null!")

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)

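    # Count the sparse (and variable-length sparse) fields: both input-aware FENs below
    # output one scalar weight per such field.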
    sparse_feat_num = len(list(filter(lambda x: isinstance(x, (SparseFeat, VarLenSparseFeat)),
                                      dnn_feature_columns)))
    inputs_list = list(features.values())

    sparse_embedding_list, _ = input_from_feature_columns(features, dnn_feature_columns,
                                                          l2_reg_embedding, seed)

    if not len(sparse_embedding_list) > 0:
        raise ValueError("there are no sparse features")

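    # Vector-wise FEN: multi-head self-attention (InteractingLayer) over the stacked
    # field embeddings, flattened and projected to one weight per sparse field (m_vec).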
    att_input = concat_func(sparse_embedding_list, axis=1)
    att_out = InteractingLayer(att_embedding_size, att_head_num, att_res, scaling=True)(att_input)
    att_out = tf.keras.layers.Flatten()(att_out)
    m_vec = tf.keras.layers.Dense(
        sparse_feat_num, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))(att_out)

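    # Bit-wise FEN: a plain DNN over the flattened embeddings, projected to a second
    # set of per-field weights (m_bit).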
    dnn_input = combined_dnn_input(sparse_embedding_list, [])
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
    m_bit = tf.keras.layers.Dense(
        sparse_feat_num, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))(dnn_output)

    input_aware_factor = add_func([m_vec, m_bit])  # the complete input-aware factor m_x

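    # Reweight the first-order (linear) terms of the sparse features by m_x.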
    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear, sparse_feat_refine_weight=input_aware_factor)

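    # Refine the second-order part: scale each field's embedding by its entry of m_x
    # before the FM interaction.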
    fm_input = concat_func(sparse_embedding_list, axis=1)
    refined_fm_input = tf.keras.layers.Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=-1))(
        [fm_input, input_aware_factor])
    fm_logit = FM()(refined_fm_input)

    final_logit = add_func([linear_logit, fm_logit])

    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
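

if __name__ == "__main__":
    # Minimal usage sketch (illustrative, not part of the library): builds DIFM on two
    # toy sparse fields with random data. The feature names, vocabulary sizes, embedding
    # dimension and batch sizes below are assumptions made for this demo only.
    # (In practice you would import DIFM from the installed package; run this module
    # with `python -m` so the relative imports above resolve.)
    import numpy as np

    feature_columns = [SparseFeat('user_id', vocabulary_size=100, embedding_dim=8),
                       SparseFeat('item_id', vocabulary_size=200, embedding_dim=8)]
    model = DIFM(linear_feature_columns=feature_columns, dnn_feature_columns=feature_columns)
    model.compile(optimizer='adam', loss='binary_crossentropy')

    n_samples = 32
    model_input = {'user_id': np.random.randint(0, 100, n_samples),
                   'item_id': np.random.randint(0, 200, n_samples)}
    labels = np.random.randint(0, 2, n_samples)
    model.fit(model_input, labels, batch_size=16, epochs=1, verbose=0)
    print(model.predict(model_input, batch_size=16).shape)  # expected: (32, 1)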