# -*- coding:utf-8 -*-
"""
Author:
    Harshit Pande

Reference:
    [1] Field-Embedded Factorization Machines for Click-through Rate Prediction
        (https://arxiv.org/abs/2009.09931)

"""

import tensorflow as tf

from ..feature_column import get_linear_logit, input_from_feature_columns
from ..utils import DNN_SCOPE_NAME, deepctr_model_fn, variable_scope
from ...layers.core import DNN
from ...layers.interaction import FEFMLayer
from ...layers.utils import concat_func, add_func, combined_dnn_input, reduce_sum


def DeepFEFMEstimator(linear_feature_columns, dnn_feature_columns,
                      dnn_hidden_units=(128, 128), l2_reg_linear=0.00001, l2_reg_embedding_feat=0.00001,
                      l2_reg_embedding_field=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0.0,
                      dnn_activation='relu', dnn_use_bn=False, task='binary', model_dir=None,
                      config=None, linear_optimizer='Ftrl', dnn_optimizer='Adagrad', training_chief_hooks=None):
| 26 | + """Instantiates the DeepFEFM Network architecture or the shallow FEFM architecture (Ablation support not provided |
| 27 | + as estimator is meant for production, Ablation support provided in DeepFEFM implementation in models |
| 28 | +
|
| 29 | + :param linear_feature_columns: An iterable containing all the features used by linear part of the model. |
| 30 | + :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. |
| 31 | + :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN |
| 32 | + :param l2_reg_linear: float. L2 regularizer strength applied to linear part |
| 33 | + :param l2_reg_embedding_feat: float. L2 regularizer strength applied to embedding vector of features |
| 34 | + :param l2_reg_embedding_field: float, L2 regularizer to field embeddings |
| 35 | + :param l2_reg_dnn: float. L2 regularizer strength applied to DNN |
| 36 | + :param seed: integer ,to use as random seed. |
| 37 | + :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. |
| 38 | + :param dnn_activation: Activation function to use in DNN |
| 39 | + :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN |
| 40 | + :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss |
| 41 | + :param model_dir: Directory to save model parameters, graph and etc. This can |
| 42 | + also be used to load checkpoints from the directory into a estimator |
| 43 | + to continue training a previously saved model. |
| 44 | + :param config: tf.RunConfig object to configure the runtime settings. |
| 45 | + :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to |
| 46 | + the linear part of the model. Defaults to FTRL optimizer. |
| 47 | + :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to |
| 48 | + the deep part of the model. Defaults to Adagrad optimizer. |
| 49 | + :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to |
| 50 | + run on the chief worker during training. |
| 51 | + :return: A Tensorflow Estimator instance. |
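
    A minimal usage sketch (the feature name, vocabulary size, and embedding
    dimension below are illustrative assumptions, not part of this module)::

        import tensorflow as tf

        # hypothetical categorical feature with 10000 distinct ids
        user_id = tf.feature_column.categorical_column_with_identity('user_id', num_buckets=10000)
        linear_cols = [user_id]
        dnn_cols = [tf.feature_column.embedding_column(user_id, dimension=8)]

        model = DeepFEFMEstimator(linear_cols, dnn_cols, task='binary')
        # model.train(input_fn=...)  # then evaluate/predict via the usual Estimator API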
| 52 | + """ |
| 53 | + |
    def _model_fn(features, labels, mode, config):
        train_flag = (mode == tf.estimator.ModeKeys.TRAIN)

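        # Wide/linear component: a weighted sum over the raw linear feature columns.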
        linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)
        final_logit_components = [linear_logits]

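        # Embedding and DNN variables are created under DNN_SCOPE_NAME so that
        # deepctr_model_fn can route their gradients to dnn_optimizer (the linear
        # part above is handled by linear_optimizer).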
        with variable_scope(DNN_SCOPE_NAME):
            sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                                 l2_reg_embedding=l2_reg_embedding_feat)

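            # FEFM pairwise interactions: one interaction term per field pair,
            # computed through the learned symmetric field-pair matrices of [1].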
            fefm_interaction_embedding = FEFMLayer(
                regularizer=l2_reg_embedding_field)(concat_func(sparse_embedding_list, axis=1))

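            # Shallow FEFM logit: sum the pairwise interaction terms into a single score.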
            fefm_logit = tf.keras.layers.Lambda(lambda x: reduce_sum(x, axis=1, keep_dims=True))(
                fefm_interaction_embedding)

            final_logit_components.append(fefm_logit)

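            # Deep component (skipped for the shallow FEFM variant when dnn_hidden_units
            # is empty): the DNN consumes the flattened feature embeddings, the dense
            # values, and the FEFM interaction embedding.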
            if dnn_hidden_units:
                dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
                dnn_input = concat_func([dnn_input, fefm_interaction_embedding], axis=1)

                dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(
                    dnn_input, training=train_flag)

                dnn_logit = tf.keras.layers.Dense(
                    1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(dnn_output)

                final_logit_components.append(dnn_logit)

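        # Final logit: linear + FEFM (+ DNN when enabled) components summed together.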
        logits = add_func(final_logit_components)

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)