|
| 1 | +# -*- coding:utf-8 -*- |
| 2 | +""" |
| 3 | +
|
| 4 | +Author: |
| 5 | + |
| 6 | +
|
| 7 | +Reference: |
| 8 | + [1] Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018.(https://arxiv.org/abs/1810.11921) |
| 9 | +
|
| 10 | +""" |
| 11 | + |
| 12 | +from tensorflow.python.keras.layers import Dense, Embedding, Concatenate |
| 13 | +from tensorflow.python.keras.models import Model |
| 14 | +from tensorflow.python.keras.initializers import RandomNormal |
| 15 | +from tensorflow.python.keras.regularizers import l2 |
| 16 | +import tensorflow as tf |
| 17 | + |
| 18 | +from ..utils import get_input |
| 19 | +from ..layers import PredictionLayer, MLP, InteractingLayer |
| 20 | + |
| 21 | + |
def AutoInt(feature_dim_dict, embedding_size=8, att_layer_num=3, att_embedding_size=8, att_head_num=2, att_res=True, hidden_size=(256, 256), activation='relu',
            l2_reg_deep=0, l2_reg_embedding=1e-5, use_bn=False, keep_prob=1.0, init_std=0.0001, seed=1024,
            final_activation='sigmoid',):
    """Instantiates the AutoInt Network architecture.

    :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive integer,sparse feature embedding_size
    :param att_layer_num: int.The InteractingLayer number to be used.
    :param att_embedding_size: int.The embedding size in multi-head self-attention network.
    :param att_head_num: int.The head number in multi-head self-attention network.
    :param att_res: bool.Whether or not use standard residual connections before output.
    :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param activation: Activation function to use in deep net
    :param l2_reg_deep: float. L2 regularizer strength applied to deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param use_bn: bool. Whether use BatchNormalization before activation or not.in deep net
    :param keep_prob: float in (0,1]. keep_prob used in deep net
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param final_activation: output activation,usually ``'sigmoid'`` or ``'linear'``
    :return: A Keras model instance.
    :raises ValueError: if both the deep net and the interacting part are disabled,
        or if ``feature_dim_dict`` is not a dict with ``'sparse'`` and ``'dense'`` keys.
    """

    # At least one of the two towers (deep MLP / self-attention) must exist.
    if len(hidden_size) <= 0 and att_layer_num <= 0:
        raise ValueError("Either hidden_layer or att_layer_num must > 0")
    if not isinstance(feature_dim_dict, dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict:
        raise ValueError(
            "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}")

    # Build input placeholders and one Embedding layer per sparse feature.
    sparse_input, dense_input = get_input(feature_dim_dict, None,)
    embedding_layers = get_embeddings(
        feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding)
    embed_list = [layer(tensor)
                  for layer, tensor in zip(embedding_layers, sparse_input)]

    # Stack field embeddings along the field axis for the attention tower.
    if len(embed_list) > 1:
        att_input = Concatenate(axis=1)(embed_list)
    else:
        att_input = embed_list[0]

    # Attention tower: att_layer_num stacked multi-head self-attention layers.
    for _ in range(att_layer_num):
        att_input = InteractingLayer(
            att_embedding_size, att_head_num, att_res)(att_input)
    att_output = tf.keras.layers.Flatten()(att_input)

    # Deep tower input: flattened embeddings, plus raw dense features if any.
    if len(embed_list) > 1:
        flat_embeddings = Concatenate()(embed_list)
    else:
        flat_embeddings = embed_list[0]
    deep_input = tf.keras.layers.Flatten()(flat_embeddings)

    if len(dense_input) > 0:
        if len(dense_input) == 1:
            dense_part = dense_input[0]
        else:
            dense_part = Concatenate()(dense_input)
        deep_input = Concatenate()([deep_input, dense_part])

    use_deep = len(hidden_size) > 0
    use_att = att_layer_num > 0
    if use_deep and use_att:  # Deep & Interacting Layer
        deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob,
                       use_bn, seed)(deep_input)
        stack_out = Concatenate()([att_output, deep_out])
        final_logit = Dense(1, use_bias=False, activation=None)(stack_out)
    elif use_deep:  # Only Deep
        deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob,
                       use_bn, seed)(deep_input)
        final_logit = Dense(1, use_bias=False, activation=None)(deep_out)
    elif use_att:  # Only Interacting Layer
        final_logit = Dense(1, use_bias=False, activation=None)(att_output)
    else:  # unreachable: ruled out by the guard at the top
        raise NotImplementedError

    output = PredictionLayer(final_activation)(final_logit)
    model = Model(inputs=sparse_input + dense_input, outputs=output)

    return model
| 93 | + |
| 94 | + |
def get_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_rev_V):
    """Create one Embedding layer per sparse feature.

    :param feature_dim_dict: dict with a ``'sparse'`` mapping of feature name -> vocabulary size
    :param embedding_size: positive integer, output dimension of each embedding
    :param init_std: float, stddev of the RandomNormal embedding initializer
    :param seed: integer random seed for the initializer
    :param l2_rev_V: float, L2 regularization strength applied to embedding weights
    :return: list of ``Embedding`` layers, in iteration order of ``feature_dim_dict['sparse']``
    """
    embedding_layers = []
    for idx, feat in enumerate(feature_dim_dict["sparse"]):
        layer = Embedding(feature_dim_dict["sparse"][feat], embedding_size,
                          embeddings_initializer=RandomNormal(
                              mean=0.0, stddev=init_std, seed=seed),
                          embeddings_regularizer=l2(l2_rev_V),
                          name='sparse_emb_' + str(idx) + '-' + feat)
        embedding_layers.append(layer)

    return embedding_layers
0 commit comments