
Commit 7c8ad48

format code&doc(shenweichen#241)
1 parent 351ae76 commit 7c8ad48


59 files changed (+483, -472 lines)

deepctr/estimator/feature_column.py (+3, -4)

@@ -22,7 +22,7 @@ def get_linear_logit(features, linear_feature_columns, l2_reg_linear=0):
 
     if l2_reg_linear > 0:
         for var in get_collection(get_GraphKeys().TRAINABLE_VARIABLES, LINEAR_SCOPE_NAME)[:-1]:
-            get_losses().add_loss(tf.nn.l2_loss(var, name=var.name.split(":")[0] + "_l2loss"),
+            get_losses().add_loss(l2_reg_linear * tf.nn.l2_loss(var, name=var.name.split(":")[0] + "_l2loss"),
                                   get_GraphKeys().REGULARIZATION_LOSSES)
     return linear_logits
 
@@ -35,7 +35,7 @@ def input_from_feature_columns(features, feature_columns, l2_reg_embedding=0.0):
             sparse_emb = tf.expand_dims(input_layer(features, [feat]), axis=1)
             sparse_emb_list.append(sparse_emb)
             if l2_reg_embedding > 0:
-                get_losses().add_loss(tf.nn.l2_loss(sparse_emb, name=feat.name + "_l2loss"),
+                get_losses().add_loss(l2_reg_embedding * tf.nn.l2_loss(sparse_emb, name=feat.name + "_l2loss"),
                                       get_GraphKeys().REGULARIZATION_LOSSES)
 
         else:
@@ -49,5 +49,4 @@ def is_embedding(feature_column):
         from tensorflow.python.feature_column.feature_column_v2 import EmbeddingColumn
     except:
         EmbeddingColumn = _EmbeddingColumn
-    return isinstance(feature_column, (_EmbeddingColumn,EmbeddingColumn))
-
+    return isinstance(feature_column, (_EmbeddingColumn, EmbeddingColumn))
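Before this change the raw `tf.nn.l2_loss` value was added to the REGULARIZATION_LOSSES collection, so `l2_reg_linear` / `l2_reg_embedding` only gated whether a penalty was added, not its magnitude; multiplying by the coefficient first makes the penalty actually scale with the setting. A minimal sketch of the corrected mechanism, using the plain TF 1.x collection API (variable name and shape are hypothetical, not from this repo):

    import tensorflow as tf

    tf.compat.v1.disable_eager_execution()  # collection-based losses need graph mode

    l2_reg_embedding = 1e-5
    emb = tf.compat.v1.get_variable("emb", shape=[100, 8])  # hypothetical embedding weights

    # Scale the L2 term by its coefficient *before* collecting it, as in the hunks above.
    penalty = l2_reg_embedding * tf.nn.l2_loss(emb, name="emb_l2loss")
    tf.compat.v1.losses.add_loss(penalty, tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)

    # The collected terms can later be summed into the training loss, e.g. via:
    reg_loss = tf.compat.v1.losses.get_regularization_loss()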

deepctr/estimator/inputs.py (+16, -24)

@@ -1,36 +1,27 @@
 import tensorflow as tf
-from ..layers.utils import combined_dnn_input
 
-def input_fn_pandas(df, features, label=None, batch_size=256, num_epochs=1, shuffle=False, queue_capacity=2560,
+
+def input_fn_pandas(df, features, label=None, batch_size=256, num_epochs=1, shuffle=False, queue_capacity_factor=10,
                     num_threads=1):
-    """
-
-    :param df:
-    :param features:
-    :param label:
-    :param batch_size:
-    :param num_epochs:
-    :param shuffle:
-    :param queue_capacity:
-    :param num_threads:
-    :return:
-    """
     if label is not None:
         y = df[label]
     else:
         y = None
     if tf.__version__ >= "2.0.0":
         return tf.compat.v1.estimator.inputs.pandas_input_fn(df[features], y, batch_size=batch_size,
                                                              num_epochs=num_epochs,
-                                                             shuffle=shuffle, queue_capacity=queue_capacity,
+                                                             shuffle=shuffle,
+                                                             queue_capacity=batch_size * queue_capacity_factor,
                                                              num_threads=num_threads)
 
     return tf.estimator.inputs.pandas_input_fn(df[features], y, batch_size=batch_size, num_epochs=num_epochs,
-                                               shuffle=shuffle, queue_capacity=queue_capacity, num_threads=num_threads)
+                                               shuffle=shuffle, queue_capacity=batch_size * queue_capacity_factor,
+                                               num_threads=num_threads)
 
 
-def input_fn_tfrecord(filenames, feature_description, label=None, batch_size=256, num_epochs=1, shuffle=False,
-                      num_parallel_calls=10):
+def input_fn_tfrecord(filenames, feature_description, label=None, batch_size=256, num_epochs=1, num_parallel_calls=8,
+                      shuffle_factor=10, prefetch_factor=1,
+                      ):
     def _parse_examples(serial_exmp):
         features = tf.parse_single_example(serial_exmp, features=feature_description)
         if label is not None:
@@ -40,16 +31,17 @@ def _parse_examples(serial_exmp):
 
     def input_fn():
         dataset = tf.data.TFRecordDataset(filenames)
-        dataset = dataset.map(_parse_examples, num_parallel_calls=num_parallel_calls).prefetch(
-            buffer_size=batch_size * 10)
-        if shuffle:
-            dataset = dataset.shuffle(buffer_size=batch_size * 10)
+        dataset = dataset.map(_parse_examples, num_parallel_calls=num_parallel_calls)
+        if shuffle_factor > 0:
+            dataset = dataset.shuffle(buffer_size=batch_size * shuffle_factor)
 
         dataset = dataset.repeat(num_epochs).batch(batch_size)
+
+        if prefetch_factor > 0:
+            dataset = dataset.prefetch(buffer_size=batch_size * prefetch_factor)
+
        iterator = dataset.make_one_shot_iterator()
 
         return iterator.get_next()
 
     return input_fn
-
-
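A hedged usage sketch of the two updated input functions: buffer sizes are now expressed as multiples of `batch_size` (`queue_capacity_factor`, `shuffle_factor`, `prefetch_factor`), and a factor of 0 disables shuffling or prefetching in the TFRecord path. The column names, file paths and feature spec below are hypothetical; a TF 1.x-style Estimator workflow is assumed:

    import pandas as pd
    import tensorflow as tf
    from deepctr.estimator.inputs import input_fn_pandas, input_fn_tfrecord

    df = pd.read_csv("train.csv")             # hypothetical training frame
    feature_names = ["feat_a", "feat_b"]      # hypothetical feature columns

    # Queue capacity is derived from the batch size: 256 * 10 = 2560 elements.
    train_input_fn = input_fn_pandas(df, feature_names, label="label",
                                     batch_size=256, num_epochs=1, shuffle=True,
                                     queue_capacity_factor=10, num_threads=1)

    # TFRecord variant: shuffle/prefetch buffers are batch_size * factor.
    feature_description = {name: tf.FixedLenFeature([1], tf.float32) for name in feature_names}
    feature_description["label"] = tf.FixedLenFeature([1], tf.float32)
    test_input_fn = input_fn_tfrecord("test.tfrecord", feature_description, label="label",
                                      batch_size=256, num_epochs=1,
                                      shuffle_factor=0, prefetch_factor=1)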

deepctr/estimator/models/afm.py (+5, -2)

@@ -20,7 +20,7 @@
 def AFMEstimator(linear_feature_columns, dnn_feature_columns, use_attention=True, attention_factor=8,
                  l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_att=1e-5, afm_dropout=0, seed=1024,
                  task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
-                 dnn_optimizer='Adagrad'):
+                 dnn_optimizer='Adagrad', training_chief_hooks=None):
     """Instantiates the Attentional Factorization Machine architecture.
 
     :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
@@ -41,6 +41,8 @@ def AFMEstimator(linear_feature_columns, dnn_feature_columns, use_attention=True
         the linear part of the model. Defaults to FTRL optimizer.
     :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
         the deep part of the model. Defaults to Adagrad optimizer.
+    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run on the chief worker during training.
     :return: A Tensorflow Estimator instance.
 
     """
@@ -62,6 +64,7 @@ def _model_fn(features, labels, mode, config):
 
         logits = linear_logits + fm_logit
 
-        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer)
+        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
+                                training_chief_hooks=training_chief_hooks)
 
     return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
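The same `training_chief_hooks` argument is threaded through every estimator touched in this commit; it is forwarded to `deepctr_model_fn` and, per the docstring, the given `tf.train.SessionRunHook`s run on the chief worker during training. A hedged usage sketch (the feature columns and the input function are assumed to be defined, e.g. as in the inputs.py example above; the hook choice is only an illustration):

    import tensorflow as tf
    from deepctr.estimator.models.afm import AFMEstimator

    # Hooks passed here are attached via the EstimatorSpec and run on the
    # chief worker during training (per the docstring above).
    chief_hooks = [tf.train.StepCounterHook(every_n_steps=100)]

    model = AFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary',
                         training_chief_hooks=chief_hooks)
    model.train(train_input_fn)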

deepctr/estimator/models/autoint.py (+5, -2)

@@ -23,7 +23,7 @@ def AutoIntEstimator(linear_feature_columns, dnn_feature_columns, att_layer_num=
                      dnn_hidden_units=(256, 256), dnn_activation='relu', l2_reg_linear=1e-5,
                      l2_reg_embedding=1e-5, l2_reg_dnn=0, dnn_use_bn=False, dnn_dropout=0, seed=1024,
                      task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
-                     dnn_optimizer='Adagrad'):
+                     dnn_optimizer='Adagrad', training_chief_hooks=None):
     """Instantiates the AutoInt Network architecture.
 
     :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
@@ -49,6 +49,8 @@ def AutoIntEstimator(linear_feature_columns, dnn_feature_columns, att_layer_num=
         the linear part of the model. Defaults to FTRL optimizer.
     :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
         the deep part of the model. Defaults to Adagrad optimizer.
+    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run on the chief worker during training.
     :return: A Tensorflow Estimator instance.
 
     """
@@ -89,6 +91,7 @@ def _model_fn(features, labels, mode, config):
 
         logits = linear_logits + final_logit
 
-        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer)
+        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
+                                training_chief_hooks=training_chief_hooks)
 
     return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)

deepctr/estimator/models/ccpm.py (+7, -3)

@@ -21,7 +21,7 @@
 def CCPMEstimator(linear_feature_columns, dnn_feature_columns, conv_kernel_width=(6, 5), conv_filters=(4, 4),
                   dnn_hidden_units=(256,), l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_dnn=0, dnn_dropout=0,
                   seed=1024, task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
-                  dnn_optimizer='Adagrad'):
+                  dnn_optimizer='Adagrad', training_chief_hooks=None):
     """Instantiates the Convolutional Click Prediction Model architecture.
 
     :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
@@ -43,6 +43,8 @@ def CCPMEstimator(linear_feature_columns, dnn_feature_columns, conv_kernel_width
         the linear part of the model. Defaults to FTRL optimizer.
     :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
         the deep part of the model. Defaults to Adagrad optimizer.
+    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run on the chief worker during training.
     :return: A Tensorflow Estimator instance.
 
     """
@@ -79,11 +81,13 @@ def _model_fn(features, labels, mode, config):
 
         flatten_result = tf.keras.layers.Flatten()(pooling_result)
         dnn_out = DNN(dnn_hidden_units, l2_reg=l2_reg_dnn,
-                      dropout_rate=dnn_dropout)(flatten_result, training=train_flag)
+                      dropout_rate=dnn_dropout, seed=seed)(flatten_result, training=train_flag)
         dnn_logit = tf.keras.layers.Dense(1, use_bias=False)(dnn_out)
 
         logits = linear_logits + dnn_logit
 
-        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer)
+        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
+                                training_chief_hooks=training_chief_hooks
+                                )
 
     return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)

deepctr/estimator/models/dcn.py (+5, -2)

@@ -20,7 +20,7 @@ def DCNEstimator(linear_feature_columns, dnn_feature_columns, cross_num=2, dnn_h
                  l2_reg_embedding=1e-5,
                  l2_reg_cross=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_use_bn=False,
                  dnn_activation='relu', task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
-                 dnn_optimizer='Adagrad'):
+                 dnn_optimizer='Adagrad', training_chief_hooks=None):
     """Instantiates the Deep&Cross Network architecture.
 
     :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
@@ -43,6 +43,8 @@ def DCNEstimator(linear_feature_columns, dnn_feature_columns, cross_num=2, dnn_h
         the linear part of the model. Defaults to FTRL optimizer.
     :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
         the deep part of the model. Defaults to Adagrad optimizer.
+    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run on the chief worker during training.
     :return: A Tensorflow Estimator instance.
 
     """
@@ -81,6 +83,7 @@ def _model_fn(features, labels, mode, config):
 
         logits = linear_logits + final_logit
 
-        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer)
+        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
+                                training_chief_hooks=training_chief_hooks)
 
     return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)

deepctr/estimator/models/deepfm.py (+6, -2)

@@ -21,7 +21,7 @@ def DeepFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_unit
                     l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
                     dnn_activation='relu', dnn_use_bn=False, task='binary', model_dir=None, config=None,
                     linear_optimizer='Ftrl',
-                    dnn_optimizer='Adagrad'):
+                    dnn_optimizer='Adagrad', training_chief_hooks=None):
     """Instantiates the DeepFM Network architecture.
 
     :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
@@ -44,6 +44,8 @@ def DeepFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_unit
         the linear part of the model. Defaults to FTRL optimizer.
     :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
         the deep part of the model. Defaults to Adagrad optimizer.
+    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run on the chief worker during training.
     :return: A Tensorflow Estimator instance.
 
     """
@@ -68,6 +70,8 @@ def _model_fn(features, labels, mode, config):
 
         logits = linear_logits + fm_logit + dnn_logit
 
-        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer)
+        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
+                                training_chief_hooks
+                                =training_chief_hooks)
 
     return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)

deepctr/estimator/models/fibinet.py (+5, -2)

@@ -21,7 +21,7 @@ def FiBiNETEstimator(linear_feature_columns, dnn_feature_columns, bilinear_type=
                      dnn_hidden_units=(128, 128), l2_reg_linear=1e-5,
                      l2_reg_embedding=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
                      task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
-                     dnn_optimizer='Adagrad'):
+                     dnn_optimizer='Adagrad', training_chief_hooks=None):
     """Instantiates the Feature Importance and Bilinear feature Interaction NETwork architecture.
 
     :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
@@ -44,6 +44,8 @@ def FiBiNETEstimator(linear_feature_columns, dnn_feature_columns, bilinear_type=
         the linear part of the model. Defaults to FTRL optimizer.
     :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
         the deep part of the model. Defaults to Adagrad optimizer.
+    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run on the chief worker during training.
     :return: A Tensorflow Estimator instance.
     """
 
@@ -73,6 +75,7 @@ def _model_fn(features, labels, mode, config):
 
         logits = linear_logits + dnn_logit
 
-        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer)
+        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
+                                training_chief_hooks=training_chief_hooks)
 
     return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)

deepctr/estimator/models/fnn.py (+5, -2)

@@ -17,7 +17,7 @@
 def FNNEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128),
                  l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
                  dnn_activation='relu', task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
-                 dnn_optimizer='Adagrad'):
+                 dnn_optimizer='Adagrad', training_chief_hooks=None):
     """Instantiates the Factorization-supported Neural Network architecture.
 
     :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
@@ -38,6 +38,8 @@ def FNNEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(
         the linear part of the model. Defaults to FTRL optimizer.
     :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
         the deep part of the model. Defaults to Adagrad optimizer.
+    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run on the chief worker during training.
     :return: A Tensorflow Estimator instance.
 
     """
@@ -58,6 +60,7 @@ def _model_fn(features, labels, mode, config):
 
         logits = linear_logits + dnn_logit
 
-        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer)
+        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
+                                training_chief_hooks=training_chief_hooks)
 
     return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)

deepctr/estimator/models/fwfm.py (+5, -2)

@@ -23,7 +23,7 @@ def FwFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=
                   l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_field_strength=0.00001, l2_reg_dnn=0,
                   seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task='binary', model_dir=None,
                   config=None, linear_optimizer='Ftrl',
-                  dnn_optimizer='Adagrad'):
+                  dnn_optimizer='Adagrad', training_chief_hooks=None):
     """Instantiates the DeepFwFM Network architecture.
 
     :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
@@ -48,6 +48,8 @@ def FwFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=
         the linear part of the model. Defaults to FTRL optimizer.
     :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
         the deep part of the model. Defaults to Adagrad optimizer.
+    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run on the chief worker during training.
     :return: A Tensorflow Estimator instance.
 
     """
@@ -77,6 +79,7 @@ def _model_fn(features, labels, mode, config):
 
         logits = add_func(final_logit_components)
 
-        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer)
+        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
+                                training_chief_hooks=training_chief_hooks)
 
     return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)

deepctr/estimator/models/nfm.py (+5, -2)

@@ -19,7 +19,7 @@ def NFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(
                  l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, seed=1024, bi_dropout=0,
                  dnn_dropout=0, dnn_activation='relu', task='binary', model_dir=None, config=None,
                  linear_optimizer='Ftrl',
-                 dnn_optimizer='Adagrad'):
+                 dnn_optimizer='Adagrad', training_chief_hooks=None):
     """Instantiates the Neural Factorization Machine architecture.
 
     :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
@@ -41,6 +41,8 @@ def NFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(
         the linear part of the model. Defaults to FTRL optimizer.
     :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
         the deep part of the model. Defaults to Adagrad optimizer.
+    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run on the chief worker during training.
     :return: A Tensorflow Estimator instance.
 
     """
@@ -66,6 +68,7 @@ def _model_fn(features, labels, mode, config):
 
         logits = linear_logits + dnn_logit
 
-        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer)
+        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
+                                training_chief_hooks=training_chief_hooks)
 
     return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
