rectify args btw. train and translate
Summary: Make the command-line arguments pertaining to model architecture consistent between train.py and translate.py. Also use the s() scoping function for all intermediate blobs in attention.py (this is for compatibility with multi-headed attention).

Differential Revision: D5594312

fbshipit-source-id: cadf51d854b5a9174ec913f32c655be2abf111e5
jhcross authored and facebook-github-bot committed Aug 10, 2017
1 parent a77af48 commit 49882f7
Showing 2 changed files with 26 additions and 14 deletions.
4 changes: 2 additions & 2 deletions caffe2/python/attention.py
@@ -58,11 +58,11 @@ def _calc_attention_weights(
     )
     flat_attention_logits = model.net.UnpackSegments(
         [encoder_lengths, attention_logits],
-        'flat_attention_logits',
+        s(scope, 'flat_attention_logits'),
     )
     masked_attention_logits = model.net.PackSegments(
         [encoder_lengths, flat_attention_logits],
-        'masked_attention_logits',
+        s(scope, 'masked_attention_logits'),
         pad_minf=True,
     )
     attention_logits_transposed = model.net.ExpandDims(
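
Editor's note on the s() calls introduced above: in attention.py this helper prefixes an intermediate blob name with the attention scope, so several attention instances (for example, multiple heads) can live in the same net without blob-name collisions. The sketch below is illustrative only; the exact definition in caffe2/python/attention.py may differ, and the scope name used in the example is hypothetical.

# Illustrative sketch (not part of this commit) of a blob-name scoping helper
# in the spirit of the s() used in the diff above.
def s(scope, name):
    # Prefix the blob name with the scope so that two attention instances
    # (e.g. two heads) produce distinct intermediate blob names.
    return '{}/{}'.format(str(scope), str(name))

# Example with a hypothetical scope name:
# s('attention_head_0', 'flat_attention_logits')
# -> 'attention_head_0/flat_attention_logits'
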
36 changes: 24 additions & 12 deletions caffe2/python/models/seq2seq/translate.py
@@ -570,10 +570,14 @@ def main():
                         'in encoder')
     parser.add_argument('--use-attention', action='store_true',
                         help='Set flag to use seq2seq with attention model')
-    parser.add_argument('--encoder-cell-num-units', type=int, default=256,
-                        help='Number of cell units in the encoder layer')
+    parser.add_argument('--encoder-cell-num-units', type=int, default=512,
+                        help='Number of cell units per encoder layer')
+    parser.add_argument('--encoder-num-layers', type=int, default=2,
+                        help='Number encoder layers')
     parser.add_argument('--decoder-cell-num-units', type=int, default=512,
                         help='Number of cell units in the decoder layer')
+    parser.add_argument('--decoder-num-layers', type=int, default=2,
+                        help='Number decoder layers')
     parser.add_argument('--encoder-embedding-size', type=int, default=256,
                         help='Size of embedding in the encoder layer')
     parser.add_argument('--decoder-embedding-size', type=int, default=512,
@@ -594,21 +598,29 @@ def main():
 
     args = parser.parse_args()
 
+    encoder_layer_configs = [
+        dict(
+            num_units=args.encoder_cell_num_units,
+        ),
+    ] * args.encoder_num_layers
+
+    if args.use_bidirectional_encoder:
+        assert args.encoder_cell_num_units % 2 == 0
+        encoder_layer_configs[0]['num_units'] /= 2
+
+    decoder_layer_configs = [
+        dict(
+            num_units=args.decoder_cell_num_units,
+        ),
+    ] * args.decoder_num_layers
+
     run_seq2seq_beam_decoder(
         args,
         model_params=dict(
             attention=('regular' if args.use_attention else 'none'),
-            decoder_layer_configs=[
-                dict(
-                    num_units=args.decoder_cell_num_units,
-                ),
-            ],
+            decoder_layer_configs=decoder_layer_configs,
             encoder_type=dict(
-                encoder_layer_configs=[
-                    dict(
-                        num_units=args.encoder_cell_num_units,
-                    ),
-                ],
+                encoder_layer_configs=encoder_layer_configs,
                 use_bidirectional_encoder=args.use_bidirectional_encoder,
             ),
             encoder_embedding_size=args.encoder_embedding_size,
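
Editor's note on the layer-config construction above: the new code expands one dict per layer via list multiplication and, for a bidirectional encoder, halves the first layer's unit count (presumably so the two directions together match the configured size). The standalone sketch below shows what this produces for example flag values; it is illustrative only and not part of the commit, and the variable names merely mirror the argparse flags.

# Illustrative sketch (not part of the commit): how the layer configs above
# expand for example flag values.
encoder_cell_num_units = 512
encoder_num_layers = 2
use_bidirectional_encoder = True

encoder_layer_configs = [
    dict(num_units=encoder_cell_num_units),
] * encoder_num_layers

if use_bidirectional_encoder:
    # Halve the first layer's units when the encoder is bidirectional;
    # the configured size must therefore be even.
    assert encoder_cell_num_units % 2 == 0
    encoder_layer_configs[0]['num_units'] /= 2

# Note: list multiplication repeats references to the same dict, so the
# division above is visible through every entry of the list.
print(encoder_layer_configs)
# Python 3: [{'num_units': 256.0}, {'num_units': 256.0}]
# Python 2 (where this code originally ran), '/' on ints truncates: 256 instead of 256.0.
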
