tree-wide: minor typo fixes
stefan-it committed Oct 31, 2018
1 parent d45d0fb commit d8014ef
Showing 5 changed files with 11 additions and 11 deletions.
2 changes: 1 addition & 1 deletion create_pretraining_data.py
@@ -334,7 +334,7 @@ def create_instances_from_document(

def create_masked_lm_predictions(tokens, masked_lm_prob,
max_predictions_per_seq, vocab_words, rng):
"""Creates the predictis for the masked LM objective."""
"""Creates the predictions for the masked LM objective."""

cand_indexes = []
for (i, token) in enumerate(tokens):
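The docstring fixed above belongs to the routine that chooses which tokens to mask for the masked-LM objective. As a simplified illustration of that recipe (the 80/10/10 masking scheme described in the BERT paper, not the repository's exact implementation), a sketch with the same signature might look like this; `tokens` is a list of wordpiece strings and `rng` a `random.Random` instance:

import collections

MaskedLmInstance = collections.namedtuple("MaskedLmInstance", ["index", "label"])

def simple_masked_lm_predictions(tokens, masked_lm_prob,
                                 max_predictions_per_seq, vocab_words, rng):
  """Simplified sketch: pick tokens to mask for the masked-LM objective."""
  # Special tokens are never masked.
  cand_indexes = [i for (i, token) in enumerate(tokens)
                  if token not in ("[CLS]", "[SEP]")]
  rng.shuffle(cand_indexes)
  num_to_predict = min(max_predictions_per_seq,
                       max(1, int(round(len(tokens) * masked_lm_prob))))

  output_tokens = list(tokens)
  masked_lms = []
  for index in cand_indexes[:num_to_predict]:
    if rng.random() < 0.8:      # 80%: replace with [MASK]
      masked_token = "[MASK]"
    elif rng.random() < 0.5:    # 10%: keep the original token
      masked_token = tokens[index]
    else:                       # 10%: replace with a random vocab word
      masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)]
    output_tokens[index] = masked_token
    masked_lms.append(MaskedLmInstance(index=index, label=tokens[index]))
  masked_lms.sort(key=lambda x: x.index)
  return output_tokens, masked_lms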
4 changes: 2 additions & 2 deletions extract_features.py
@@ -57,7 +57,7 @@

flags.DEFINE_bool(
"do_lower_case", True,
"Whethre to lower case the input text. Should be True for uncased "
"Whether to lower case the input text. Should be True for uncased "
"models and False for cased models.")

flags.DEFINE_integer("batch_size", 32, "Batch size for predictions.")
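The `do_lower_case` flag documented above must match the checkpoint in use. As a hedged sketch of how the flag is typically consumed (the vocab path is a placeholder), it is passed straight to the repository's `FullTokenizer`:

import tokenization  # tokenization.py from this repository

# Uncased checkpoints expect do_lower_case=True, cased ones expect False.
do_lower_case = True  # i.e. the value parsed into FLAGS.do_lower_case
tokenizer = tokenization.FullTokenizer(
    vocab_file="/path/to/uncased_vocab.txt",  # placeholder path
    do_lower_case=do_lower_case)
print(tokenizer.tokenize("Feature extraction example sentence"))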
@@ -231,7 +231,7 @@ def convert_examples_to_features(examples, seq_length, tokenizer):
# sequence or the second sequence. The embedding vectors for `type=0` and
# `type=1` were learned during pre-training and are added to the wordpiece
# embedding vector (and position vector). This is not *strictly* necessary
- # since the [SEP] token unambigiously separates the sequences, but it makes
+ # since the [SEP] token unambiguously separates the sequences, but it makes
# it easier for the model to learn the concept of sequences.
#
# For classification tasks, the first vector (corresponding to [CLS]) is
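The comment corrected above describes the `type_ids` (segment id) convention for sentence pairs. A minimal sketch of that layout, with illustrative tokens rather than code from this file:

# Hypothetical pair "how are you" / "i am fine", already wordpiece-tokenized.
tokens   = ["[CLS]", "how", "are", "you", "[SEP]", "i", "am", "fine", "[SEP]"]
type_ids = [0,       0,     0,     0,     0,       1,   1,    1,      1]
# A single sequence would use type 0 everywhere; padding positions also get
# type 0, with input_mask set to 0 so they are ignored by attention.
assert len(tokens) == len(type_ids)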
10 changes: 5 additions & 5 deletions modeling.py
@@ -54,7 +54,7 @@ def __init__(self,
layer in the Transformer encoder.
hidden_act: The non-linear activation function (function or string) in the
encoder and pooler.
- hidden_dropout_prob: The dropout probabilitiy for all fully connected
+ hidden_dropout_prob: The dropout probability for all fully connected
layers in the embeddings, encoder, and pooler.
attention_probs_dropout_prob: The dropout ratio for the attention
probabilities.
@@ -63,7 +63,7 @@ def __init__(self,
(e.g., 512 or 1024 or 2048).
type_vocab_size: The vocabulary size of the `token_type_ids` passed into
`BertModel`.
- initializer_range: The sttdev of the truncated_normal_initializer for
+ initializer_range: The stdev of the truncated_normal_initializer for
initializing all weight matrices.
"""
self.vocab_size = vocab_size
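The docstring entries corrected above all belong to `BertConfig`. As a hedged sketch (values are illustrative and roughly BERT-Base-like; a real checkpoint's bert_config.json is the authoritative source), the config is constructed by passing these arguments directly:

import modeling  # modeling.py from this repository

config = modeling.BertConfig(
    vocab_size=30522,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,           # dropout for fully connected layers
    attention_probs_dropout_prob=0.1,  # dropout on attention probabilities
    max_position_embeddings=512,       # longest supported sequence
    type_vocab_size=2,                 # two segment types: 0 and 1
    initializer_range=0.02)            # stddev of the truncated normal init

In practice the config is usually read with `modeling.BertConfig.from_json_file(bert_config_file)` rather than spelled out by hand.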
@@ -347,7 +347,7 @@ def dropout(input_tensor, dropout_prob):
Args:
input_tensor: float Tensor.
- dropout_prob: Python float. The probabiltiy of dropping out a value (NOT of
+ dropout_prob: Python float. The probability of dropping out a value (NOT of
*keeping* a dimension as in `tf.nn.dropout`).
Returns:
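The fix above is in the docstring of the repository's `dropout` helper: its `dropout_prob` is the probability of dropping a value, while TF 1.x's `tf.nn.dropout` takes a keep probability. A minimal sketch of a wrapper with that convention (an assumption about the helper's shape, not a quote of modeling.py):

import tensorflow as tf  # TF 1.x, as used by this repository

def dropout(input_tensor, dropout_prob):
  """dropout_prob is the probability of *dropping* a value."""
  if dropout_prob is None or dropout_prob == 0.0:
    return input_tensor
  # TF 1.x's tf.nn.dropout expects a keep probability, hence 1.0 - dropout_prob.
  return tf.nn.dropout(input_tensor, 1.0 - dropout_prob)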
@@ -605,7 +605,7 @@ def attention_layer(from_tensor,
attention_mask: (optional) int32 Tensor of shape [batch_size,
from_seq_length, to_seq_length]. The values should be 1 or 0. The
attention scores will effectively be set to -infinity for any positions in
- the mask that are 0, and will be unchaged for positions that are 1.
+ the mask that are 0, and will be unchanged for positions that are 1.
num_attention_heads: int. Number of attention heads.
size_per_head: int. Size of each attention head.
query_act: (optional) Activation function for the query transform.
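The `attention_mask` description above says positions marked 0 are pushed to effectively -infinity before the softmax. One common way to realize this, sketched here under the assumption that a large negative constant stands in for -infinity, is to turn the 0/1 mask into an additive bias:

import tensorflow as tf  # TF 1.x

def apply_attention_mask(attention_scores, attention_mask):
  """attention_scores: float [batch, heads, from_seq, to_seq].
  attention_mask: int32 [batch, from_seq, to_seq], 1 = attend, 0 = block."""
  # 1 -> 0.0 (unchanged), 0 -> large negative bias, so softmax weight ~ 0.
  adder = (1.0 - tf.cast(attention_mask, tf.float32)) * -10000.0
  # Broadcast the bias over the head dimension and add it before the softmax.
  return attention_scores + tf.expand_dims(adder, axis=1)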
@@ -973,7 +973,7 @@ def assert_rank(tensor, expected_rank, name=None):
name: Optional name of the tensor for the error message.
Raises:
ValueError: If the expected shape doesn"t match the actual shape.
ValueError: If the expected shape doesn't match the actual shape.
"""
if name is None:
name = tensor.name
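`assert_rank`, whose docstring is touched above, guards shape assumptions throughout modeling.py. A simplified standalone version of such a check (an illustration, not the file's exact code):

import tensorflow as tf  # TF 1.x

def simple_assert_rank(tensor, expected_rank, name=None):
  """Raises ValueError unless `tensor` has one of the expected ranks."""
  if name is None:
    name = tensor.name
  expected = ({expected_rank} if isinstance(expected_rank, int)
              else set(expected_rank))
  actual_rank = tensor.shape.ndims
  if actual_rank not in expected:
    raise ValueError("Tensor %s has rank %s, expected one of %s" %
                     (name, actual_rank, sorted(expected)))

# Typical use: input_ids is expected to be [batch_size, seq_length].
# simple_assert_rank(input_ids, expected_rank=2)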
2 changes: 1 addition & 1 deletion run_classifier.py
@@ -306,7 +306,7 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
# sequence or the second sequence. The embedding vectors for `type=0` and
# `type=1` were learned during pre-training and are added to the wordpiece
# embedding vector (and position vector). This is not *strictly* necessary
- # since the [SEP] token unambigiously separates the sequences, but it makes
+ # since the [SEP] token unambiguously separates the sequences, but it makes
# it easier for the model to learn the concept of sequences.
#
# For classification tasks, the first vector (corresponding to [CLS]) is
4 changes: 2 additions & 2 deletions run_pretraining.py
@@ -375,8 +375,8 @@ def input_fn(params):
d = d.repeat()

# We must `drop_remainder` on training because the TPU requires fixed
- # size dimensions. For eval, we assume we are evaling on the CPU or GPU
- # and we *don"t* want to drop the remainder, otherwise we wont cover
+ # size dimensions. For eval, we assume we are evaluating on the CPU or GPU
+ # and we *don't* want to drop the remainder, otherwise we wont cover
# every sample.
d = d.apply(
tf.contrib.data.map_and_batch(
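The comment fixed above explains why training batches use `drop_remainder` (TPUs need statically shaped batches) while eval keeps the last partial batch so every sample is covered. A hedged sketch of the same pattern with the plain `tf.data` batch API (the repository itself goes through `tf.contrib.data.map_and_batch`; buffer and batch sizes here are placeholders):

import tensorflow as tf  # TF 1.x

def batch_dataset(d, batch_size, is_training):
  """d: a tf.data.Dataset of already-parsed examples."""
  if is_training:
    d = d.repeat()
    d = d.shuffle(buffer_size=100)
    # TPU training needs fixed-size batches: drop the short final batch.
    return d.batch(batch_size, drop_remainder=True)
  # CPU/GPU eval: keep the remainder so every example is evaluated exactly once.
  return d.batch(batch_size, drop_remainder=False)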
