Skip to content

Commit

Permalink
cleaning - change ' to " (black requirements)
Browse files (browse the repository at this point in the history)
  • Loading branch information
VictorSanh committed Jan 11, 2020
1 parent ebba9e9 commit e83d9f1
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions examples/distillation/lm_seqs_dataset.py
Original file line number Diff line number Diff line change
Expand Up
@@ -114,17 +114,17 @@ def remove_unknown_sequences(self):
"""
Remove sequences with a (too) high level of unknown tokens.
"""
if 'unk_token' not in self.params.special_tok_ids:
if "unk_token" not in self.params.special_tok_ids:
return
else:
unk_token_id = self.params.special_tok_ids['unk_token']
unk_token_id = self.params.special_tok_ids["unk_token"]
init_size = len(self)
unk_occs = np.array([np.count_nonzero(a == unk_token_id) for a in self.token_ids])
indices = (unk_occs/self.lengths) < 0.5
indices = (unk_occs / self.lengths) < 0.5
self.token_ids = self.token_ids[indices]
self.lengths = self.lengths[indices]
new_size = len(self)
logger.info(f'Remove {init_size - new_size} sequences with a high level of unknown tokens (50%).')
logger.info(f"Remove {init_size - new_size} sequences with a high level of unknown tokens (50%).")

def print_statistics(self):
"""
Expand Down

0 comments on commit e83d9f1

Please sign in to comment.