Fix edit_dist.cu header file directives (facebookresearch#4667)
cbalioglu authored Aug 29, 2022
1 parent eda7037 commit d81fac8
Showing 8 changed files with 29 additions and 19 deletions.
2 changes: 1 addition & 1 deletion fairseq/clib/libnat_cuda/edit_dist.cu
@@ -8,7 +8,7 @@

#include "edit_dist.h"

-#include <THC/THC.h>
+#include <c10/cuda/CUDAStream.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
4 changes: 2 additions & 2 deletions fairseq/model_parallel/megatron_trainer.py
@@ -38,11 +38,11 @@ def __init__(self, cfg: FairseqConfig, task, model, criterion, **kwargs):

def clip_grad_norm(self, clip_norm):
def _aggregate_model_parallel_grad_norm(total_norm):
-total_norm = total_norm ** 2
+total_norm = total_norm**2
distributed_utils.all_reduce(
total_norm, group=distributed_utils.get_model_parallel_group()
)
-total_norm = total_norm ** 0.5
+total_norm = total_norm**0.5
return total_norm

return self.optimizer.clip_grad_norm(
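The hunk above only adjusts spacing, but the underlying pattern is worth spelling out: each model-parallel rank squares its partial gradient norm, the squares are summed across ranks with an all-reduce, and the square root of the reduced value gives the global L2 norm. A minimal sketch under stated assumptions, using torch.distributed.all_reduce as a stand-in for fairseq's distributed_utils.all_reduce and a generic process-group argument:

import torch
import torch.distributed as dist

def aggregate_model_parallel_grad_norm(partial_norm: torch.Tensor, group=None) -> torch.Tensor:
    # Square the local norm so that summing across ranks yields the sum of
    # squared partial norms, then take the square root of the reduced value.
    total_norm = partial_norm ** 2
    dist.all_reduce(total_norm, group=group)  # default reduce op is SUM
    return total_norm ** 0.5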
2 changes: 1 addition & 1 deletion fairseq/models/speech_to_text/hub_interface.py
@@ -97,7 +97,7 @@ def get_prediction(
pred = cls.detokenize(task, pred_tokens[0][0]["tokens"])
eos_token = task.data_cfg.config.get("eos_token", None)
if eos_token:
-pred = ' '.join(pred.split(' ')[:-1])
+pred = " ".join(pred.split(" ")[:-1])

if synthesize_speech:
pfx = f"{_tgt_lang}_" if task.data_cfg.prepend_tgt_lang_tag else ""
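For reference, the reformatted line drops the last whitespace-separated token from the detokenized prediction. A small illustration of that behavior, assuming the configured eos_token appears as a trailing space-separated token (the token string below is hypothetical):

pred = "hello world <eos>"  # hypothetical detokenized output ending in an EOS token
pred = " ".join(pred.split(" ")[:-1])
assert pred == "hello world"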
4 changes: 3 additions & 1 deletion fairseq/models/speech_to_text/s2t_wav_transformer.py
@@ -246,7 +246,9 @@ def _conv_out_length(input_length, kernel_size, stride):

for i in range(len(self.feature_enc_layers)):
input_lengths = _conv_out_length(
-input_lengths, self.feature_enc_layers[i][1], self.feature_enc_layers[i][2]
+input_lengths,
+self.feature_enc_layers[i][1],
+self.feature_enc_layers[i][2],
)

return input_lengths.to(torch.long)
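The loop above applies the usual 1-D convolution output-length formula once per feature-extractor layer. A minimal sketch, assuming each entry of feature_enc_layers follows the wav2vec-style (dim, kernel_size, stride) layout so that indices 1 and 2 are the kernel size and stride:

import torch

def conv_out_length(input_lengths: torch.Tensor, kernel_size: int, stride: int) -> torch.Tensor:
    # Standard formula for a non-padded 1-D convolution:
    # floor((length - kernel_size) / stride) + 1
    return torch.floor((input_lengths - kernel_size) / stride) + 1

def total_out_length(input_lengths: torch.Tensor, feature_enc_layers) -> torch.Tensor:
    # Apply the formula layer by layer, as the reformatted loop does.
    for _, kernel_size, stride in feature_enc_layers:
        input_lengths = conv_out_length(input_lengths, kernel_size, stride)
    return input_lengths.to(torch.long)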
13 changes: 9 additions & 4 deletions fairseq/models/speech_to_text/xm_transformer.py
@@ -249,7 +249,10 @@ def add_wav2vec_asr_args(parser):
help="if set, then the weight-norm (in one pos_conv layer) is removed from the model",
)
parser.add_argument(
"--encoder-embed-dim", type=int, metavar="N", help="encoder embedding dimension to be used when w2v_path is None and no encoder_proj is set"
"--encoder-embed-dim",
type=int,
metavar="N",
help="encoder embedding dimension to be used when w2v_path is None and no encoder_proj is set",
)


@@ -497,8 +500,7 @@ def hub_models(cls):
"xm_transformer_s2ut_800m-es-en-st-asr-bt_h1_2022",
"xm_transformer_s2ut_800m-en-es-st_plus_asr",
"xm_transformer_s2ut_800m-hk-en-h1_2022",
"xm_transformer_s2ut_800m-en-hk-h1_2022"

"xm_transformer_s2ut_800m-en-hk-h1_2022",
]
return {i: f"{base_url}/{i}.tar.gz" for i in model_ids}

@@ -514,6 +516,7 @@ def from_pretrained(
**kwargs,
):
from fairseq import hub_utils
+
x = hub_utils.from_pretrained(
model_name_or_path,
checkpoint_file,
@@ -557,7 +560,9 @@ def build_encoder(cls, args):
if args.w2v_path:
state = checkpoint_utils.load_checkpoint_to_cpu(args.w2v_path)
if state.get("cfg") is not None:
encoder_embed_dim = state["cfg"]._content["model"]["encoder_embed_dim"]
encoder_embed_dim = state["cfg"]._content["model"][
"encoder_embed_dim"
]
elif state.get("args") is not None:
encoder_embed_dim = state["args"].encoder_embed_dim
else:
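The build_encoder hunk reads encoder_embed_dim from a loaded checkpoint, preferring the newer "cfg" entry over the legacy "args" entry. A hedged sketch of that fallback, treating the config as a plain nested mapping rather than omegaconf's _content internals and assuming args.encoder_embed_dim as the final fallback (the else branch is truncated in the hunk above):

def resolve_encoder_embed_dim(state: dict, args) -> int:
    # Newer checkpoints store the training config under "cfg"; older ones
    # keep an argparse Namespace under "args". Falling back to the current
    # args is an assumption made for illustration.
    if state.get("cfg") is not None:
        return state["cfg"]["model"]["encoder_embed_dim"]
    if state.get("args") is not None:
        return state["args"].encoder_embed_dim
    return args.encoder_embed_dim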
13 changes: 8 additions & 5 deletions fairseq/models/text_to_speech/vocoder.py
@@ -84,7 +84,7 @@ def get_window_sum_square(
x = torch.zeros(n, dtype=torch.float32)
for i in range(n_frames):
ofst = i * hop_length
-x[ofst: min(n, ofst + n_fft)] += w_sq[: max(0, min(n_fft, n - ofst))]
+x[ofst : min(n, ofst + n_fft)] += w_sq[: max(0, min(n_fft, n - ofst))]
return x

def inverse(self, magnitude: torch.Tensor, phase) -> torch.Tensor:
@@ -102,8 +102,8 @@ def inverse(self, magnitude: torch.Tensor, phase) -> torch.Tensor:
approx_nonzero_indices = win_sum_sq > self.tiny
x[:, :, approx_nonzero_indices] /= win_sum_sq[approx_nonzero_indices]
x *= self.n_fft / self.hop_length
-x = x[:, :, self.n_fft // 2:]
-x = x[:, :, : -self.n_fft // 2:]
+x = x[:, :, self.n_fft // 2 :]
+x = x[:, :, : -self.n_fft // 2 :]
return x

def forward(self, specgram: torch.Tensor) -> torch.Tensor:
@@ -252,8 +252,11 @@ def from_data_cfg(cls, args, data_cfg):
@classmethod
def hub_models(cls):
base_url = "http://dl.fbaipublicfiles.com/fairseq/vocoder"
model_ids = ["unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj_dur",
"unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_es_css10_dur", "unit_hifigan_HK_layer12.km2500_frame_TAT-TTS"]
model_ids = [
"unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj_dur",
"unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_es_css10_dur",
"unit_hifigan_HK_layer12.km2500_frame_TAT-TTS",
]
return {i: f"{base_url}/{i}.tar.gz" for i in model_ids}

@classmethod
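The first vocoder hunk accumulates squared analysis windows with overlap-add so that the inverse STFT (the second hunk) can divide out the window energy. A minimal sketch of that accumulation, assuming the total output length is n = n_fft + hop_length * (n_frames - 1) and w_sq is the squared, n_fft-long window:

import torch

def window_sum_square(w_sq: torch.Tensor, n_frames: int, hop_length: int, n_fft: int) -> torch.Tensor:
    n = n_fft + hop_length * (n_frames - 1)  # assumed total output length
    x = torch.zeros(n, dtype=torch.float32)
    for i in range(n_frames):
        ofst = i * hop_length
        # Add the squared window at each frame offset, clipping at the end.
        x[ofst : min(n, ofst + n_fft)] += w_sq[: max(0, min(n_fft, n - ofst))]
    return x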
2 changes: 1 addition & 1 deletion fairseq/models/transformer/transformer_config.py
@@ -138,7 +138,7 @@ class TransformerConfig(FairseqDataclass):
"help": "if true then the source and target embedding table is "
"merged into one table. This is going to make the model smaller but "
"it might hurt performance."
-}
+},
)
no_token_positional_embeddings: bool = field(
default=False,
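The trailing comma added here closes a metadata dict inside a dataclasses.field call, the pattern TransformerConfig uses for its options. A generic sketch of that layout with a hypothetical config class and option name:

from dataclasses import dataclass, field

@dataclass
class ExampleConfig:
    # Hypothetical flag, shown only to illustrate the field/metadata layout.
    share_embeddings: bool = field(
        default=False,
        metadata={
            "help": "if true then the source and target embedding table is "
            "merged into one table"
        },
    )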
8 changes: 4 additions & 4 deletions fairseq/sequence_generator.py
@@ -374,7 +374,7 @@ def _generate(
# handle max length constraint
if step >= max_len:
lprobs[:, : self.eos] = -math.inf
-lprobs[:, self.eos + 1:] = -math.inf
+lprobs[:, self.eos + 1 :] = -math.inf

# handle prefix tokens (possibly with different lengths)
if (
@@ -604,7 +604,7 @@ def _prefix_tokens(
if eos_mask.any():
# validate that the first beam matches the prefix
first_beam = tokens[eos_mask].view(-1, beam_size, tokens.size(-1))[
-:, 0, 1: step + 1
+:, 0, 1 : step + 1
]
eos_mask_batch_dim = eos_mask.view(-1, beam_size)[:, 0]
target_prefix = prefix_tokens[eos_mask_batch_dim][:, :step]
@@ -649,12 +649,12 @@ def finalize_hypos(
# tokens is (batch * beam, max_len). So the index_select
# gets the newly EOS rows, then selects cols 1..{step + 2}
tokens_clone = tokens.index_select(0, bbsz_idx)[
-:, 1: step + 2
+:, 1 : step + 2
] # skip the first index, which is EOS

tokens_clone[:, step] = self.eos
attn_clone = (
-attn.index_select(0, bbsz_idx)[:, :, 1: step + 2]
+attn.index_select(0, bbsz_idx)[:, :, 1 : step + 2]
if attn is not None
else None
)
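The first sequence_generator hunk enforces the maximum-length constraint by masking every vocabulary entry except EOS, so the only finite-scoring continuation at that step terminates the hypothesis. A minimal sketch of that masking, assuming lprobs has shape (beams, vocab_size) and eos is the EOS index:

import math
import torch

def force_eos(lprobs: torch.Tensor, eos: int) -> torch.Tensor:
    # Everything strictly below and strictly above the EOS index becomes
    # -inf, leaving EOS as the only token the beam can still emit.
    lprobs[:, :eos] = -math.inf
    lprobs[:, eos + 1 :] = -math.inf
    return lprobs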
