Fix edit_dist.cu header file directives (facebookresearch#4667)
cbalioglu authored Aug 29, 2022
1 parent eda7037 commit d81fac8
Showing 8 changed files with 29 additions and 19 deletions.
2 changes: 1 addition & 1 deletion fairseq/clib/libnat_cuda/edit_dist.cu
@@ -8,7 +8,7 @@

#include "edit_dist.h"

-#include <THC/THC.h>
+#include <c10/cuda/CUDAStream.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
4 changes: 2 additions & 2 deletions fairseq/model_parallel/megatron_trainer.py
@@ -38,11 +38,11 @@ def __init__(self, cfg: FairseqConfig, task, model, criterion, **kwargs):

def clip_grad_norm(self, clip_norm):
def _aggregate_model_parallel_grad_norm(total_norm):
-total_norm = total_norm ** 2
+total_norm = total_norm**2
distributed_utils.all_reduce(
total_norm, group=distributed_utils.get_model_parallel_group()
)
-total_norm = total_norm ** 0.5
+total_norm = total_norm**0.5
return total_norm

return self.optimizer.clip_grad_norm(
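The hunk above only adjusts spacing, but the underlying pattern is worth spelling out: each model-parallel rank squares its partial gradient norm, the squares are summed across ranks with an all-reduce, and the square root of the reduced value gives the global L2 norm. A minimal sketch under stated assumptions, using torch.distributed.all_reduce as a stand-in for fairseq's distributed_utils.all_reduce and a generic process-group argument:

import torch
import torch.distributed as dist

def aggregate_model_parallel_grad_norm(partial_norm: torch.Tensor, group=None) -> torch.Tensor:
    # Square the local norm so that summing across ranks yields the sum of
    # squared partial norms, then take the square root of the reduced value.
    total_norm = partial_norm ** 2
    dist.all_reduce(total_norm, group=group)  # default reduce op is SUM
    return total_norm ** 0.5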
2 changes: 1 addition & 1 deletion fairseq/models/speech_to_text/hub_interface.py
@@ -97,7 +97,7 @@ def get_prediction(
pred = cls.detokenize(task, pred_tokens[0][0]["tokens"])
eos_token = task.data_cfg.config.get("eos_token", None)
if eos_token:
-pred = ' '.join(pred.split(' ')[:-1])
+pred = " ".join(pred.split(" ")[:-1])

if synthesize_speech:
pfx = f"{_tgt_lang}_" if task.data_cfg.prepend_tgt_lang_tag else ""
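For reference, the reformatted line drops the last whitespace-separated token from the detokenized prediction. A small illustration of that behavior, assuming the configured eos_token appears as a trailing space-separated token (the token string below is hypothetical):

pred = "hello world <eos>"  # hypothetical detokenized output ending in an EOS token
pred = " ".join(pred.split(" ")[:-1])
assert pred == "hello world"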
4 changes: 3 additions & 1 deletion fairseq/models/speech_to_text/s2t_wav_transformer.py
@@ -246,7 +246,9 @@ def _conv_out_length(input_length, kernel_size, stride):

for i in range(len(self.feature_enc_layers)):
input_lengths = _conv_out_length(
-input_lengths, self.feature_enc_layers[i][1], self.feature_enc_layers[i][2]
+input_lengths,
+self.feature_enc_layers[i][1],
+self.feature_enc_layers[i][2],
)

return input_lengths.to(torch.long)
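The loop above applies the usual 1-D convolution output-length formula once per feature-extractor layer. A minimal sketch, assuming each entry of feature_enc_layers follows the wav2vec-style (dim, kernel_size, stride) layout so that indices 1 and 2 are the kernel size and stride:

import torch

def conv_out_length(input_lengths: torch.Tensor, kernel_size: int, stride: int) -> torch.Tensor:
    # Standard formula for a non-padded 1-D convolution:
    # floor((length - kernel_size) / stride) + 1
    return torch.floor((input_lengths - kernel_size) / stride) + 1

def total_out_length(input_lengths: torch.Tensor, feature_enc_layers) -> torch.Tensor:
    # Apply the formula layer by layer, as the reformatted loop does.
    for _, kernel_size, stride in feature_enc_layers:
        input_lengths = conv_out_length(input_lengths, kernel_size, stride)
    return input_lengths.to(torch.long)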
13 changes: 9 additions & 4 deletions fairseq/models/speech_to_text/xm_transformer.py
@@ -249,7 +249,10 @@ def add_wav2vec_asr_args(parser):
help="if set, then the weight-norm (in one pos_conv layer) is removed from the model",
)
parser.add_argument(
"--encoder-embed-dim", type=int, metavar="N", help="encoder embedding dimension to be used when w2v_path is None and no encoder_proj is set"
"--encoder-embed-dim",
type=int,
metavar="N",
help="encoder embedding dimension to be used when w2v_path is None and no encoder_proj is set",
)


@@ -497,8 +500,7 @@ def hub_models(cls):
"xm_transformer_s2ut_800m-es-en-st-asr-bt_h1_2022",
"xm_transformer_s2ut_800m-en-es-st_plus_asr",
"xm_transformer_s2ut_800m-hk-en-h1_2022",
"xm_transformer_s2ut_800m-en-hk-h1_2022"

"xm_transformer_s2ut_800m-en-hk-h1_2022",
]
return {i: f"{base_url}/{i}.tar.gz" for i in model_ids}

@@ -514,6 +516,7 @@ def from_pretrained(
**kwargs,
):
from fairseq import hub_utils
+
x = hub_utils.from_pretrained(
model_name_or_path,
checkpoint_file,
@@ -557,7 +560,9 @@ def build_encoder(cls, args):
if args.w2v_path:
state = checkpoint_utils.load_checkpoint_to_cpu(args.w2v_path)
if state.get("cfg") is not None:
encoder_embed_dim = state["cfg"]._content["model"]["encoder_embed_dim"]
encoder_embed_dim = state["cfg"]._content["model"][
"encoder_embed_dim"
]
elif state.get("args") is not None:
encoder_embed_dim = state["args"].encoder_embed_dim
else:
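The build_encoder hunk reads encoder_embed_dim from a loaded checkpoint, preferring the newer "cfg" entry over the legacy "args" entry. A hedged sketch of that fallback, treating the config as a plain nested mapping rather than omegaconf's _content internals and assuming args.encoder_embed_dim as the final fallback (the else branch is truncated in the hunk above):

def resolve_encoder_embed_dim(state: dict, args) -> int:
    # Newer checkpoints store the training config under "cfg"; older ones
    # keep an argparse Namespace under "args". Falling back to the current
    # args is an assumption made for illustration.
    if state.get("cfg") is not None:
        return state["cfg"]["model"]["encoder_embed_dim"]
    if state.get("args") is not None:
        return state["args"].encoder_embed_dim
    return args.encoder_embed_dim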
13 changes: 8 additions & 5 deletions fairseq/models/text_to_speech/vocoder.py
@@ -84,7 +84,7 @@ def get_window_sum_square(
x = torch.zeros(n, dtype=torch.float32)
for i in range(n_frames):
ofst = i * hop_length
-x[ofst: min(n, ofst + n_fft)] += w_sq[: max(0, min(n_fft, n - ofst))]
+x[ofst : min(n, ofst + n_fft)] += w_sq[: max(0, min(n_fft, n - ofst))]
return x

def inverse(self, magnitude: torch.Tensor, phase) -> torch.Tensor:
@@ -102,8 +102,8 @@ def inverse(self, magnitude: torch.Tensor, phase) -> torch.Tensor:
approx_nonzero_indices = win_sum_sq > self.tiny
x[:, :, approx_nonzero_indices] /= win_sum_sq[approx_nonzero_indices]
x *= self.n_fft / self.hop_length
-x = x[:, :, self.n_fft // 2:]
-x = x[:, :, : -self.n_fft // 2:]
+x = x[:, :, self.n_fft // 2 :]
+x = x[:, :, : -self.n_fft // 2 :]
return x

def forward(self, specgram: torch.Tensor) -> torch.Tensor:
@@ -252,8 +252,11 @@ def from_data_cfg(cls, args, data_cfg):
@classmethod
def hub_models(cls):
base_url = "http://dl.fbaipublicfiles.com/fairseq/vocoder"
model_ids = ["unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj_dur",
"unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_es_css10_dur", "unit_hifigan_HK_layer12.km2500_frame_TAT-TTS"]
model_ids = [
"unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj_dur",
"unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_es_css10_dur",
"unit_hifigan_HK_layer12.km2500_frame_TAT-TTS",
]
return {i: f"{base_url}/{i}.tar.gz" for i in model_ids}

@classmethod
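The first vocoder hunk accumulates squared analysis windows with overlap-add so that the inverse STFT (the second hunk) can divide out the window energy. A minimal sketch of that accumulation, assuming the total output length is n = n_fft + hop_length * (n_frames - 1) and w_sq is the squared, n_fft-long window:

import torch

def window_sum_square(w_sq: torch.Tensor, n_frames: int, hop_length: int, n_fft: int) -> torch.Tensor:
    n = n_fft + hop_length * (n_frames - 1)  # assumed total output length
    x = torch.zeros(n, dtype=torch.float32)
    for i in range(n_frames):
        ofst = i * hop_length
        # Add the squared window at each frame offset, clipping at the end.
        x[ofst : min(n, ofst + n_fft)] += w_sq[: max(0, min(n_fft, n - ofst))]
    return x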
2 changes: 1 addition & 1 deletion fairseq/models/transformer/transformer_config.py
@@ -138,7 +138,7 @@ class TransformerConfig(FairseqDataclass):
"help": "if true then the source and target embedding table is "
"merged into one table. This is going to make the model smaller but "
"it might hurt performance."
-}
+},
)
no_token_positional_embeddings: bool = field(
default=False,
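The trailing comma added here closes a metadata dict inside a dataclasses.field call, the pattern TransformerConfig uses for its options. A generic sketch of that layout with a hypothetical config class and option name:

from dataclasses import dataclass, field

@dataclass
class ExampleConfig:
    # Hypothetical flag, shown only to illustrate the field/metadata layout.
    share_embeddings: bool = field(
        default=False,
        metadata={
            "help": "if true then the source and target embedding table is "
            "merged into one table"
        },
    )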
8 changes: 4 additions & 4 deletions fairseq/sequence_generator.py
@@ -374,7 +374,7 @@ def _generate(
# handle max length constraint
if step >= max_len:
lprobs[:, : self.eos] = -math.inf
-lprobs[:, self.eos + 1:] = -math.inf
+lprobs[:, self.eos + 1 :] = -math.inf

# handle prefix tokens (possibly with different lengths)
if (
@@ -604,7 +604,7 @@ def _prefix_tokens(
if eos_mask.any():
# validate that the first beam matches the prefix
first_beam = tokens[eos_mask].view(-1, beam_size, tokens.size(-1))[
-:, 0, 1: step + 1
+:, 0, 1 : step + 1
]
eos_mask_batch_dim = eos_mask.view(-1, beam_size)[:, 0]
target_prefix = prefix_tokens[eos_mask_batch_dim][:, :step]
@@ -649,12 +649,12 @@ def finalize_hypos(
# tokens is (batch * beam, max_len). So the index_select
# gets the newly EOS rows, then selects cols 1..{step + 2}
tokens_clone = tokens.index_select(0, bbsz_idx)[
-:, 1: step + 2
+:, 1 : step + 2
] # skip the first index, which is EOS

tokens_clone[:, step] = self.eos
attn_clone = (
-attn.index_select(0, bbsz_idx)[:, :, 1: step + 2]
+attn.index_select(0, bbsz_idx)[:, :, 1 : step + 2]
if attn is not None
else None
)
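The first sequence_generator hunk enforces the maximum-length constraint by masking every vocabulary entry except EOS, so the only finite-scoring continuation at that step terminates the hypothesis. A minimal sketch of that masking, assuming lprobs has shape (beams, vocab_size) and eos is the EOS index:

import math
import torch

def force_eos(lprobs: torch.Tensor, eos: int) -> torch.Tensor:
    # Everything strictly below and strictly above the EOS index becomes
    # -inf, leaving EOS as the only token the beam can still emit.
    lprobs[:, :eos] = -math.inf
    lprobs[:, eos + 1 :] = -math.inf
    return lprobs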
