Skip to content

Commit

Permalink
remove panns 1s chunking
Browse files Browse the repository at this point in the history
  • Loading branch information
jnwnlee committed Feb 16, 2024
1 parent 86885a8 commit ce1e902
Showing 1 changed file with 1 addition and 5 deletions.
6 changes: 1 addition & 5 deletions fadtk/model_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,8 @@ class PANNsModel(ModelLoader):
IEEE/ACM Transactions on Audio, Speech, and Language Processing 28 (2020): 2880-2894.
Specify the model to use (cnn14-32k, cnn14-16k, wavegram-logmel).
You can also specify whether to send the full provided audio or 1-s chunks of audio (cnn14-32k-1s). This was shown
to have a very low impact on performance.
"""
def __init__(self, variant: Literal['cnn14-32k', 'cnn14-32k-1s', 'cnn14-16k', 'wavegram-logmel'], audio_len=None):
def __init__(self, variant: Literal['cnn14-32k', 'cnn14-16k', 'wavegram-logmel'], audio_len=None):
super().__init__(f"panns-{variant}", 2048,
sr=16000 if variant == 'cnn14-16k' else 16000, audio_len=audio_len)
self.variant = variant
Expand Down Expand Up @@ -168,8 +166,6 @@ def load_model(self):
self.model.to(self.device)

def _get_embedding(self, audio: np.ndarray) -> np.ndarray:
if '-1s' in self.variant:
audio = chunk_np_array(audio, self.sr)
audio = torch.from_numpy(audio).float().to(self.device)
if len(audio.shape) == 1:
audio = audio.unsqueeze(0)
Expand Down

0 comments on commit ce1e902

Please sign in to comment.