Add ability to extract spectrogram from detections (#41)

* Add ability to extract spectrograms for detections * Improve API command language * Fix and add missing keys for extracted media * Add tests for extracted media paths in detections * Update documentation for extractions * Change matplotlib to 3.5.3 to maintain py3.7 support * Correct test for slight float variability across platforms * Handle test variability in py38 and py39
joeweiss · Feb 28, 2023 · db1e13b · db1e13b
1 parent 8f6d5ba
commit db1e13b
Show file tree

Hide file tree

Showing 5 changed files with 199 additions and 27 deletions.
diff --git a/README.md b/README.md
@@ -135,7 +135,7 @@ print(species_list)
 - [Watch a directory for new files, then analyze with multiple analyzer models as files are saved](https://github.com/joeweiss/birdnetlib/blob/main/examples/watch_directory_multi_analyzer.py)
 - [Watch a directory for new files, and apply datetimes by parsing file names (eg _2022-08-15-birdnet-21:05:52.wav_) prior to analyzing](https://github.com/joeweiss/birdnetlib/blob/main/examples/watch_directory_date_filenames.py) This example can also be used to modify lat/lon, min_conf, etc., based on file name prior to analyzing.
 - [Limit detections to certain species by passing a predefined species list to the analyzer](https://github.com/joeweiss/birdnetlib/blob/main/examples/predefined_species_list.py) Useful when searching for a particular set of bird detections.
-- [Extract detections as audio file samples](https://github.com/joeweiss/birdnetlib/blob/main/examples/analyze_and_extract.py) Supports extractions as .flac, .wav and .mp3. Can be filtered to only extract files above a separate minimum confidence value.
+- [Extract detections as audio file samples and/or spectrograms](https://github.com/joeweiss/birdnetlib/blob/main/examples/analyze_and_extract.py) Supports audio extractions as .flac, .wav and .mp3. Spectrograms can be exported as .png, .jpg, or any other format supported by matplotlib.pyplot. Can be filtered to only extract files above a separate minimum confidence value.
 
 ## About BirdNET-Lite and BirdNET-Analyzer
 

diff --git a/examples/analyze_and_extract.py b/examples/analyze_and_extract.py
@@ -19,21 +19,26 @@
 export_dir = "extractions"  # Directory should already exist.
 
 # Extract to default audio files (.flac)
-recording.extract_detection_as_audio(directory=export_dir)
+recording.extract_detections_as_audio(directory=export_dir)
+
+# Extract to spectrograms
+recording.extract_detections_as_spectrogram(directory=export_dir)
 
 pprint(recording.detections)
 
 """
 [{'common_name': 'House Finch',
   'confidence': 0.5066996216773987,
   'end_time': 12.0,
-  'extraction_path': 'extractions/2022-08-15-birdnet-21:05:54_9s-12s.flac',
+  'extracted_audio_path': 'extractions/2022-08-15-birdnet-21:05:54_9s-12s.flac',
+  'extracted_spectrogram_path': 'extractions/2022-08-15-birdnet-21:05:54_9s-12s.jpg',
   'scientific_name': 'Haemorhous mexicanus',
   'start_time': 9.0},
  {'common_name': 'Dark-eyed Junco',
   'confidence': 0.3555494546890259,
   'end_time': 36.0,
-  'extraction_path': 'extractions/2022-08-15-birdnet-21:05:54_33s-36s.flac',
+  'extracted_audio_path': 'extractions/2022-08-15-birdnet-21:05:54_33s-36s.flac',
+  'extracted_spectrogram_path': 'extractions/2022-08-15-birdnet-21:05:54_33s-36s.jpg',
   'scientific_name': 'Junco hyemalis',
   'start_time': 33.0}
  ]
@@ -52,17 +57,18 @@
 export_dir = "extractions"  # Directory should already exist.
 
 # Extract to .mp3 audio files, only if confidence is > 0.5 with 2 seconds of audio padding.
-recording.extract_detection_as_audio(
+recording.extract_detections_as_audio(
     directory=export_dir, format="mp3", bitrate="192k", min_conf=0.5, padding_secs=2
 )
 
+
 pprint(recording.detections)
 
 """
 [{'common_name': 'House Finch',
   'confidence': 0.5066996216773987,
   'end_time': 12.0,
-  'extraction_path': 'extractions/2022-08-15-birdnet-21:05:54_7s-14s.mp3',
+  'extracted_audio_path': 'extractions/2022-08-15-birdnet-21:05:54_7s-14s.mp3',
   'scientific_name': 'Haemorhous mexicanus',
   'start_time': 9.0}
  ]

diff --git a/pyproject.toml b/pyproject.toml
@@ -10,7 +10,7 @@ exclude = [
 
 [project]
 name = "birdnetlib"
-version = "0.2.0"
+version = "0.3.0"
 authors = [
   { name="Joe Weiss", email="[email protected]" },
 ]
@@ -25,7 +25,8 @@ classifiers = [
 ]
 dependencies = [
     "watchdog==2.1.9",
-    "pydub==0.25.1"
+    "pydub==0.25.1",
+    "matplotlib==3.5.3",
 ]
 
 [project.urls]

diff --git a/src/birdnetlib/main.py b/src/birdnetlib/main.py
@@ -7,6 +7,7 @@
 from os import path
 from birdnetlib.utils import return_week_48_from_datetime
 from pathlib import Path
+import matplotlib.pyplot as plt
 
 SAMPLE_RATE = 48000
 
@@ -37,7 +38,9 @@ def __init__(
         self.sample_secs = 3.0
         self.duration = None
         self.ndarray = None
-        self.extraction_paths = {}
+        self.extracted_audio_paths = {}
+        self.extracted_spectrogram_paths = {}
+
         p = Path(self.path)
         self.filestem = p.stem
 
@@ -78,9 +81,14 @@ def detections(self):
 
                 # Add extraction paths if available.
                 extraction_key = f"{detection['start_time']}_{detection['end_time']}"
-                file_path = self.extraction_paths.get(extraction_key, None)
-                if file_path:
-                    detection["extraction_path"] = file_path
+                audio_file_path = self.extracted_audio_paths.get(extraction_key, None)
+                if audio_file_path:
+                    detection["extracted_audio_path"] = audio_file_path
+                spectrogram_file_path = self.extracted_spectrogram_paths.get(
+                    extraction_key, None
+                )
+                if spectrogram_file_path:
+                    detection["extracted_spectrogram_path"] = spectrogram_file_path
                 qualified_detections.append(detection)
 
         return qualified_detections
@@ -130,15 +138,15 @@ def read_audio_data(self):
 
         print("read_audio_data: complete, read ", str(len(self.chunks)), "chunks.")
 
-    def extract_detection_as_audio(
+    def extract_detections_as_audio(
         self,
         directory,
         padding_secs=0,
         format="flac",
         bitrate="192k",
         min_conf=0.0,
     ):
-        self.extraction_paths = {}  # Clear paths before extraction.
+        self.extracted_audio_paths = {}  # Clear paths before extraction.
         for detection in self.detections:
 
             # Skip if detection is under min_conf parameter.
@@ -182,7 +190,47 @@ def extract_detection_as_audio(
 
             # Save path for detections list.
             extraction_key = f"{detection['start_time']}_{detection['end_time']}"
-            self.extraction_paths[extraction_key] = path
+            self.extracted_audio_paths[extraction_key] = path
+
+    def extract_detections_as_spectrogram(
+        self, directory, padding_secs=0, min_conf=0.0, top=14000, format="jpg", dpi=144
+    ):
+        self.extracted_spectrogram_paths = {}  # Clear paths before extraction.
+        for detection in self.detections:
+
+            # Skip if detection is under min_conf parameter.
+            # Useful for reducing the number of extracted detections.
+            if detection["confidence"] < min_conf:
+                continue
+
+            start_sec = int(
+                detection["start_time"] - padding_secs
+                if detection["start_time"] > padding_secs
+                else 0
+            )
+            end_sec = int(
+                detection["end_time"] + padding_secs
+                if detection["end_time"] + padding_secs < self.duration
+                else self.duration
+            )
+
+            extract_array = self.ndarray[
+                start_sec * SAMPLE_RATE : end_sec * SAMPLE_RATE
+            ]
+
+            path = f"{directory}/{self.filestem}_{start_sec}s-{end_sec}s.{format}"
+
+            plt.specgram(extract_array, Fs=SAMPLE_RATE)
+            plt.ylim(top=top)
+            plt.ylabel("frequency kHz")
+            plt.title(f"{self.filename} ({start_sec}s - {end_sec}s)", fontsize=10)
+            plt.savefig(path, dpi=dpi)
+
+            # Save path for detections list.
+            extraction_spectrogram_key = (
+                f"{detection['start_time']}_{detection['end_time']}"
+            )
+            self.extracted_spectrogram_paths[extraction_spectrogram_key] = path
 
 
 class Detection:

diff --git a/tests/test_extraction.py b/tests/test_extraction.py
@@ -27,23 +27,29 @@ def test_extraction():
     )
     recording.analyze()
 
-    """
-    TODO: Remove this comment after feature is defined.
+    # TODO: Remove this comment after feature is defined.
 
-    # Local development tests.
+    # # Local development tests.
 
-    export_dir = os.path.join(os.path.dirname(__file__), "extractions")
+    # export_dir = os.path.join(os.path.dirname(__file__), "extractions")
 
-    # Export to mp3 @ 128k for all detections with min_conf of 0.8.
-    recording.extract_detection_as_audio(
-        directory=export_dir, format="mp3", bitrate="128k", min_conf=0.5
-    )
+    # # Export to mp3 @ 128k for all detections with min_conf of 0.5.
+    # recording.extract_detections_as_audio(
+    #     directory=export_dir, format="mp3", bitrate="128k", min_conf=0.5
+    # )
+
+    # # Extract spectrograms.
+    # recording.extract_detections_as_spectrogram(
+    #     directory=export_dir, min_conf=0.5, format="jpg"
+    # )
+
+    # return
 
-    """
+    # Test audio extractions.
 
     # flac test in temporary test directory.
     with tempfile.TemporaryDirectory() as export_dir:
-        recording.extract_detection_as_audio(directory=export_dir)
+        recording.extract_detections_as_audio(directory=export_dir)
 
         # Check file list.
         files = os.listdir(export_dir)
@@ -71,7 +77,7 @@ def test_extraction():
 
     # wav test in temporary test directory.
     with tempfile.TemporaryDirectory() as export_dir:
-        recording.extract_detection_as_audio(directory=export_dir, format="wav")
+        recording.extract_detections_as_audio(directory=export_dir, format="wav")
 
         # Check file list.
         files = os.listdir(export_dir)
@@ -99,7 +105,7 @@ def test_extraction():
     # mp3 test in temporary test directory (with custom min_conf extraction)
     with tempfile.TemporaryDirectory() as export_dir:
 
-        recording.extract_detection_as_audio(
+        recording.extract_detections_as_audio(
             directory=export_dir,
             format="mp3",
             bitrate="128k",
@@ -124,3 +130,114 @@ def test_extraction():
         audio = pydub.AudioSegment.from_mp3(f"{export_dir}/{files[0]}")
         assert audio.frame_rate == 48000
         assert audio.duration_seconds == 7.0
+
+    # Test spectrogram extractions
+
+    # spectrogram test in temporary test directory (with custom min_conf extraction)
+    with tempfile.TemporaryDirectory() as export_dir:
+
+        recording.extract_detections_as_spectrogram(
+            directory=export_dir,
+            format="jpg",
+            min_conf=0.4,
+            padding_secs=2,
+        )
+
+        # Check file list.
+        files = os.listdir(export_dir)
+        files.sort()
+        expected_files = [
+            "soundscape_40s-47s.jpg",
+            "soundscape_64s-71s.jpg",
+            "soundscape_7s-14s.jpg",
+            "soundscape_82s-89s.jpg",
+        ]
+        expected_files.sort()
+
+        assert files == expected_files
+
+    # spectrogram test in temporary test directory (with custom min_conf extraction)
+    with tempfile.TemporaryDirectory() as export_dir:
+
+        recording.extract_detections_as_spectrogram(
+            directory=export_dir,
+            format="png",
+            min_conf=0.4,
+        )
+
+        # Check file list.
+        files = os.listdir(export_dir)
+        files.sort()
+        expected_files = [
+            "soundscape_42s-45s.png",
+            "soundscape_66s-69s.png",
+            "soundscape_84s-87s.png",
+            "soundscape_9s-12s.png",
+        ]
+        expected_files.sort()
+
+        assert files == expected_files
+
+    # Extract audio and spectrogram.
+
+    with tempfile.TemporaryDirectory() as export_dir:
+
+        recording.extract_detections_as_audio(
+            directory=export_dir,
+            format="mp3",
+            bitrate="128k",
+            padding_secs=2,
+        )
+
+        recording.extract_detections_as_spectrogram(
+            directory=export_dir,
+            format="png",
+            padding_secs=2,
+        )
+
+        # Check file list.
+        files = os.listdir(export_dir)
+        files.sort()
+
+        expected_files = [
+            "soundscape_31s-38s.mp3",
+            "soundscape_31s-38s.png",
+            "soundscape_40s-47s.mp3",
+            "soundscape_40s-47s.png",
+            "soundscape_49s-56s.mp3",
+            "soundscape_49s-56s.png",
+            "soundscape_58s-65s.mp3",
+            "soundscape_58s-65s.png",
+            "soundscape_64s-71s.mp3",
+            "soundscape_64s-71s.png",
+            "soundscape_67s-74s.mp3",
+            "soundscape_67s-74s.png",
+            "soundscape_7s-14s.mp3",
+            "soundscape_7s-14s.png",
+            "soundscape_82s-89s.mp3",
+            "soundscape_82s-89s.png",
+            "soundscape_91s-98s.mp3",
+            "soundscape_91s-98s.png",
+        ]
+        expected_files.sort()
+
+        assert files == expected_files
+        assert len(recording.detections) == 9
+
+        detection = recording.detections[0]
+
+        # Assert confidence (round for slight float variability across platforms)
+        assert round(detection["confidence"], 3) == 0.507
+
+        del detection["confidence"]
+
+        expected_detection = {
+            "common_name": "House Finch",
+            "end_time": 12.0,
+            "extracted_audio_path": f"{export_dir}/soundscape_7s-14s.mp3",
+            "extracted_spectrogram_path": f"{export_dir}/soundscape_7s-14s.png",
+            "scientific_name": "Haemorhous mexicanus",
+            "start_time": 9.0,
+        }
+
+        assert detection == expected_detection