Skip to content

Commit

Permalink
Add ability to extract spectrogram from detections (#41)
Browse files Browse the repository at this point in the history
* Add ability to extract spectrograms for detections

* Improve api command language

* Fix and add missing keys for extracted media

* Add tests for extracted media paths in detections

* Update documentation for extractions

* Change matplotlib to 3.5.3 to maintain py3.7 support

* Correct test for slight float variability across platforms

* Handle test variability in py38 and py39
  • Loading branch information
joeweiss authored Feb 28, 2023
1 parent 8f6d5ba commit db1e13b
Show file tree
Hide file tree
Showing 5 changed files with 199 additions and 27 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ print(species_list)
- [Watch a directory for new files, then analyze with multiple analyzer models as files are saved](https://github.com/joeweiss/birdnetlib/blob/main/examples/watch_directory_multi_analyzer.py)
- [Watch a directory for new files, and apply datetimes by parsing file names (eg _2022-08-15-birdnet-21:05:52.wav_) prior to analyzing](https://github.com/joeweiss/birdnetlib/blob/main/examples/watch_directory_date_filenames.py) This example can also be used to modify lat/lon, min_conf, etc., based on file name prior to analyzing.
- [Limit detections to certain species by passing a predefined species list to the analyzer](https://github.com/joeweiss/birdnetlib/blob/main/examples/predefined_species_list.py) Useful when searching for a particular set of bird detections.
- [Extract detections as audio file samples](https://github.com/joeweiss/birdnetlib/blob/main/examples/analyze_and_extract.py) Supports extractions as .flac, .wav and .mp3. Can be filtered to only extract files above a separate minimum confidence value.
- [Extract detections as audio file samples and/or spectrograms](https://github.com/joeweiss/birdnetlib/blob/main/examples/analyze_and_extract.py) Supports audio extractions as .flac, .wav and .mp3. Spectrograms are exported as .png, .jpg, or any other format supported by matplotlib.pyplot. Can be filtered to only extract files above a separate minimum confidence value.

## About BirdNET-Lite and BirdNET-Analyzer

Expand Down
16 changes: 11 additions & 5 deletions examples/analyze_and_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,26 @@
export_dir = "extractions" # Directory should already exist.

# Extract to default audio files (.flac)
recording.extract_detection_as_audio(directory=export_dir)
recording.extract_detections_as_audio(directory=export_dir)

# Extract to spectrograms
recording.extract_detections_as_spectrogram(directory=export_dir)

pprint(recording.detections)

"""
[{'common_name': 'House Finch',
'confidence': 0.5066996216773987,
'end_time': 12.0,
'extraction_path': 'extractions/2022-08-15-birdnet-21:05:54_9s-12s.flac',
'extracted_audio_path': 'extractions/2022-08-15-birdnet-21:05:54_9s-12s.flac',
'extracted_spectrogram_path': 'extractions/2022-08-15-birdnet-21:05:54_9s-12s.jpg',
'scientific_name': 'Haemorhous mexicanus',
'start_time': 9.0},
{'common_name': 'Dark-eyed Junco',
'confidence': 0.3555494546890259,
'end_time': 36.0,
'extraction_path': 'extractions/2022-08-15-birdnet-21:05:54_33s-36s.flac',
'extracted_audio_path': 'extractions/2022-08-15-birdnet-21:05:54_33s-36s.flac',
'extracted_spectrogram_path': 'extractions/2022-08-15-birdnet-21:05:54_33s-36s.jpg',
'scientific_name': 'Junco hyemalis',
'start_time': 33.0}
]
Expand All @@ -52,17 +57,18 @@
export_dir = "extractions" # Directory should already exist.

# Extract to .mp3 audio files with 2 seconds of audio padding, only for detections with confidence >= 0.5.
recording.extract_detection_as_audio(
recording.extract_detections_as_audio(
directory=export_dir, format="mp3", bitrate="192k", min_conf=0.5, padding_secs=2
)


pprint(recording.detections)

"""
[{'common_name': 'House Finch',
'confidence': 0.5066996216773987,
'end_time': 12.0,
'extraction_path': 'extractions/2022-08-15-birdnet-21:05:54_7s-14s.mp3',
'extracted_audio_path': 'extractions/2022-08-15-birdnet-21:05:54_7s-14s.mp3',
'scientific_name': 'Haemorhous mexicanus',
'start_time': 9.0}
]
Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ exclude = [

[project]
name = "birdnetlib"
version = "0.2.0"
version = "0.3.0"
authors = [
{ name="Joe Weiss", email="[email protected]" },
]
Expand All @@ -25,7 +25,8 @@ classifiers = [
]
dependencies = [
"watchdog==2.1.9",
"pydub==0.25.1"
"pydub==0.25.1",
"matplotlib==3.5.3",
]

[project.urls]
Expand Down
62 changes: 55 additions & 7 deletions src/birdnetlib/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from os import path
from birdnetlib.utils import return_week_48_from_datetime
from pathlib import Path
import matplotlib.pyplot as plt

SAMPLE_RATE = 48000

Expand Down Expand Up @@ -37,7 +38,9 @@ def __init__(
self.sample_secs = 3.0
self.duration = None
self.ndarray = None
self.extraction_paths = {}
self.extracted_audio_paths = {}
self.extracted_spectrogram_paths = {}

p = Path(self.path)
self.filestem = p.stem

Expand Down Expand Up @@ -78,9 +81,14 @@ def detections(self):

# Add extraction paths if available.
extraction_key = f"{detection['start_time']}_{detection['end_time']}"
file_path = self.extraction_paths.get(extraction_key, None)
if file_path:
detection["extraction_path"] = file_path
audio_file_path = self.extracted_audio_paths.get(extraction_key, None)
if audio_file_path:
detection["extracted_audio_path"] = audio_file_path
spectrogram_file_path = self.extracted_spectrogram_paths.get(
extraction_key, None
)
if spectrogram_file_path:
detection["extracted_spectrogram_path"] = spectrogram_file_path
qualified_detections.append(detection)

return qualified_detections
Expand Down Expand Up @@ -130,15 +138,15 @@ def read_audio_data(self):

print("read_audio_data: complete, read ", str(len(self.chunks)), "chunks.")

def extract_detection_as_audio(
def extract_detections_as_audio(
self,
directory,
padding_secs=0,
format="flac",
bitrate="192k",
min_conf=0.0,
):
self.extraction_paths = {} # Clear paths before extraction.
self.extracted_audio_paths = {} # Clear paths before extraction.
for detection in self.detections:

# Skip if detection is under min_conf parameter.
Expand Down Expand Up @@ -182,7 +190,47 @@ def extract_detection_as_audio(

# Save path for detections list.
extraction_key = f"{detection['start_time']}_{detection['end_time']}"
self.extraction_paths[extraction_key] = path
self.extracted_audio_paths[extraction_key] = path

def extract_detections_as_spectrogram(
    self, directory, padding_secs=0, min_conf=0.0, top=14000, format="jpg", dpi=144
):
    """Save one spectrogram image per detection and record the file paths.

    Each image is written to
    ``{directory}/{filestem}_{start}s-{end}s.{format}`` and its path is
    stored in ``self.extracted_spectrogram_paths`` under the key
    ``"{start_time}_{end_time}"`` of the (unpadded) detection. Paths from
    any previous call are discarded first.

    Args:
        directory: Output directory; it is not created here.
        padding_secs: Seconds of audio added before and after each
            detection, clamped to the start and end of the recording.
        min_conf: Detections with a confidence below this value are skipped.
        top: Upper limit of the frequency (y) axis, in Hz.
        format: File extension of the saved image (e.g. "jpg", "png");
            matplotlib infers the image format from it.
        dpi: Resolution passed to ``savefig``.
    """
    self.extracted_spectrogram_paths = {}  # Clear paths before extraction.
    for detection in self.detections:

        # Skip if detection is under min_conf parameter.
        # Useful for reducing the number of extracted detections.
        if detection["confidence"] < min_conf:
            continue

        # Pad the detection window, clamped to the bounds of the recording,
        # and truncate to whole seconds.
        start_sec = int(max(detection["start_time"] - padding_secs, 0))
        end_sec = int(min(detection["end_time"] + padding_secs, self.duration))

        extract_array = self.ndarray[
            start_sec * SAMPLE_RATE : end_sec * SAMPLE_RATE
        ]

        # Named file_path (not path) to avoid shadowing the os.path import.
        file_path = f"{directory}/{self.filestem}_{start_sec}s-{end_sec}s.{format}"

        # Use a dedicated figure per detection and always close it.
        # Drawing on pyplot's implicit current figure would stack every
        # spectrogram onto the same axes and keep the figure alive after
        # this method returns.
        fig, ax = plt.subplots()
        try:
            ax.specgram(extract_array, Fs=SAMPLE_RATE)
            ax.set_ylim(top=top)
            # With Fs given in samples per second the axis is in Hz, not kHz.
            ax.set_ylabel("frequency Hz")
            ax.set_title(
                f"{self.filename} ({start_sec}s - {end_sec}s)", fontsize=10
            )
            fig.savefig(file_path, dpi=dpi)
        finally:
            plt.close(fig)

        # Save path for detections list.
        extraction_spectrogram_key = (
            f"{detection['start_time']}_{detection['end_time']}"
        )
        self.extracted_spectrogram_paths[extraction_spectrogram_key] = file_path


class Detection:
Expand Down
141 changes: 129 additions & 12 deletions tests/test_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,29 @@ def test_extraction():
)
recording.analyze()

"""
TODO: Remove this comment after feature is defined.
# TODO: Remove this comment after feature is defined.

# Local development tests.
# # Local development tests.

export_dir = os.path.join(os.path.dirname(__file__), "extractions")
# export_dir = os.path.join(os.path.dirname(__file__), "extractions")

# Export to mp3 @ 128k for all detections with min_conf of 0.8.
recording.extract_detection_as_audio(
directory=export_dir, format="mp3", bitrate="128k", min_conf=0.5
)
# # Export to mp3 @ 128k for all detections with min_conf of 0.5.
# recording.extract_detections_as_audio(
# directory=export_dir, format="mp3", bitrate="128k", min_conf=0.5
# )

# # Extract spectrograms.
# recording.extract_detections_as_spectrogram(
# directory=export_dir, min_conf=0.5, format="jpg"
# )

# return

"""
# Test audio extractions.

# flac test in temporary test directory.
with tempfile.TemporaryDirectory() as export_dir:
recording.extract_detection_as_audio(directory=export_dir)
recording.extract_detections_as_audio(directory=export_dir)

# Check file list.
files = os.listdir(export_dir)
Expand Down Expand Up @@ -71,7 +77,7 @@ def test_extraction():

# wav test in temporary test directory.
with tempfile.TemporaryDirectory() as export_dir:
recording.extract_detection_as_audio(directory=export_dir, format="wav")
recording.extract_detections_as_audio(directory=export_dir, format="wav")

# Check file list.
files = os.listdir(export_dir)
Expand Down Expand Up @@ -99,7 +105,7 @@ def test_extraction():
# mp3 test in temporary test directory (with custom min_conf extraction)
with tempfile.TemporaryDirectory() as export_dir:

recording.extract_detection_as_audio(
recording.extract_detections_as_audio(
directory=export_dir,
format="mp3",
bitrate="128k",
Expand All @@ -124,3 +130,114 @@ def test_extraction():
audio = pydub.AudioSegment.from_mp3(f"{export_dir}/{files[0]}")
assert audio.frame_rate == 48000
assert audio.duration_seconds == 7.0

# Test spectrogram extractions

# spectrogram test in temporary test directory (with custom min_conf extraction)
with tempfile.TemporaryDirectory() as export_dir:

recording.extract_detections_as_spectrogram(
directory=export_dir,
format="jpg",
min_conf=0.4,
padding_secs=2,
)

# Check file list.
files = os.listdir(export_dir)
files.sort()
expected_files = [
"soundscape_40s-47s.jpg",
"soundscape_64s-71s.jpg",
"soundscape_7s-14s.jpg",
"soundscape_82s-89s.jpg",
]
expected_files.sort()

assert files == expected_files

# spectrogram test in temporary test directory (with custom min_conf extraction)
with tempfile.TemporaryDirectory() as export_dir:

recording.extract_detections_as_spectrogram(
directory=export_dir,
format="png",
min_conf=0.4,
)

# Check file list.
files = os.listdir(export_dir)
files.sort()
expected_files = [
"soundscape_42s-45s.png",
"soundscape_66s-69s.png",
"soundscape_84s-87s.png",
"soundscape_9s-12s.png",
]
expected_files.sort()

assert files == expected_files

# Extract audio and spectrogram.

with tempfile.TemporaryDirectory() as export_dir:

recording.extract_detections_as_audio(
directory=export_dir,
format="mp3",
bitrate="128k",
padding_secs=2,
)

recording.extract_detections_as_spectrogram(
directory=export_dir,
format="png",
padding_secs=2,
)

# Check file list.
files = os.listdir(export_dir)
files.sort()

expected_files = [
"soundscape_31s-38s.mp3",
"soundscape_31s-38s.png",
"soundscape_40s-47s.mp3",
"soundscape_40s-47s.png",
"soundscape_49s-56s.mp3",
"soundscape_49s-56s.png",
"soundscape_58s-65s.mp3",
"soundscape_58s-65s.png",
"soundscape_64s-71s.mp3",
"soundscape_64s-71s.png",
"soundscape_67s-74s.mp3",
"soundscape_67s-74s.png",
"soundscape_7s-14s.mp3",
"soundscape_7s-14s.png",
"soundscape_82s-89s.mp3",
"soundscape_82s-89s.png",
"soundscape_91s-98s.mp3",
"soundscape_91s-98s.png",
]
expected_files.sort()

assert files == expected_files
assert len(recording.detections) == 9

detection = recording.detections[0]

# Assert confidence (round for slight float variability across platforms)
assert round(detection["confidence"], 3) == 0.507

del detection["confidence"]

expected_detection = {
"common_name": "House Finch",
"end_time": 12.0,
"extracted_audio_path": f"{export_dir}/soundscape_7s-14s.mp3",
"extracted_spectrogram_path": f"{export_dir}/soundscape_7s-14s.png",
"scientific_name": "Haemorhous mexicanus",
"start_time": 9.0,
}

assert detection == expected_detection

0 comments on commit db1e13b

Please sign in to comment.