Skip to content

Commit

Permalink
working function that cleans up discographies
Browse files Browse the repository at this point in the history
also added bit depth and sampling rate to download logging
  • Loading branch information
nathom committed Mar 4, 2021
1 parent 93f9d8d commit 628e0a6
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 5 deletions.
129 changes: 125 additions & 4 deletions qobuz_dl/core.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# ----- Testing ------
import json

# --------------------
import logging
import os
import re
Expand Down Expand Up @@ -67,6 +71,7 @@ def __init__(
folder_format="{artist} - {album} ({year}) [{bit_depth}B-"
"{sampling_rate}kHz]",
track_format="{tracknumber}. {tracktitle}",
smart_discography=False,
):
self.directory = self.create_dir(directory)
self.quality = quality
Expand All @@ -82,6 +87,7 @@ def __init__(
self.downloads_db = create_db(downloads_db) if downloads_db else None
self.folder_format = folder_format
self.track_format = track_format
self.smart_discography = smart_discography

def initialize_client(self, email, pwd, app_id, secrets):
self.client = qopy.Client(email, pwd, app_id, secrets)
Expand All @@ -100,14 +106,14 @@ def create_dir(self, directory=None):
return fix

def get_url_info(self, url: str) -> Tuple[str, str]:
'''Returns the type of the url and the id.
"""Returns the type of the url and the id.
Compatible with urls of the form:
https://www.qobuz.com/us-en/{type}/{name}/{id}
https://open.qobuz.com/{type}/{id}
https://play.qobuz.com/{type}/{id}
/us-en/{type}/-/{id}
'''
"""

r = re.search(
r"(?:https:\/\/(?:w{3}|open|play)\.qobuz\.com)?(?:\/[a-z]{2}-[a-z]{2})"
Expand Down Expand Up @@ -178,7 +184,11 @@ def handle_url(self, url):
new_path = self.create_dir(
os.path.join(self.directory, sanitize_filename(content_name))
)
items = [item[type_dict["iterable_key"]]["items"] for item in content][0]

# items = [item[type_dict["iterable_key"]]["items"] for item in content][0]
items = self.smart_discography_filter(
content, True, True,
)
logger.info(f"{YELLOW}{len(items)} downloads in queue")
for item in items:
self.download_from_id(
Expand Down Expand Up @@ -416,7 +426,9 @@ def download_lastfm_pl(self, playlist_url):
)

for i in track_list:
track_id = self.get_url_info(self.search_by_type(i, "track", 1, lucky=True)[0])[1]
track_id = self.get_url_info(
self.search_by_type(i, "track", 1, lucky=True)[0]
)[1]
if track_id:
self.download_from_id(track_id, False, pl_directory)

Expand Down Expand Up @@ -468,3 +480,112 @@ def make_m3u(self, pl_directory):
if len(track_list) > 1:
with open(os.path.join(pl_directory, pl_name), "w") as pl:
pl.write("\n\n".join(track_list))

def smart_discography_filter(
    self, contents: list, save_space=False, remove_extras=False
) -> list:
    """Filter an artist's discography down to the preferred releases.

    When downloading some artists' discography, there can be a lot
    of duplicate albums that needlessly use 10's of GB of bandwidth. This
    filters the duplicates.

    Example (Stevie Wonder):
        * ...
        * Songs In The Key of Life [24/192]
        * Songs In The Key of Life [24/96]
        * Songs In The Key of Life [16/44.1]
        * ...
    This function should choose either [24/96] or [24/192].

    Selection rules, applied per album title:
        1. Albums credited to an artist other than the requested one are
           removed.
        2. Among same-titled albums, only those with the highest bit depth
           are considered; of these, the lowest (``save_space=True``) or
           highest (``save_space=False``) sampling rate is chosen.
        3. If any album at the chosen quality is a remaster, only the
           remasters are kept. Otherwise every album at the chosen quality
           is kept, so a title is never dropped just because its remaster
           is only available at another quality.
        4. With ``remove_extras=True``, deluxe/anniversary/live/collector/
           demo releases are removed from the final result.

    :param contents: pages of the artist response; only the first page
        (``contents[0]``) is read, and it must provide ``"name"`` and
        ``["albums"]["items"]``.
    :param save_space: prefer the lowest sampling rate at the best bit depth.
    :param remove_extras: drop releases whose title/version marks them as
        an extra edition.
    :return: the filtered list of album dicts (empty for empty input).
    """

    def version_of(album: dict) -> str:
        # "version" may be missing or None; normalise both to "".
        return album.get("version") or ""

    def remastered(album: dict) -> bool:
        # search, not match: the keyword is often not at the start of the
        # version string (e.g. "2011 Remaster").
        return re.search(r"(?i)(re)?master(ed)?", version_of(album)) is not None

    def extra(album: dict) -> bool:
        # Leading word boundary avoids false positives such as
        # "Deliverance" (contains "live").
        return (
            re.search(
                r"(?i)\b(anniversary|deluxe|live|collector|demo)",
                f"{album['title']} {version_of(album)}",
            )
            is not None
        )

    if not contents:
        return []

    # NOTE(review): only the first page of results is inspected; confirm
    # whether artists with multi-page discographies need all pages merged.
    artist = contents[0]["name"]
    items = contents[0]["albums"]["items"]

    # Drop albums by other artists and group the rest by title
    # (dicts preserve insertion order, so output order follows the input).
    by_title = {}
    for item in items:
        if item["artist"]["name"] == artist:
            by_title.setdefault(item["title"], []).append(item)

    # Having a sampling rate > 44.1kHz is a waste of space:
    # https://en.wikipedia.org/wiki/Nyquist–Shannon_sampling_theorem
    # https://en.wikipedia.org/wiki/44,100_Hz#Human_hearing_and_signal_processing
    pick_rate = min if save_space else max

    chosen = []
    for albums in by_title.values():
        # No duplicates for this title.
        if len(albums) == 1:
            chosen.append(albums[0])
            continue

        bit_depth = max(a["maximum_bit_depth"] for a in albums)
        sampling_rate = pick_rate(
            a["maximum_sampling_rate"]
            for a in albums
            if a["maximum_bit_depth"] == bit_depth
        )
        candidates = [
            a
            for a in albums
            if a["maximum_bit_depth"] == bit_depth
            and a["maximum_sampling_rate"] == sampling_rate
        ]
        # Remasters get preferred status, but fall back to the plain
        # candidates so the title is not lost from the discography.
        remasters = [a for a in candidates if remastered(a)]
        chosen.extend(remasters or candidates)

    if remove_extras:
        # This filters those huge albums with outtakes, live performances etc.
        return [a for a in chosen if not extra(a)]
    return chosen
2 changes: 1 addition & 1 deletion qobuz_dl/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def download_id_by_type(
return

logger.info(
f"\n{YELLOW}Downloading: {album_title}\n" f"Quality: {file_format}\n"
f"\n{YELLOW}Downloading: {album_title}\nQuality: {file_format} ({bit_depth}/{sampling_rate})\n"
)
album_attr = {
"artist": meta["artist"]["name"],
Expand Down

0 comments on commit 628e0a6

Please sign in to comment.