From 51a5502efca35ca8238b57cbac054726863daef1 Mon Sep 17 00:00:00 2001 From: geoffrey45 Date: Fri, 5 May 2023 17:49:59 +0300 Subject: [PATCH] add parsers to clean track titles .ie. remove remaster info + use getters to read flags + use the largest limit to get recent favorites + misc --- app/api/album.py | 5 +- app/api/favorites.py | 61 ++++++++++++++----------- app/lib/populate.py | 2 + app/lib/trackslib.py | 3 +- app/models/album.py | 11 +++-- app/models/track.py | 11 +++-- app/{functions.py => periodic_scan.py} | 18 ++++---- app/serializers/favorites_serializer.py | 3 +- app/settings.py | 31 +++++++++++-- app/start_info_logger.py | 6 +-- app/utils/parsers.py | 54 +++++++++++++++++++++- manage.py | 4 +- 12 files changed, 152 insertions(+), 57 deletions(-) rename app/{functions.py => periodic_scan.py} (78%) diff --git a/app/api/album.py b/app/api/album.py index 1e4c517..c71fe61 100644 --- a/app/api/album.py +++ b/app/api/album.py @@ -137,14 +137,15 @@ def get_album_versions(): return {"msg": "No albumartist provided"} og_album_title: str = data['og_album_title'] - album_title: str = data['album_title'] + base_title: str = data['base_title'] artisthash: str = data['artisthash'] albums = AlbumStore.get_albums_by_artisthash(artisthash) albums = [ a for a in albums - if create_hash(a.title) == create_hash(album_title) and create_hash(og_album_title) != create_hash(a.og_title) + if + create_hash(a.base_title) == create_hash(base_title) and create_hash(og_album_title) != create_hash(a.og_title) ] return { diff --git a/app/api/favorites.py b/app/api/favorites.py index e5b0c12..a5204db 100644 --- a/app/api/favorites.py +++ b/app/api/favorites.py @@ -151,6 +151,7 @@ def get_all_favorites(): track_limit = int(track_limit) album_limit = int(album_limit) artist_limit = int(artist_limit) + largest = max(track_limit, album_limit, artist_limit) favs = favdb.get_all() favs.reverse() @@ -161,21 +162,21 @@ def get_all_favorites(): for fav in favs: if ( - len(tracks) >= track_limit - and len(albums) >= album_limit - and len(artists) >= artist_limit + len(tracks) >= largest + and len(albums) >= largest + and len(artists) >= largest ): break - if not len(tracks) >= track_limit: + if not len(tracks) >= largest: if fav[2] == FavType.track: tracks.append(fav[1]) - if not len(albums) >= album_limit: + if not len(albums) >= largest: if fav[2] == FavType.album: albums.append(fav[1]) - if not len(artists) >= artist_limit: + if not len(artists) >= largest: if fav[2] == FavType.artist: artists.append(fav[1]) @@ -196,31 +197,39 @@ def get_all_favorites(): for fav in first_n: if fav[2] == FavType.track: - track = [t for t in tracks if t.trackhash == fav[1]][0] - recents.append({ - "type": "track", - "item": recent_fav_track_serializer(track) - }) + try: + track = [t for t in tracks if t.trackhash == fav[1]][0] + recents.append({ + "type": "track", + "item": recent_fav_track_serializer(track) + }) + except IndexError: + pass elif fav[2] == FavType.album: - album = [a for a in albums if a.albumhash == fav[1]][0] - recents.append({ - "type": "album", - "item": recent_fav_album_serializer(album) - }) - + try: + album = [a for a in albums if a.albumhash == fav[1]][0] + recents.append({ + "type": "album", + "item": recent_fav_album_serializer(album) + }) + except IndexError: + pass elif fav[2] == FavType.artist: - artist = [a for a in artists if a.artisthash == fav[1]][0] - recents.append({ - "type": "artist", - "item": recent_fav_artist_serializer(artist) - }) + try: + artist = [a for a in artists if a.artisthash == fav[1]][0] + recents.append({ + "type": "artist", + "item": recent_fav_artist_serializer(artist) + }) + except IndexError: + pass return { - "recents": recents, - "tracks": tracks, - "albums": albums, - "artists": artists, + "recents": recents[:album_limit], + "tracks": tracks[:track_limit], + "albums": albums[:album_limit], + "artists": artists[:artist_limit], } diff --git a/app/lib/populate.py b/app/lib/populate.py index 814db85..cc7f436 100644 --- a/app/lib/populate.py +++ b/app/lib/populate.py @@ -8,6 +8,7 @@ from app.db.sqlite.favorite import SQLiteFavoriteMethods as favdb from app.lib.colorlib import ProcessAlbumColors, ProcessArtistColors from app.lib.taglib import extract_thumb, get_tags +from app.lib.trackslib import validate_tracks from app.logger import log from app.models import Album, Artist, Track from app.utils.filesystem import run_fast_scandir @@ -38,6 +39,7 @@ class Populate: global POPULATE_KEY POPULATE_KEY = key + validate_tracks() tracks = get_all_tracks() tracks = list(tracks) diff --git a/app/lib/trackslib.py b/app/lib/trackslib.py index fcc5ce5..caa00ca 100644 --- a/app/lib/trackslib.py +++ b/app/lib/trackslib.py @@ -13,8 +13,7 @@ def validate_tracks() -> None: """ Gets all songs under the ~/ directory. """ - for track in tqdm(TrackStore.tracks, desc="Removing deleted tracks"): + for track in tqdm(TrackStore.tracks, desc="Checking for deleted tracks"): if not os.path.exists(track.filepath): - print(f"Removing {track.filepath}") TrackStore.tracks.remove(track) tdb.remove_track_by_filepath(track.filepath) diff --git a/app/models/album.py b/app/models/album.py index ef69090..c1b4c70 100644 --- a/app/models/album.py +++ b/app/models/album.py @@ -6,7 +6,7 @@ from .artist import Artist from ..utils.hashing import create_hash from ..utils.parsers import parse_feat_from_title, get_base_title_and_versions -from app.settings import FromFlags +from app.settings import get_flag, ParserFlags @dataclass(slots=True) @@ -27,12 +27,14 @@ class Album: date: str = "" og_title: str = "" + base_title: str = "" is_soundtrack: bool = False is_compilation: bool = False is_single: bool = False is_EP: bool = False is_favorite: bool = False is_live: bool = False + genres: list[str] = dataclasses.field(default_factory=list) versions: list[str] = dataclasses.field(default_factory=list) @@ -40,7 +42,7 @@ class Album: self.og_title = self.title self.image = self.albumhash + ".webp" - if FromFlags.EXTRACT_FEAT: + if get_flag(ParserFlags.EXTRACT_FEAT): featured, self.title = parse_feat_from_title(self.title) if len(featured) > 0: @@ -50,11 +52,14 @@ class Album: from ..store.tracks import TrackStore TrackStore.append_track_artists(self.albumhash, featured, self.title) - if FromFlags.REMOVE_REMASTER: + if get_flag(ParserFlags.CLEAN_ALBUM_TITLE): + # if FromFlags.CLEAN_ALBUM_TITLE: self.title, self.versions = get_base_title_and_versions(self.title) if "super_deluxe" in self.versions: self.versions.remove("deluxe") + else: + self.base_title = get_base_title_and_versions(self.title, get_versions=False)[0] self.albumartists_hashes = "-".join(a.artisthash for a in self.albumartists) diff --git a/app/models/track.py b/app/models/track.py index d3a73a5..3d4c95d 100644 --- a/app/models/track.py +++ b/app/models/track.py @@ -1,10 +1,10 @@ import dataclasses from dataclasses import dataclass -from app.settings import FromFlags +from app.settings import FromFlags, get_flag, ParserFlags from .artist import ArtistMinimal from app.utils.hashing import create_hash -from app.utils.parsers import split_artists, remove_prod, parse_feat_from_title +from app.utils.parsers import split_artists, remove_prod, parse_feat_from_title, clean_title @dataclass(slots=True) @@ -42,18 +42,21 @@ class Track: artists = split_artists(self.artist) new_title = self.title - if FromFlags.EXTRACT_FEAT: + if get_flag(ParserFlags.EXTRACT_FEAT): featured, new_title = parse_feat_from_title(self.title) original_lower = "-".join([a.lower() for a in artists]) artists.extend([a for a in featured if a.lower() not in original_lower]) - if FromFlags.REMOVE_PROD: + if get_flag(ParserFlags.REMOVE_PROD): new_title = remove_prod(new_title) # if track is a single if self.og_title == self.album: self.album = new_title + if get_flag(ParserFlags.REMOVE_REMASTER_FROM_TRACK): + new_title = clean_title(new_title) + self.title = new_title self.artist_hashes = "-".join(create_hash(a, decode=True) for a in artists) diff --git a/app/functions.py b/app/periodic_scan.py similarity index 78% rename from app/functions.py rename to app/periodic_scan.py index 428486c..f884b39 100644 --- a/app/functions.py +++ b/app/periodic_scan.py @@ -2,28 +2,29 @@ This module contains functions for the server """ import time -from requests import ConnectionError as RequestConnectionError from requests import ReadTimeout +from requests import ConnectionError as RequestConnectionError +from app.logger import log from app.lib.artistlib import CheckArtistImages from app.lib.populate import Populate, PopulateCancelledError -from app.lib.trackslib import validate_tracks -from app.logger import log + from app.utils.generators import get_random_str from app.utils.network import Ping from app.utils.threading import background +from app.settings import ParserFlags, get_flag, get_scan_sleep_time + @background -def run_periodic_checks(): +def run_periodic_scans(): """ - Checks for new songs every N minutes. + Runs periodic scans. """ # ValidateAlbumThumbs() # ValidatePlaylistThumbs() - validate_tracks() - while True: + while get_flag(ParserFlags.DO_PERIODIC_SCANS): try: Populate(key=get_random_str()) except PopulateCancelledError: @@ -37,4 +38,5 @@ def run_periodic_checks(): "Internet connection lost. Downloading artist images stopped." ) - time.sleep(300) + sleep_time = get_scan_sleep_time() + time.sleep(sleep_time) diff --git a/app/serializers/favorites_serializer.py b/app/serializers/favorites_serializer.py index f907c64..e0a5649 100644 --- a/app/serializers/favorites_serializer.py +++ b/app/serializers/favorites_serializer.py @@ -21,8 +21,9 @@ def recent_fav_album_serializer(album: Album) -> dict: """ return { "image": album.image, - "title": album.title, + "title": album.og_title, "albumhash": album.albumhash, + "colors": album.colors, } diff --git a/app/settings.py b/app/settings.py index 7bef97b..0ce317b 100644 --- a/app/settings.py +++ b/app/settings.py @@ -2,6 +2,7 @@ Contains default configs """ import os +from enum import Enum join = os.path.join @@ -122,18 +123,40 @@ class ALLARGS: class FromFlags: - EXTRACT_FEAT = True + EXTRACT_FEAT = False """ Whether to extract the featured artists from the song title. """ - REMOVE_PROD = True + REMOVE_PROD = False """ Whether to remove the producers from the song title. """ - REMOVE_REMASTER = True - MERGE_REMASTERS = True + CLEAN_ALBUM_TITLE = False + REMOVE_REMASTER_FROM_TRACK = False + + DO_PERIODIC_SCANS = True + PERIODIC_SCAN_INTERVAL = 300 # seconds + + # MERGE_ALBUM_VERSIONS = True + + +class ParserFlags(Enum): + EXTRACT_FEAT = 'EXTRACT_FEAT' + REMOVE_PROD = 'REMOVE_PROD' + CLEAN_ALBUM_TITLE = 'CLEAN_ALBUM_TITLE' + REMOVE_REMASTER_FROM_TRACK = 'REMOVE_REMASTER_FROM_TRACK' + DO_PERIODIC_SCANS = 'DO_PERIODIC_SCANS' + PERIODIC_SCAN_INTERVAL = 'PERIODIC_SCAN_INTERVAL' + + +def get_flag(flag: ParserFlags) -> bool: + return getattr(FromFlags, flag.value) + + +def get_scan_sleep_time() -> int: + return FromFlags.PERIODIC_SCAN_INTERVAL class TCOLOR: diff --git a/app/start_info_logger.py b/app/start_info_logger.py index 4d43753..c7f9ea8 100644 --- a/app/start_info_logger.py +++ b/app/start_info_logger.py @@ -1,6 +1,6 @@ import os -from app.settings import TCOLOR, Release, FLASKVARS, Paths, FromFlags +from app.settings import TCOLOR, Release, FLASKVARS, Paths, FromFlags, get_flag, ParserFlags from app.utils.network import get_ip @@ -30,11 +30,11 @@ def log_startup_info(): to_print = [ [ "Extract featured artists from titles", - FromFlags.EXTRACT_FEAT + get_flag(ParserFlags.EXTRACT_FEAT) ], [ "Remove prod. from titles", - FromFlags.REMOVE_PROD + get_flag(ParserFlags.REMOVE_PROD) ] ] diff --git a/app/utils/parsers.py b/app/utils/parsers.py index 0736f07..fc0429e 100644 --- a/app/utils/parsers.py +++ b/app/utils/parsers.py @@ -3,6 +3,9 @@ from enum import Enum def split_artists(src: str, with_and: bool = False): + """ + Splits a string of artists into a list of artists. + """ exp = r"\s*(?: and |&|,|;)\s*" if with_and else r"\s*[,;]\s*" artists = re.split(exp, src) @@ -88,8 +91,12 @@ def parse_feat_from_title(title: str) -> tuple[list[str], str]: def get_base_album_title(string) -> tuple[str, str | None]: + """ + Extracts the base album title from a string. + """ pattern = re.compile(r'\s*(\(|\[)[^\)\]]*?(version|remaster|deluxe|edition|expanded|anniversary)[^\)\]]*?(\)|\])$', re.IGNORECASE) + # TODO: Fix "Redundant character escape '\]' in RegExp " match = pattern.search(string) if match: @@ -101,6 +108,9 @@ def get_base_album_title(string) -> tuple[str, str | None]: class AlbumVersionEnum(Enum): + """ + Enum for album versions. + """ Explicit = ("explicit",) ANNIVERSARY = ("anniversary",) @@ -118,7 +128,7 @@ class AlbumVersionEnum(Enum): LEGACY = ("legacy",) SPECIAL = ("special",) - COLLECTORS = ("collector",) + COLLECTORS_EDITION = ("collector",) ARCHIVE = ("archive",) Acoustic = ("acoustic",) @@ -149,6 +159,9 @@ class AlbumVersionEnum(Enum): def get_anniversary(text: str) -> str | None: + """ + Extracts anniversary from text using regex. + """ _end = "anniversary" match = re.search(r"\b\d+\w*(?= anniversary)", text, re.IGNORECASE) if match: @@ -158,6 +171,9 @@ def get_anniversary(text: str) -> str | None: def get_album_info(bracket_text: str | None) -> list[str]: + """ + Extracts album version info from the bracketed text on an album title string using regex. + """ if not bracket_text: return [] @@ -177,12 +193,18 @@ def get_album_info(bracket_text: str | None) -> list[str]: return versions -def get_base_title_and_versions(original_album_title: str) -> tuple[str, list[str]]: +def get_base_title_and_versions(original_album_title: str, get_versions=True) -> tuple[str, list[str]]: + """ + Extracts the base album title and version info from an album title string using regex. + """ album_title, version_block = get_base_album_title(original_album_title) if version_block is None: return original_album_title, [] + if not get_versions: + return album_title, [] + versions = get_album_info(version_block) # if no version info could be extracted, accept defeat! @@ -190,3 +212,31 @@ def get_base_title_and_versions(original_album_title: str) -> tuple[str, list[st album_title = original_album_title return album_title, versions + + +def remove_bracketed_remaster(text: str): + """ + Removes remaster info from a track title that contains brackets using regex. + """ + return re.sub(r'\s*[\\[(][^)\]]*remaster[^)\]]*[)\]]\s*', '', text, flags=re.IGNORECASE).strip() + + +def remove_hyphen_remasters(text: str): + """ + Removes remaster info from a track title that contains a hypen (-) using regex. + """ + return re.sub(r'\s-\s*[^-]*\bremaster[^-]*\s*', '', text, flags=re.IGNORECASE).strip() + + +def clean_title(title: str) -> str: + """ + Removes remaster info from a track title using regex. + """ + if "remaster" not in title.lower(): + return title + + if "-" in title: + return remove_hyphen_remasters(title) + + if "[" in title or "(" in title: + return remove_bracketed_remaster(title) diff --git a/manage.py b/manage.py index ab07b7e..dc9e846 100644 --- a/manage.py +++ b/manage.py @@ -5,7 +5,7 @@ import logging from app.api import create_api from app.arg_handler import HandleArgs -from app.functions import run_periodic_checks +from app.periodic_scan import run_periodic_scans from app.lib.watchdogg import Watcher as WatchDog from app.settings import FLASKVARS from app.setup import run_setup @@ -39,7 +39,7 @@ def serve_client(): @background def bg_run_setup() -> None: run_setup() - run_periodic_checks() + run_periodic_scans() @background