add parsers to clean track titles, i.e. remove remaster info

+ use getters to read flags
+ use the largest limit to get recent favorites
+ misc
This commit is contained in:
geoffrey45 2023-05-05 17:49:59 +03:00
parent 5d0b59ea60
commit 51a5502efc
12 changed files with 152 additions and 57 deletions

View File

@ -137,14 +137,15 @@ def get_album_versions():
return {"msg": "No albumartist provided"} return {"msg": "No albumartist provided"}
og_album_title: str = data['og_album_title'] og_album_title: str = data['og_album_title']
album_title: str = data['album_title'] base_title: str = data['base_title']
artisthash: str = data['artisthash'] artisthash: str = data['artisthash']
albums = AlbumStore.get_albums_by_artisthash(artisthash) albums = AlbumStore.get_albums_by_artisthash(artisthash)
albums = [ albums = [
a for a in albums a for a in albums
if create_hash(a.title) == create_hash(album_title) and create_hash(og_album_title) != create_hash(a.og_title) if
create_hash(a.base_title) == create_hash(base_title) and create_hash(og_album_title) != create_hash(a.og_title)
] ]
return { return {

View File

@ -151,6 +151,7 @@ def get_all_favorites():
track_limit = int(track_limit) track_limit = int(track_limit)
album_limit = int(album_limit) album_limit = int(album_limit)
artist_limit = int(artist_limit) artist_limit = int(artist_limit)
largest = max(track_limit, album_limit, artist_limit)
favs = favdb.get_all() favs = favdb.get_all()
favs.reverse() favs.reverse()
@ -161,21 +162,21 @@ def get_all_favorites():
for fav in favs: for fav in favs:
if ( if (
len(tracks) >= track_limit len(tracks) >= largest
and len(albums) >= album_limit and len(albums) >= largest
and len(artists) >= artist_limit and len(artists) >= largest
): ):
break break
if not len(tracks) >= track_limit: if not len(tracks) >= largest:
if fav[2] == FavType.track: if fav[2] == FavType.track:
tracks.append(fav[1]) tracks.append(fav[1])
if not len(albums) >= album_limit: if not len(albums) >= largest:
if fav[2] == FavType.album: if fav[2] == FavType.album:
albums.append(fav[1]) albums.append(fav[1])
if not len(artists) >= artist_limit: if not len(artists) >= largest:
if fav[2] == FavType.artist: if fav[2] == FavType.artist:
artists.append(fav[1]) artists.append(fav[1])
@ -196,31 +197,39 @@ def get_all_favorites():
for fav in first_n: for fav in first_n:
if fav[2] == FavType.track: if fav[2] == FavType.track:
track = [t for t in tracks if t.trackhash == fav[1]][0] try:
recents.append({ track = [t for t in tracks if t.trackhash == fav[1]][0]
"type": "track", recents.append({
"item": recent_fav_track_serializer(track) "type": "track",
}) "item": recent_fav_track_serializer(track)
})
except IndexError:
pass
elif fav[2] == FavType.album: elif fav[2] == FavType.album:
album = [a for a in albums if a.albumhash == fav[1]][0] try:
recents.append({ album = [a for a in albums if a.albumhash == fav[1]][0]
"type": "album", recents.append({
"item": recent_fav_album_serializer(album) "type": "album",
}) "item": recent_fav_album_serializer(album)
})
except IndexError:
pass
elif fav[2] == FavType.artist: elif fav[2] == FavType.artist:
artist = [a for a in artists if a.artisthash == fav[1]][0] try:
recents.append({ artist = [a for a in artists if a.artisthash == fav[1]][0]
"type": "artist", recents.append({
"item": recent_fav_artist_serializer(artist) "type": "artist",
}) "item": recent_fav_artist_serializer(artist)
})
except IndexError:
pass
return { return {
"recents": recents, "recents": recents[:album_limit],
"tracks": tracks, "tracks": tracks[:track_limit],
"albums": albums, "albums": albums[:album_limit],
"artists": artists, "artists": artists[:artist_limit],
} }

View File

@ -8,6 +8,7 @@ from app.db.sqlite.favorite import SQLiteFavoriteMethods as favdb
from app.lib.colorlib import ProcessAlbumColors, ProcessArtistColors from app.lib.colorlib import ProcessAlbumColors, ProcessArtistColors
from app.lib.taglib import extract_thumb, get_tags from app.lib.taglib import extract_thumb, get_tags
from app.lib.trackslib import validate_tracks
from app.logger import log from app.logger import log
from app.models import Album, Artist, Track from app.models import Album, Artist, Track
from app.utils.filesystem import run_fast_scandir from app.utils.filesystem import run_fast_scandir
@ -38,6 +39,7 @@ class Populate:
global POPULATE_KEY global POPULATE_KEY
POPULATE_KEY = key POPULATE_KEY = key
validate_tracks()
tracks = get_all_tracks() tracks = get_all_tracks()
tracks = list(tracks) tracks = list(tracks)

View File

@ -13,8 +13,7 @@ def validate_tracks() -> None:
""" """
Gets all songs under the ~/ directory. Gets all songs under the ~/ directory.
""" """
for track in tqdm(TrackStore.tracks, desc="Removing deleted tracks"): for track in tqdm(TrackStore.tracks, desc="Checking for deleted tracks"):
if not os.path.exists(track.filepath): if not os.path.exists(track.filepath):
print(f"Removing {track.filepath}")
TrackStore.tracks.remove(track) TrackStore.tracks.remove(track)
tdb.remove_track_by_filepath(track.filepath) tdb.remove_track_by_filepath(track.filepath)

View File

@ -6,7 +6,7 @@ from .artist import Artist
from ..utils.hashing import create_hash from ..utils.hashing import create_hash
from ..utils.parsers import parse_feat_from_title, get_base_title_and_versions from ..utils.parsers import parse_feat_from_title, get_base_title_and_versions
from app.settings import FromFlags from app.settings import get_flag, ParserFlags
@dataclass(slots=True) @dataclass(slots=True)
@ -27,12 +27,14 @@ class Album:
date: str = "" date: str = ""
og_title: str = "" og_title: str = ""
base_title: str = ""
is_soundtrack: bool = False is_soundtrack: bool = False
is_compilation: bool = False is_compilation: bool = False
is_single: bool = False is_single: bool = False
is_EP: bool = False is_EP: bool = False
is_favorite: bool = False is_favorite: bool = False
is_live: bool = False is_live: bool = False
genres: list[str] = dataclasses.field(default_factory=list) genres: list[str] = dataclasses.field(default_factory=list)
versions: list[str] = dataclasses.field(default_factory=list) versions: list[str] = dataclasses.field(default_factory=list)
@ -40,7 +42,7 @@ class Album:
self.og_title = self.title self.og_title = self.title
self.image = self.albumhash + ".webp" self.image = self.albumhash + ".webp"
if FromFlags.EXTRACT_FEAT: if get_flag(ParserFlags.EXTRACT_FEAT):
featured, self.title = parse_feat_from_title(self.title) featured, self.title = parse_feat_from_title(self.title)
if len(featured) > 0: if len(featured) > 0:
@ -50,11 +52,14 @@ class Album:
from ..store.tracks import TrackStore from ..store.tracks import TrackStore
TrackStore.append_track_artists(self.albumhash, featured, self.title) TrackStore.append_track_artists(self.albumhash, featured, self.title)
if FromFlags.REMOVE_REMASTER: if get_flag(ParserFlags.CLEAN_ALBUM_TITLE):
# if FromFlags.CLEAN_ALBUM_TITLE:
self.title, self.versions = get_base_title_and_versions(self.title) self.title, self.versions = get_base_title_and_versions(self.title)
if "super_deluxe" in self.versions: if "super_deluxe" in self.versions:
self.versions.remove("deluxe") self.versions.remove("deluxe")
else:
self.base_title = get_base_title_and_versions(self.title, get_versions=False)[0]
self.albumartists_hashes = "-".join(a.artisthash for a in self.albumartists) self.albumartists_hashes = "-".join(a.artisthash for a in self.albumartists)

View File

@ -1,10 +1,10 @@
import dataclasses import dataclasses
from dataclasses import dataclass from dataclasses import dataclass
from app.settings import FromFlags from app.settings import FromFlags, get_flag, ParserFlags
from .artist import ArtistMinimal from .artist import ArtistMinimal
from app.utils.hashing import create_hash from app.utils.hashing import create_hash
from app.utils.parsers import split_artists, remove_prod, parse_feat_from_title from app.utils.parsers import split_artists, remove_prod, parse_feat_from_title, clean_title
@dataclass(slots=True) @dataclass(slots=True)
@ -42,18 +42,21 @@ class Track:
artists = split_artists(self.artist) artists = split_artists(self.artist)
new_title = self.title new_title = self.title
if FromFlags.EXTRACT_FEAT: if get_flag(ParserFlags.EXTRACT_FEAT):
featured, new_title = parse_feat_from_title(self.title) featured, new_title = parse_feat_from_title(self.title)
original_lower = "-".join([a.lower() for a in artists]) original_lower = "-".join([a.lower() for a in artists])
artists.extend([a for a in featured if a.lower() not in original_lower]) artists.extend([a for a in featured if a.lower() not in original_lower])
if FromFlags.REMOVE_PROD: if get_flag(ParserFlags.REMOVE_PROD):
new_title = remove_prod(new_title) new_title = remove_prod(new_title)
# if track is a single # if track is a single
if self.og_title == self.album: if self.og_title == self.album:
self.album = new_title self.album = new_title
if get_flag(ParserFlags.REMOVE_REMASTER_FROM_TRACK):
new_title = clean_title(new_title)
self.title = new_title self.title = new_title
self.artist_hashes = "-".join(create_hash(a, decode=True) for a in artists) self.artist_hashes = "-".join(create_hash(a, decode=True) for a in artists)

View File

@ -2,28 +2,29 @@
This module contains functions for the server This module contains functions for the server
""" """
import time import time
from requests import ConnectionError as RequestConnectionError
from requests import ReadTimeout from requests import ReadTimeout
from requests import ConnectionError as RequestConnectionError
from app.logger import log
from app.lib.artistlib import CheckArtistImages from app.lib.artistlib import CheckArtistImages
from app.lib.populate import Populate, PopulateCancelledError from app.lib.populate import Populate, PopulateCancelledError
from app.lib.trackslib import validate_tracks
from app.logger import log
from app.utils.generators import get_random_str from app.utils.generators import get_random_str
from app.utils.network import Ping from app.utils.network import Ping
from app.utils.threading import background from app.utils.threading import background
from app.settings import ParserFlags, get_flag, get_scan_sleep_time
@background @background
def run_periodic_checks(): def run_periodic_scans():
""" """
Checks for new songs every N minutes. Runs periodic scans.
""" """
# ValidateAlbumThumbs() # ValidateAlbumThumbs()
# ValidatePlaylistThumbs() # ValidatePlaylistThumbs()
validate_tracks()
while True: while get_flag(ParserFlags.DO_PERIODIC_SCANS):
try: try:
Populate(key=get_random_str()) Populate(key=get_random_str())
except PopulateCancelledError: except PopulateCancelledError:
@ -37,4 +38,5 @@ def run_periodic_checks():
"Internet connection lost. Downloading artist images stopped." "Internet connection lost. Downloading artist images stopped."
) )
time.sleep(300) sleep_time = get_scan_sleep_time()
time.sleep(sleep_time)

View File

@ -21,8 +21,9 @@ def recent_fav_album_serializer(album: Album) -> dict:
""" """
return { return {
"image": album.image, "image": album.image,
"title": album.title, "title": album.og_title,
"albumhash": album.albumhash, "albumhash": album.albumhash,
"colors": album.colors,
} }

View File

@ -2,6 +2,7 @@
Contains default configs Contains default configs
""" """
import os import os
from enum import Enum
join = os.path.join join = os.path.join
@ -122,18 +123,40 @@ class ALLARGS:
class FromFlags: class FromFlags:
EXTRACT_FEAT = True EXTRACT_FEAT = False
""" """
Whether to extract the featured artists from the song title. Whether to extract the featured artists from the song title.
""" """
REMOVE_PROD = True REMOVE_PROD = False
""" """
Whether to remove the producers from the song title. Whether to remove the producers from the song title.
""" """
REMOVE_REMASTER = True CLEAN_ALBUM_TITLE = False
MERGE_REMASTERS = True REMOVE_REMASTER_FROM_TRACK = False
DO_PERIODIC_SCANS = True
PERIODIC_SCAN_INTERVAL = 300 # seconds
# MERGE_ALBUM_VERSIONS = True
class ParserFlags(Enum):
    """
    Names of the configurable flags.

    Every member's value is the name of the attribute that holds
    the flag's current value, so a member can be resolved with
    ``getattr(<settings holder>, member.value)``.
    """

    # title-parsing switches
    EXTRACT_FEAT = "EXTRACT_FEAT"
    REMOVE_PROD = "REMOVE_PROD"
    CLEAN_ALBUM_TITLE = "CLEAN_ALBUM_TITLE"
    REMOVE_REMASTER_FROM_TRACK = "REMOVE_REMASTER_FROM_TRACK"

    # periodic-scan settings
    DO_PERIODIC_SCANS = "DO_PERIODIC_SCANS"
    PERIODIC_SCAN_INTERVAL = "PERIODIC_SCAN_INTERVAL"
def get_flag(flag: ParserFlags) -> bool | int:
    """
    Reads the current value of a flag from ``FromFlags``.

    The lookup happens on every call, so a value changed on ``FromFlags``
    at runtime is picked up by the next read.

    :param flag: The ``ParserFlags`` member naming the attribute to read.
    :return: The attribute's value. Most flags are booleans, but
        ``ParserFlags.PERIODIC_SCAN_INTERVAL`` resolves to an integer.
    :raises AttributeError: If ``FromFlags`` has no attribute named ``flag.value``.
    """
    return getattr(FromFlags, flag.value)
def get_scan_sleep_time() -> int:
    """
    Returns the value of ``FromFlags.PERIODIC_SCAN_INTERVAL``.
    """
    interval = FromFlags.PERIODIC_SCAN_INTERVAL
    return interval
class TCOLOR: class TCOLOR:

View File

@ -1,6 +1,6 @@
import os import os
from app.settings import TCOLOR, Release, FLASKVARS, Paths, FromFlags from app.settings import TCOLOR, Release, FLASKVARS, Paths, FromFlags, get_flag, ParserFlags
from app.utils.network import get_ip from app.utils.network import get_ip
@ -30,11 +30,11 @@ def log_startup_info():
to_print = [ to_print = [
[ [
"Extract featured artists from titles", "Extract featured artists from titles",
FromFlags.EXTRACT_FEAT get_flag(ParserFlags.EXTRACT_FEAT)
], ],
[ [
"Remove prod. from titles", "Remove prod. from titles",
FromFlags.REMOVE_PROD get_flag(ParserFlags.REMOVE_PROD)
] ]
] ]

View File

@ -3,6 +3,9 @@ from enum import Enum
def split_artists(src: str, with_and: bool = False): def split_artists(src: str, with_and: bool = False):
"""
Splits a string of artists into a list of artists.
"""
exp = r"\s*(?: and |&|,|;)\s*" if with_and else r"\s*[,;]\s*" exp = r"\s*(?: and |&|,|;)\s*" if with_and else r"\s*[,;]\s*"
artists = re.split(exp, src) artists = re.split(exp, src)
@ -88,8 +91,12 @@ def parse_feat_from_title(title: str) -> tuple[list[str], str]:
def get_base_album_title(string) -> tuple[str, str | None]: def get_base_album_title(string) -> tuple[str, str | None]:
"""
Extracts the base album title from a string.
"""
pattern = re.compile(r'\s*(\(|\[)[^\)\]]*?(version|remaster|deluxe|edition|expanded|anniversary)[^\)\]]*?(\)|\])$', pattern = re.compile(r'\s*(\(|\[)[^\)\]]*?(version|remaster|deluxe|edition|expanded|anniversary)[^\)\]]*?(\)|\])$',
re.IGNORECASE) re.IGNORECASE)
# TODO: Fix "Redundant character escape '\]' in RegExp "
match = pattern.search(string) match = pattern.search(string)
if match: if match:
@ -101,6 +108,9 @@ def get_base_album_title(string) -> tuple[str, str | None]:
class AlbumVersionEnum(Enum): class AlbumVersionEnum(Enum):
"""
Enum for album versions.
"""
Explicit = ("explicit",) Explicit = ("explicit",)
ANNIVERSARY = ("anniversary",) ANNIVERSARY = ("anniversary",)
@ -118,7 +128,7 @@ class AlbumVersionEnum(Enum):
LEGACY = ("legacy",) LEGACY = ("legacy",)
SPECIAL = ("special",) SPECIAL = ("special",)
COLLECTORS = ("collector",) COLLECTORS_EDITION = ("collector",)
ARCHIVE = ("archive",) ARCHIVE = ("archive",)
Acoustic = ("acoustic",) Acoustic = ("acoustic",)
@ -149,6 +159,9 @@ class AlbumVersionEnum(Enum):
def get_anniversary(text: str) -> str | None: def get_anniversary(text: str) -> str | None:
"""
Extracts anniversary from text using regex.
"""
_end = "anniversary" _end = "anniversary"
match = re.search(r"\b\d+\w*(?= anniversary)", text, re.IGNORECASE) match = re.search(r"\b\d+\w*(?= anniversary)", text, re.IGNORECASE)
if match: if match:
@ -158,6 +171,9 @@ def get_anniversary(text: str) -> str | None:
def get_album_info(bracket_text: str | None) -> list[str]: def get_album_info(bracket_text: str | None) -> list[str]:
"""
Extracts album version info from the bracketed text on an album title string using regex.
"""
if not bracket_text: if not bracket_text:
return [] return []
@ -177,12 +193,18 @@ def get_album_info(bracket_text: str | None) -> list[str]:
return versions return versions
def get_base_title_and_versions(original_album_title: str) -> tuple[str, list[str]]: def get_base_title_and_versions(original_album_title: str, get_versions=True) -> tuple[str, list[str]]:
"""
Extracts the base album title and version info from an album title string using regex.
"""
album_title, version_block = get_base_album_title(original_album_title) album_title, version_block = get_base_album_title(original_album_title)
if version_block is None: if version_block is None:
return original_album_title, [] return original_album_title, []
if not get_versions:
return album_title, []
versions = get_album_info(version_block) versions = get_album_info(version_block)
# if no version info could be extracted, accept defeat! # if no version info could be extracted, accept defeat!
@ -190,3 +212,31 @@ def get_base_title_and_versions(original_album_title: str) -> tuple[str, list[st
album_title = original_album_title album_title = original_album_title
return album_title, versions return album_title, versions
def remove_bracketed_remaster(text: str):
    """
    Removes remaster info from a track title that contains brackets using regex.

    A ``(...)`` or ``[...]`` group containing the word "remaster" (any case) is
    dropped together with the whitespace around it. The group is replaced by a
    single space so that the words on either side are not glued together; the
    result is stripped.

    :param text: The track title.
    :return: The title without bracketed remaster info.
    """
    # The opening class is exactly "[" or "(" -- a literal backslash is not a bracket.
    pattern = r'\s*[\[(][^)\]]*remaster[^)\]]*[)\]]\s*'
    return re.sub(pattern, ' ', text, flags=re.IGNORECASE).strip()


def remove_hyphen_remasters(text: str):
    """
    Removes remaster info from a track title that contains a hyphen (-) using regex.

    Only a hyphen preceded by whitespace starts a removable segment, so
    hyphenated words such as "Jay-Z" are left alone. The segment runs from
    that hyphen up to the next hyphen or the end of the title.

    :param text: The track title.
    :return: The stripped title without the hyphen-separated remaster segment.
    """
    return re.sub(r'\s-\s*[^-]*\bremaster[^-]*\s*', '', text, flags=re.IGNORECASE).strip()


def clean_title(title: str) -> str:
    """
    Removes remaster info from a track title using regex.

    :param title: The track title.
    :return: The cleaned title. The title is returned unchanged when it has no
        remaster info in a recognised form, or when cleaning would leave
        nothing behind. A string is always returned.
    """
    if "remaster" not in title.lower():
        return title

    cleaned = title

    # Hyphen form first, e.g. "Song - 2011 Remaster".
    if "-" in cleaned:
        cleaned = remove_hyphen_remasters(cleaned)

    # A hyphen elsewhere in the title (e.g. "Jay-Z") must not stop the
    # bracketed form from being handled, e.g. "Song (Remastered 2011)".
    if "[" in cleaned or "(" in cleaned:
        cleaned = remove_bracketed_remaster(cleaned)

    # Never hand back an empty title: a track named "(Remastered)" keeps its name.
    return cleaned or title

View File

@ -5,7 +5,7 @@ import logging
from app.api import create_api from app.api import create_api
from app.arg_handler import HandleArgs from app.arg_handler import HandleArgs
from app.functions import run_periodic_checks from app.periodic_scan import run_periodic_scans
from app.lib.watchdogg import Watcher as WatchDog from app.lib.watchdogg import Watcher as WatchDog
from app.settings import FLASKVARS from app.settings import FLASKVARS
from app.setup import run_setup from app.setup import run_setup
@ -39,7 +39,7 @@ def serve_client():
@background @background
def bg_run_setup() -> None: def bg_run_setup() -> None:
run_setup() run_setup()
run_periodic_checks() run_periodic_scans()
@background @background