rewrite split_artists parser to remove regex

+ write placeholder functions to manage custom separators
This commit is contained in:
mungai-njoroge 2023-06-26 14:21:11 +03:00
parent 1cf5d38a96
commit 4a9d6bc3e6
4 changed files with 54 additions and 16 deletions

View File

@ -24,7 +24,8 @@ CREATE TABLE IF NOT EXISTS favorites (
CREATE TABLE IF NOT EXISTS settings ( CREATE TABLE IF NOT EXISTS settings (
id integer PRIMARY KEY, id integer PRIMARY KEY,
root_dirs text NOT NULL, root_dirs text NOT NULL,
exclude_dirs text exclude_dirs text,
artist_separators text
) )
""" """

View File

@ -88,3 +88,27 @@ class SettingsSQLMethods:
cur.execute(sql) cur.execute(sql)
dirs = cur.fetchall() dirs = cur.fetchall()
return [_dir[0] for _dir in dirs] return [_dir[0] for _dir in dirs]
@staticmethod
def add_artist_separators(seps: set[str]) -> None:
    """
    Adds a set of artist separators to the userdata table.

    Placeholder: the body is not implemented yet, so calling this
    currently stores nothing and returns None.

    NOTE(review): the schema change in this commit adds an
    `artist_separators` text column to the `settings` table; presumably
    that is where the separators should be persisted -- confirm.

    :param seps: separator strings to add.
    """
    # TODO: Implement (no-op until then)
    pass
@staticmethod
def get_artist_separators() -> set[str]:
    """
    Gets a set of artist separators from the userdata table.

    Placeholder: the body is not implemented yet, so this currently
    returns None rather than the annotated set[str]. Callers must not
    iterate the result until the TODO below is resolved.

    :return: intended to be the stored separator strings.
    """
    # TODO: Implement (falls through and returns None until then)
    pass
@staticmethod
def remove_artist_separators(seps: set[str]) -> None:
    """
    Removes a set of artist separators from the userdata table.

    Placeholder: the body is not implemented yet, so calling this
    currently removes nothing and returns None.

    :param seps: separator strings to remove.
    """
    # TODO: Implement (no-op until then)
    pass

View File

@ -43,7 +43,7 @@ class Track:
self.og_album = self.album self.og_album = self.album
if self.artist is not None: if self.artist is not None:
artists = split_artists(self.artist, with_and=True) artists = split_artists(self.artist)
new_title = self.title new_title = self.title
if get_flag(ParserFlags.EXTRACT_FEAT): if get_flag(ParserFlags.EXTRACT_FEAT):
@ -72,13 +72,13 @@ class Track:
self.artist_hashes = "-".join(create_hash(a, decode=True) for a in artists) self.artist_hashes = "-".join(create_hash(a, decode=True) for a in artists)
self.artist = [ArtistMinimal(a) for a in artists] self.artist = [ArtistMinimal(a) for a in artists]
albumartists = split_artists(self.albumartist, with_and=True) albumartists = split_artists(self.albumartist)
self.albumartist = [ArtistMinimal(a) for a in albumartists] self.albumartist = [ArtistMinimal(a) for a in albumartists]
self.filetype = self.filepath.rsplit(".", maxsplit=1)[-1] self.filetype = self.filepath.rsplit(".", maxsplit=1)[-1]
self.image = self.albumhash + ".webp" self.image = self.albumhash + ".webp"
if self.genre is not None: if self.genre is not None and self.genre != "":
self.genre = str(self.genre).replace("/", ",").replace(";", ",") self.genre = str(self.genre).replace("/", ",").replace(";", ",")
self.genre = str(self.genre).lower().split(",") self.genre = str(self.genre).lower().split(",")
self.genre = [g.strip() for g in self.genre] self.genre = [g.strip() for g in self.genre]

View File

@ -2,13 +2,17 @@ import re
from enum import Enum from enum import Enum
def split_artists(src: str, with_and: bool = False): def split_artists(src: str, custom_seps: set[str] = {}):
""" """
Splits a string of artists into a list of artists. Splits a string of artists into a list of artists.
""" """
exp = r"\s*(?: and |&|,|;|/)\s*" if with_and else r"\s*[,;]\s*" separators = {",", ";", "/"}.union(custom_seps)
for sep in separators:
src = src.replace(sep, "߸")
artists = src.split("߸")
artists = re.split(exp, src)
return [a.strip() for a in artists] return [a.strip() for a in artists]
@ -83,7 +87,7 @@ def parse_feat_from_title(title: str) -> tuple[list[str], str]:
return [], title return [], title
artists = match.group(1) artists = match.group(1)
artists = split_artists(artists, with_and=True) artists = split_artists(artists)
# remove "feat" group from title # remove "feat" group from title
new_title = re.sub(regex, "", title, flags=re.IGNORECASE) new_title = re.sub(regex, "", title, flags=re.IGNORECASE)
@ -94,14 +98,16 @@ def get_base_album_title(string) -> tuple[str, str | None]:
""" """
Extracts the base album title from a string. Extracts the base album title from a string.
""" """
pattern = re.compile(r'\s*(\(|\[)[^\)\]]*?(version|remaster|deluxe|edition|expanded|anniversary)[^\)\]]*?(\)|\])$', pattern = re.compile(
re.IGNORECASE) r"\s*(\(|\[)[^\)\]]*?(version|remaster|deluxe|edition|expanded|anniversary)[^\)\]]*?(\)|\])$",
re.IGNORECASE,
)
# TODO: Fix "Redundant character escape '\]' in RegExp " # TODO: Fix "Redundant character escape '\]' in RegExp "
match = pattern.search(string) match = pattern.search(string)
if match: if match:
removed_block = match.group(0) removed_block = match.group(0)
title = string.replace(removed_block, '') title = string.replace(removed_block, "")
return title.strip(), removed_block.strip() return title.strip(), removed_block.strip()
return string, None return string, None
@ -111,13 +117,14 @@ class AlbumVersionEnum(Enum):
""" """
Enum for album versions. Enum for album versions.
""" """
Explicit = ("explicit",) Explicit = ("explicit",)
ANNIVERSARY = ("anniversary",) ANNIVERSARY = ("anniversary",)
DIAMOND = ("diamond",) DIAMOND = ("diamond",)
Centennial = ("centennial",) Centennial = ("centennial",)
GOLDEN = ("gold",) GOLDEN = ("gold",)
PLATINUM = ('platinum',) PLATINUM = ("platinum",)
SILVER = ("silver",) SILVER = ("silver",)
EXPANDED = ("expanded",) EXPANDED = ("expanded",)
@ -132,7 +139,7 @@ class AlbumVersionEnum(Enum):
ARCHIVE = ("archive",) ARCHIVE = ("archive",)
Acoustic = ("acoustic",) Acoustic = ("acoustic",)
DOUBLE_DISC = ('double disc', 'double disk') DOUBLE_DISC = ("double disc", "double disk")
SUMMER = ("summer",) SUMMER = ("summer",)
WINTER = ("winter",) WINTER = ("winter",)
@ -193,7 +200,9 @@ def get_album_info(bracket_text: str | None) -> list[str]:
return versions return versions
def get_base_title_and_versions(original_album_title: str, get_versions=True) -> tuple[str, list[str]]: def get_base_title_and_versions(
original_album_title: str, get_versions=True
) -> tuple[str, list[str]]:
""" """
Extracts the base album title and version info from an album title string using regex. Extracts the base album title and version info from an album title string using regex.
""" """
@ -218,14 +227,18 @@ def remove_bracketed_remaster(text: str):
""" """
Removes remaster info from a track title that contains brackets using regex. Removes remaster info from a track title that contains brackets using regex.
""" """
return re.sub(r'\s*[\\[(][^)\]]*remaster[^)\]]*[)\]]\s*', '', text, flags=re.IGNORECASE).strip() return re.sub(
r"\s*[\\[(][^)\]]*remaster[^)\]]*[)\]]\s*", "", text, flags=re.IGNORECASE
).strip()
def remove_hyphen_remasters(text: str): def remove_hyphen_remasters(text: str):
""" """
Removes remaster info from a track title that contains a hyphen (-) using regex. Removes remaster info from a track title that contains a hyphen (-) using regex.
""" """
return re.sub(r'\s-\s*[^-]*\bremaster[^-]*\s*', '', text, flags=re.IGNORECASE).strip() return re.sub(
r"\s-\s*[^-]*\bremaster[^-]*\s*", "", text, flags=re.IGNORECASE
).strip()
def clean_title(title: str) -> str: def clean_title(title: str) -> str: