rewrite split_artists parser to remove regex

+ write placeholder functions to manage custom separators
This commit is contained in:
mungai-njoroge 2023-06-26 14:21:11 +03:00
parent 1cf5d38a96
commit 4a9d6bc3e6
4 changed files with 54 additions and 16 deletions

View File

@ -24,7 +24,8 @@ CREATE TABLE IF NOT EXISTS favorites (
CREATE TABLE IF NOT EXISTS settings (
id integer PRIMARY KEY,
root_dirs text NOT NULL,
exclude_dirs text
exclude_dirs text,
artist_separators text
)
"""

View File

@ -88,3 +88,27 @@ class SettingsSQLMethods:
cur.execute(sql)
dirs = cur.fetchall()
return [_dir[0] for _dir in dirs]
@staticmethod
def add_artist_separators(seps: set[str]):
"""
Adds a set of artist separators to the userdata table.
"""
# TODO: Implement
pass
@staticmethod
def get_artist_separators() -> set[str]:
"""
Gets a set of artist separators from the userdata table.
"""
# TODO: Implement
pass
@staticmethod
def remove_artist_separators(seps: set[str]):
"""
Removes a set of artist separators from the userdata table.
"""
# TODO: Implement
pass

View File

@ -43,7 +43,7 @@ class Track:
self.og_album = self.album
if self.artist is not None:
artists = split_artists(self.artist, with_and=True)
artists = split_artists(self.artist)
new_title = self.title
if get_flag(ParserFlags.EXTRACT_FEAT):
@ -72,13 +72,13 @@ class Track:
self.artist_hashes = "-".join(create_hash(a, decode=True) for a in artists)
self.artist = [ArtistMinimal(a) for a in artists]
albumartists = split_artists(self.albumartist, with_and=True)
albumartists = split_artists(self.albumartist)
self.albumartist = [ArtistMinimal(a) for a in albumartists]
self.filetype = self.filepath.rsplit(".", maxsplit=1)[-1]
self.image = self.albumhash + ".webp"
if self.genre is not None:
if self.genre is not None and self.genre != "":
self.genre = str(self.genre).replace("/", ",").replace(";", ",")
self.genre = str(self.genre).lower().split(",")
self.genre = [g.strip() for g in self.genre]

View File

@ -2,13 +2,17 @@ import re
from enum import Enum
def split_artists(src: str, with_and: bool = False):
def split_artists(src: str, custom_seps: set[str] = {}):
"""
Splits a string of artists into a list of artists.
"""
exp = r"\s*(?: and |&|,|;|/)\s*" if with_and else r"\s*[,;]\s*"
separators = {",", ";", "/"}.union(custom_seps)
for sep in separators:
src = src.replace(sep, "߸")
artists = src.split("߸")
artists = re.split(exp, src)
return [a.strip() for a in artists]
@ -83,7 +87,7 @@ def parse_feat_from_title(title: str) -> tuple[list[str], str]:
return [], title
artists = match.group(1)
artists = split_artists(artists, with_and=True)
artists = split_artists(artists)
# remove "feat" group from title
new_title = re.sub(regex, "", title, flags=re.IGNORECASE)
@ -94,14 +98,16 @@ def get_base_album_title(string) -> tuple[str, str | None]:
"""
Extracts the base album title from a string.
"""
pattern = re.compile(r'\s*(\(|\[)[^\)\]]*?(version|remaster|deluxe|edition|expanded|anniversary)[^\)\]]*?(\)|\])$',
re.IGNORECASE)
pattern = re.compile(
r"\s*(\(|\[)[^\)\]]*?(version|remaster|deluxe|edition|expanded|anniversary)[^\)\]]*?(\)|\])$",
re.IGNORECASE,
)
# TODO: Fix "Redundant character escape '\]' in RegExp "
match = pattern.search(string)
if match:
removed_block = match.group(0)
title = string.replace(removed_block, '')
title = string.replace(removed_block, "")
return title.strip(), removed_block.strip()
return string, None
@ -111,13 +117,14 @@ class AlbumVersionEnum(Enum):
"""
Enum for album versions.
"""
Explicit = ("explicit",)
ANNIVERSARY = ("anniversary",)
DIAMOND = ("diamond",)
Centennial = ("centennial",)
GOLDEN = ("gold",)
PLATINUM = ('platinum',)
PLATINUM = ("platinum",)
SILVER = ("silver",)
EXPANDED = ("expanded",)
@ -132,7 +139,7 @@ class AlbumVersionEnum(Enum):
ARCHIVE = ("archive",)
Acoustic = ("acoustic",)
DOUBLE_DISC = ('double disc', 'double disk')
DOUBLE_DISC = ("double disc", "double disk")
SUMMER = ("summer",)
WINTER = ("winter",)
@ -193,7 +200,9 @@ def get_album_info(bracket_text: str | None) -> list[str]:
return versions
def get_base_title_and_versions(original_album_title: str, get_versions=True) -> tuple[str, list[str]]:
def get_base_title_and_versions(
original_album_title: str, get_versions=True
) -> tuple[str, list[str]]:
"""
Extracts the base album title and version info from an album title string using regex.
"""
@ -218,14 +227,18 @@ def remove_bracketed_remaster(text: str):
"""
Removes remaster info from a track title that contains brackets using regex.
"""
return re.sub(r'\s*[\\[(][^)\]]*remaster[^)\]]*[)\]]\s*', '', text, flags=re.IGNORECASE).strip()
return re.sub(
r"\s*[\\[(][^)\]]*remaster[^)\]]*[)\]]\s*", "", text, flags=re.IGNORECASE
).strip()
def remove_hyphen_remasters(text: str):
"""
Removes remaster info from a track title that contains a hyphen (-) using regex.
"""
return re.sub(r'\s-\s*[^-]*\bremaster[^-]*\s*', '', text, flags=re.IGNORECASE).strip()
return re.sub(
r"\s-\s*[^-]*\bremaster[^-]*\s*", "", text, flags=re.IGNORECASE
).strip()
def clean_title(title: str) -> str: