From 2ba1b0386e74b92e38667275e88cecdb499decc5 Mon Sep 17 00:00:00 2001 From: geoffrey45 Date: Tue, 24 Jan 2023 18:53:30 +0300 Subject: [PATCH] feat: extract featured artists from track title --- .gitignore | 3 +-- app/api/artist.py | 3 +-- app/api/settings.py | 13 ++++++------- app/models.py | 18 +++++++++++------- app/utils.py | 28 +++++++++++++++++++++++++++- tests/__init__.py | 0 tests/test.py | 18 ++++++++++++++++++ 7 files changed, 64 insertions(+), 19 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/test.py diff --git a/.gitignore b/.gitignore index f87f9f0..9164181 100644 --- a/.gitignore +++ b/.gitignore @@ -16,10 +16,9 @@ __pycache__ .hypothesis sqllib.py encoderx.py -tests .pytest_cache # pyinstaller files dist build -client \ No newline at end of file +client diff --git a/app/api/artist.py b/app/api/artist.py index f03e296..a25e300 100644 --- a/app/api/artist.py +++ b/app/api/artist.py @@ -19,7 +19,7 @@ class CacheEntry: """ def __init__( - self, artisthash: str, albumhashes: set[str], tracks: list[Track] + self, artisthash: str, albumhashes: set[str], tracks: list[Track] ) -> None: self.albums: list[Album] = [] self.tracks: list[Track] = [] @@ -275,7 +275,6 @@ def get_artist_tracks(artisthash: str): # return {"albums": albums[:limit]} - # @artist_bp.route("/artist/") # @cache.cached() # def get_artist_data(artist: str): diff --git a/app/api/settings.py b/app/api/settings.py index bc2c8e0..2d99894 100644 --- a/app/api/settings.py +++ b/app/api/settings.py @@ -1,7 +1,6 @@ from flask import Blueprint, request from app import settings - from app.logger import log from app.lib import populate from app.db.store import Store @@ -15,7 +14,7 @@ api = Blueprint("settings", __name__, url_prefix="/") def get_child_dirs(parent: str, children: list[str]): """Returns child directories in a list, given a parent directory""" - return [dir for dir in children if dir.startswith(parent) and dir != parent] + return [_dir for _dir in children if _dir.startswith(parent) and _dir != parent] @background @@ -56,10 +55,10 @@ def add_root_dirs(): except KeyError: return msg, 400 - def finalize(new_dirs: list[str], removed_dirs: list[str], db_dirs: list[str]): - sdb.remove_root_dirs(removed_dirs) - sdb.add_root_dirs(new_dirs) - rebuild_store(db_dirs) + def finalize(new_: list[str], removed_: list[str], db_dirs_: list[str]): + sdb.remove_root_dirs(removed_) + sdb.add_root_dirs(new_) + rebuild_store(db_dirs_) # --- db_dirs = sdb.get_root_dirs() @@ -91,7 +90,7 @@ def add_root_dirs(): pass db_dirs.extend(new_dirs) - db_dirs = [dir for dir in db_dirs if dir != _h] + db_dirs = [dir_ for dir_ in db_dirs if dir_ != _h] finalize(new_dirs, removed_dirs, db_dirs) diff --git a/app/models.py b/app/models.py index 650ad8f..a16cad6 100644 --- a/app/models.py +++ b/app/models.py @@ -58,10 +58,14 @@ class Track: def __post_init__(self): if self.artist is not None: - artist_str = str(self.artist).split(", ") - self.artist_hashes = [utils.create_hash(a, decode=True) for a in artist_str] + artists = utils.split_artists(self.artist) + featured = utils.extract_featured_artists_from_title(self.title) + artists.extend(featured) + artists = set(artists) - self.artist = [Artist(a) for a in artist_str] + self.artist_hashes = [utils.create_hash(a, decode=True) for a in artists] + + self.artist = [Artist(a) for a in artists] albumartists = str(self.albumartist).split(", ") self.albumartist = [Artist(a) for a in albumartists] @@ -150,10 +154,10 @@ class Album: Checks if the album is a single. """ if ( - len(tracks) == 1 - and tracks[0].title == self.title - and tracks[0].track == 1 - and tracks[0].disc == 1 + len(tracks) == 1 + and tracks[0].title == self.title + and tracks[0].track == 1 + and tracks[0].disc == 1 ): self.is_single = True diff --git a/app/utils.py b/app/utils.py index 7667286..2fae6c4 100644 --- a/app/utils.py +++ b/app/utils.py @@ -1,6 +1,7 @@ """ This module contains mini functions for the server. """ +import re from pathlib import Path from datetime import datetime @@ -187,7 +188,7 @@ def get_albumartists(albums: list[models.Album]) -> set[str]: def get_all_artists( - tracks: list[models.Track], albums: list[models.Album] + tracks: list[models.Track], albums: list[models.Album] ) -> list[models.Artist]: artists_from_tracks = get_artists_from_tracks(tracks) artist_from_albums = get_albumartists(albums) @@ -250,3 +251,28 @@ def is_windows(): Returns True if the OS is Windows. """ return platform.system() == "Windows" + + +def split_artists(src: str): + artists = re.split(r"\s*[&,;/]\s*", src) + return [a.strip() for a in artists] + + + + + +def extract_featured_artists_from_title(title: str) -> list[str]: + """ + Extracts featured artists from a song title using regex. + """ + regex = r"\((?:feat|ft|featuring|with)\.?\s+(.+?)\)" + match = re.search(regex, title, re.IGNORECASE) + + if not match: + return [] + + artists = match.group(1) + artists = split_artists(artists) + return artists + + diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test.py b/tests/test.py new file mode 100644 index 0000000..29b1290 --- /dev/null +++ b/tests/test.py @@ -0,0 +1,18 @@ +from app.utils import extract_featured_artists_from_title + + +def test_extract_featured_artists_from_title(): + test_titles = [ + "Own it (Featuring Ed Sheeran & Stormzy)", + "Godzilla (Deluxe)(Feat. Juice Wrld)(Deluxe)", + "Simmer (with Burna Boy)", + ] + + expected_test_artists = [ + ["Ed Sheeran", "Stormzy"], + ['Juice Wrld'], + ["Burna Boy"] + ] + + for title, expected in zip(test_titles, expected_test_artists): + assert extract_featured_artists_from_title(title) == expected