mirror of
https://github.com/tcsenpai/swingmusic.git
synced 2025-06-10 13:07:35 +00:00
Rewrite remove_duplicates to retain the track with the highest bitrate
+ bump fuzzy search cutoff to 90 + transliterate Unicode to ASCII in fuzzy search texts
This commit is contained in:
parent
8e7021186d
commit
c352037ccd
@ -7,6 +7,7 @@ from flask import Blueprint, request
|
|||||||
from app import models, utils
|
from app import models, utils
|
||||||
from app.db.store import Store
|
from app.db.store import Store
|
||||||
from app.lib import searchlib
|
from app.lib import searchlib
|
||||||
|
from unidecode import unidecode
|
||||||
|
|
||||||
api = Blueprint("search", __name__, url_prefix="/")
|
api = Blueprint("search", __name__, url_prefix="/")
|
||||||
|
|
||||||
@ -35,8 +36,8 @@ class DoSearch:
|
|||||||
:param :str:`query`: the search query.
|
:param :str:`query`: the search query.
|
||||||
"""
|
"""
|
||||||
self.tracks: list[models.Track] = []
|
self.tracks: list[models.Track] = []
|
||||||
self.query = query
|
self.query = unidecode(query)
|
||||||
SearchResults.query = query
|
SearchResults.query = self.query
|
||||||
|
|
||||||
def search_tracks(self):
|
def search_tracks(self):
|
||||||
"""Calls :class:`SearchTracks` which returns the tracks that fuzzily match
|
"""Calls :class:`SearchTracks` which returns the tracks that fuzzily match
|
||||||
@ -57,9 +58,8 @@ class DoSearch:
|
|||||||
"""Calls :class:`SearchArtists` which returns the artists that fuzzily match
|
"""Calls :class:`SearchArtists` which returns the artists that fuzzily match
|
||||||
the search term. Then adds them to the `SearchResults` store.
|
the search term. Then adds them to the `SearchResults` store.
|
||||||
"""
|
"""
|
||||||
# self.artists = utils.Get.get_all_artists()
|
|
||||||
artists = [a.name for a in Store.artists]
|
artists = [a.name for a in Store.artists]
|
||||||
artists = searchlib.SearchArtists(artists, self.query)()
|
artists = searchlib.SearchArtists(Store.artists, self.query)()
|
||||||
SearchResults.artists = artists
|
SearchResults.artists = artists
|
||||||
|
|
||||||
return artists
|
return artists
|
||||||
@ -68,7 +68,6 @@ class DoSearch:
|
|||||||
"""Calls :class:`SearchAlbums` which returns the albums that fuzzily match
|
"""Calls :class:`SearchAlbums` which returns the albums that fuzzily match
|
||||||
the search term. Then adds them to the `SearchResults` store.
|
the search term. Then adds them to the `SearchResults` store.
|
||||||
"""
|
"""
|
||||||
# albums = utils.Get.get_all_albums()
|
|
||||||
albums = Store.albums
|
albums = Store.albums
|
||||||
albums = searchlib.SearchAlbums(albums, self.query)()
|
albums = searchlib.SearchAlbums(albums, self.query)()
|
||||||
SearchResults.albums = albums
|
SearchResults.albums = albums
|
||||||
@ -179,12 +178,12 @@ def get_top_results():
|
|||||||
|
|
||||||
DoSearch(query).search_all()
|
DoSearch(query).search_all()
|
||||||
|
|
||||||
max = 2
|
max_results = 2
|
||||||
return {
|
return {
|
||||||
"tracks": SearchResults.tracks[:max],
|
"tracks": SearchResults.tracks[:max_results],
|
||||||
"albums": SearchResults.albums[:max],
|
"albums": SearchResults.albums[:max_results],
|
||||||
"artists": SearchResults.artists[:max],
|
"artists": SearchResults.artists[:max_results],
|
||||||
"playlists": SearchResults.playlists[:max],
|
"playlists": SearchResults.playlists[:max_results],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -199,20 +198,20 @@ def search_load_more():
|
|||||||
if s_type == "tracks":
|
if s_type == "tracks":
|
||||||
t = SearchResults.tracks
|
t = SearchResults.tracks
|
||||||
return {
|
return {
|
||||||
"tracks": t[index: index + SEARCH_COUNT],
|
"tracks": t[index : index + SEARCH_COUNT],
|
||||||
"more": len(t) > index + SEARCH_COUNT,
|
"more": len(t) > index + SEARCH_COUNT,
|
||||||
}
|
}
|
||||||
|
|
||||||
elif s_type == "albums":
|
elif s_type == "albums":
|
||||||
a = SearchResults.albums
|
a = SearchResults.albums
|
||||||
return {
|
return {
|
||||||
"albums": a[index: index + SEARCH_COUNT],
|
"albums": a[index : index + SEARCH_COUNT],
|
||||||
"more": len(a) > index + SEARCH_COUNT,
|
"more": len(a) > index + SEARCH_COUNT,
|
||||||
}
|
}
|
||||||
|
|
||||||
elif s_type == "artists":
|
elif s_type == "artists":
|
||||||
a = SearchResults.artists
|
a = SearchResults.artists
|
||||||
return {
|
return {
|
||||||
"artists": a[index: index + SEARCH_COUNT],
|
"artists": a[index : index + SEARCH_COUNT],
|
||||||
"more": len(a) > index + SEARCH_COUNT,
|
"more": len(a) > index + SEARCH_COUNT,
|
||||||
}
|
}
|
||||||
|
@ -69,14 +69,8 @@ class Store:
|
|||||||
Returns a list of tracks by their hashes.
|
Returns a list of tracks by their hashes.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tracks = []
|
trackhashes = " ".join(trackhashes)
|
||||||
|
return [track for track in cls.tracks if track.trackhash in trackhashes]
|
||||||
for trackhash in trackhashes:
|
|
||||||
for track in cls.tracks:
|
|
||||||
if track.trackhash == trackhash:
|
|
||||||
tracks.append(track)
|
|
||||||
|
|
||||||
return tracks
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def remove_track_by_filepath(cls, filepath: str):
|
def remove_track_by_filepath(cls, filepath: str):
|
||||||
|
@ -7,7 +7,6 @@ from requests import ReadTimeout
|
|||||||
|
|
||||||
from app import utils
|
from app import utils
|
||||||
from app.lib.artistlib import CheckArtistImages
|
from app.lib.artistlib import CheckArtistImages
|
||||||
from app.lib.colorlib import ProcessArtistColors
|
|
||||||
from app.lib.populate import Populate, PopulateCancelledError
|
from app.lib.populate import Populate, PopulateCancelledError
|
||||||
from app.lib.trackslib import validate_tracks
|
from app.lib.trackslib import validate_tracks
|
||||||
from app.logger import log
|
from app.logger import log
|
||||||
|
@ -4,6 +4,7 @@ This library contains all the functions related to the search functionality.
|
|||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from rapidfuzz import fuzz, process
|
from rapidfuzz import fuzz, process
|
||||||
|
from unidecode import unidecode
|
||||||
|
|
||||||
from app import models
|
from app import models
|
||||||
|
|
||||||
@ -16,10 +17,10 @@ class Cutoff:
|
|||||||
Holds all the default cutoff values.
|
Holds all the default cutoff values.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tracks: int = 60
|
tracks: int = 90
|
||||||
albums: int = 60
|
albums: int = 90
|
||||||
artists: int = 60
|
artists: int = 90
|
||||||
playlists: int = 60
|
playlists: int = 90
|
||||||
|
|
||||||
|
|
||||||
class Limit:
|
class Limit:
|
||||||
@ -27,10 +28,10 @@ class Limit:
|
|||||||
Holds all the default limit values.
|
Holds all the default limit values.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tracks: int = 50
|
tracks: int = 150
|
||||||
albums: int = 50
|
albums: int = 150
|
||||||
artists: int = 50
|
artists: int = 150
|
||||||
playlists: int = 50
|
playlists: int = 150
|
||||||
|
|
||||||
|
|
||||||
class SearchTracks:
|
class SearchTracks:
|
||||||
@ -43,7 +44,7 @@ class SearchTracks:
|
|||||||
Gets all songs with a given title.
|
Gets all songs with a given title.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tracks = [track.og_title for track in self.tracks]
|
tracks = [unidecode(track.og_title).lower() for track in self.tracks]
|
||||||
results = process.extract(
|
results = process.extract(
|
||||||
self.query,
|
self.query,
|
||||||
tracks,
|
tracks,
|
||||||
@ -56,7 +57,7 @@ class SearchTracks:
|
|||||||
|
|
||||||
|
|
||||||
class SearchArtists:
|
class SearchArtists:
|
||||||
def __init__(self, artists: list[str], query: str) -> None:
|
def __init__(self, artists: list[models.Artist], query: str) -> None:
|
||||||
self.query = query
|
self.query = query
|
||||||
self.artists = artists
|
self.artists = artists
|
||||||
|
|
||||||
@ -64,17 +65,18 @@ class SearchArtists:
|
|||||||
"""
|
"""
|
||||||
Gets all artists with a given name.
|
Gets all artists with a given name.
|
||||||
"""
|
"""
|
||||||
|
artists = [unidecode(a.name).lower() for a in self.artists]
|
||||||
|
|
||||||
results = process.extract(
|
results = process.extract(
|
||||||
self.query,
|
self.query,
|
||||||
self.artists,
|
artists,
|
||||||
scorer=fuzz.WRatio,
|
scorer=fuzz.WRatio,
|
||||||
score_cutoff=Cutoff.artists,
|
score_cutoff=Cutoff.artists,
|
||||||
limit=Limit.artists,
|
limit=Limit.artists,
|
||||||
)
|
)
|
||||||
|
|
||||||
artists = [a[0] for a in results]
|
artists = [a[0] for a in results]
|
||||||
return [models.Artist(a) for a in artists]
|
return [self.artists[i[2]] for i in results]
|
||||||
|
|
||||||
|
|
||||||
class SearchAlbums:
|
class SearchAlbums:
|
||||||
@ -87,7 +89,7 @@ class SearchAlbums:
|
|||||||
Gets all albums with a given title.
|
Gets all albums with a given title.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
albums = [a.title.lower() for a in self.albums]
|
albums = [unidecode(a.title).lower() for a in self.albums]
|
||||||
|
|
||||||
results = process.extract(
|
results = process.extract(
|
||||||
self.query,
|
self.query,
|
||||||
|
@ -1,55 +1,54 @@
|
|||||||
from datetime import datetime
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
|
||||||
def date_string_to_time_passed(prev_date: str) -> str:
|
def date_string_to_time_passed(prev_date: str) -> str:
|
||||||
"""
|
"""
|
||||||
Converts a date string to time passed. eg. 2 minutes ago, 1 hour ago, yesterday, 2 days ago, 2 weeks ago, etc.
|
Converts a date string to time passed. eg. 2 minutes ago, 1 hour ago, yesterday, 2 days ago, 2 weeks ago, etc.
|
||||||
"""
|
"""
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
now = datetime.now()
|
then = datetime.strptime(prev_date, "%Y-%m-%d %H:%M:%S").replace(
|
||||||
then = datetime.strptime(prev_date, "%Y-%m-%d %H:%M:%S")
|
tzinfo=timezone.utc
|
||||||
|
)
|
||||||
|
|
||||||
diff = now - then
|
diff = now - then
|
||||||
days = diff.days
|
seconds = diff.total_seconds()
|
||||||
|
|
||||||
if days < 0:
|
if seconds < 0:
|
||||||
return "in the future"
|
return "in the future"
|
||||||
|
|
||||||
if days == 0:
|
if seconds < 15:
|
||||||
seconds = diff.seconds
|
return "now"
|
||||||
|
|
||||||
if seconds < 15:
|
if seconds < 60:
|
||||||
return "now"
|
return f"{int(seconds)} seconds ago"
|
||||||
|
|
||||||
if seconds < 60:
|
if seconds < 3600:
|
||||||
return str(seconds) + " seconds ago"
|
return f"{int(seconds // 60)} minutes ago"
|
||||||
|
|
||||||
if seconds < 3600:
|
if seconds < 86400:
|
||||||
return str(seconds // 60) + " minutes ago"
|
return f"{int(seconds // 3600)} hours ago"
|
||||||
|
|
||||||
return str(seconds // 3600) + " hours ago"
|
days = diff.days
|
||||||
|
|
||||||
if days == 1:
|
if days == 1:
|
||||||
return "yesterday"
|
return "yesterday"
|
||||||
|
|
||||||
if days < 7:
|
if days < 7:
|
||||||
return str(days) + " days ago"
|
return f"{days} days ago"
|
||||||
|
|
||||||
|
if days < 14:
|
||||||
|
return "1 week ago"
|
||||||
|
|
||||||
if days < 30:
|
if days < 30:
|
||||||
if days < 14:
|
return f"{int(days // 7)} weeks ago"
|
||||||
return "1 week ago"
|
|
||||||
|
if days < 60:
|
||||||
|
return "1 month ago"
|
||||||
|
|
||||||
return str(days // 7) + " weeks ago"
|
|
||||||
if days < 365:
|
if days < 365:
|
||||||
if days < 60:
|
return f"{int(days // 30)} months ago"
|
||||||
return "1 month ago"
|
|
||||||
|
|
||||||
return str(days // 30) + " months ago"
|
if days < 730:
|
||||||
if days > 365:
|
return "1 year ago"
|
||||||
if days < 730:
|
|
||||||
return "1 year ago"
|
|
||||||
|
|
||||||
return str(days // 365) + " years ago"
|
|
||||||
|
|
||||||
return "I honestly don't know"
|
|
||||||
|
|
||||||
|
return f"{int(days // 365)} years ago"
|
||||||
|
43
app/utils.py
43
app/utils.py
@ -11,6 +11,8 @@ import string
|
|||||||
import threading
|
import threading
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from collections import defaultdict
|
||||||
|
from operator import attrgetter
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from unidecode import unidecode
|
from unidecode import unidecode
|
||||||
@ -67,34 +69,37 @@ def run_fast_scandir(_dir: str, full=False) -> tuple[list[str], list[str]]:
|
|||||||
|
|
||||||
def remove_duplicates(tracks: list[models.Track]) -> list[models.Track]:
|
def remove_duplicates(tracks: list[models.Track]) -> list[models.Track]:
|
||||||
"""
|
"""
|
||||||
Removes duplicate tracks from a list of tracks.
|
Remove duplicates from a list of Track objects based on the trackhash attribute.
|
||||||
|
Retains objects with the highest bitrate.
|
||||||
"""
|
"""
|
||||||
hashes = []
|
hash_to_tracks = defaultdict(list)
|
||||||
|
|
||||||
for track in tracks:
|
for track in tracks:
|
||||||
if track.trackhash not in hashes:
|
hash_to_tracks[track.trackhash].append(track)
|
||||||
hashes.append(track.trackhash)
|
|
||||||
|
|
||||||
tracks = sorted(tracks, key=lambda x: x.trackhash)
|
tracks = []
|
||||||
tracks = UseBisection(tracks, "trackhash", hashes)()
|
|
||||||
|
|
||||||
return [t for t in tracks if t is not None]
|
for track_group in hash_to_tracks.values():
|
||||||
|
max_bitrate_track = max(track_group, key=attrgetter("bitrate"))
|
||||||
|
tracks.append(max_bitrate_track)
|
||||||
|
|
||||||
|
return tracks
|
||||||
|
|
||||||
|
|
||||||
def create_hash(*args: str, decode=False, limit=7) -> str:
|
def create_hash(*args: str, decode=False, limit=7) -> str:
|
||||||
"""
|
"""
|
||||||
Creates a simple hash for an album
|
Creates a simple hash for an album
|
||||||
"""
|
"""
|
||||||
string = "".join(args)
|
str_ = "".join(args)
|
||||||
|
|
||||||
if decode:
|
if decode:
|
||||||
string = unidecode(string)
|
str_ = unidecode(str_)
|
||||||
|
|
||||||
string = string.lower().strip().replace(" ", "")
|
str_ = str_.lower().strip().replace(" ", "")
|
||||||
string = "".join(t for t in string if t.isalnum())
|
str_ = "".join(t for t in str_ if t.isalnum())
|
||||||
string = string.encode("utf-8")
|
str_ = str_.encode("utf-8")
|
||||||
string = hashlib.sha256(string).hexdigest()
|
str_ = hashlib.sha256(str_).hexdigest()
|
||||||
return string[-limit:]
|
return str_[-limit:]
|
||||||
|
|
||||||
|
|
||||||
def create_folder_hash(*args: str, limit=7) -> str:
|
def create_folder_hash(*args: str, limit=7) -> str:
|
||||||
@ -191,7 +196,7 @@ def get_albumartists(albums: list[models.Album]) -> set[str]:
|
|||||||
|
|
||||||
|
|
||||||
def get_all_artists(
|
def get_all_artists(
|
||||||
tracks: list[models.Track], albums: list[models.Album]
|
tracks: list[models.Track], albums: list[models.Album]
|
||||||
) -> list[models.Artist]:
|
) -> list[models.Artist]:
|
||||||
artists_from_tracks = get_artists_from_tracks(tracks)
|
artists_from_tracks = get_artists_from_tracks(tracks)
|
||||||
artist_from_albums = get_albumartists(albums)
|
artist_from_albums = get_albumartists(albums)
|
||||||
@ -300,7 +305,7 @@ def win_replace_slash(path: str):
|
|||||||
|
|
||||||
|
|
||||||
def split_artists(src: str, with_and: bool = False):
|
def split_artists(src: str, with_and: bool = False):
|
||||||
exp = r"\s*(?:and|&|,|;)\s*" if with_and else r"\s*[,;]\s*"
|
exp = r"\s*(?: and |&|,|;)\s*" if with_and else r"\s*[,;]\s*"
|
||||||
|
|
||||||
artists = re.split(exp, src)
|
artists = re.split(exp, src)
|
||||||
return [a.strip() for a in artists]
|
return [a.strip() for a in artists]
|
||||||
@ -349,10 +354,10 @@ def remove_prod(title: str) -> str:
|
|||||||
return title
|
return title
|
||||||
|
|
||||||
# check if title has brackets
|
# check if title has brackets
|
||||||
if re.search(r'[()\[\]]', title):
|
if re.search(r"[()\[\]]", title):
|
||||||
regex = r'\s?(\(|\[)prod\..*?(\)|\])\s?'
|
regex = r"\s?(\(|\[)prod\..*?(\)|\])\s?"
|
||||||
else:
|
else:
|
||||||
regex = r'\s?\bprod\.\s*\S+'
|
regex = r"\s?\bprod\.\s*\S+"
|
||||||
|
|
||||||
# remove the producer string
|
# remove the producer string
|
||||||
title = re.sub(regex, "", title, flags=re.IGNORECASE)
|
title = re.sub(regex, "", title, flags=re.IGNORECASE)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user