mirror of
https://github.com/tcsenpai/swingmusic.git
synced 2025-06-07 03:35:35 +00:00
rewrite remove_duplicates to retain tracks with highest bitrate
+ bump fuzzy search cutoff to 90 + transliterate Unicode to ASCII in fuzzy search texts
This commit is contained in:
parent
8e7021186d
commit
c352037ccd
@ -7,6 +7,7 @@ from flask import Blueprint, request
|
||||
from app import models, utils
|
||||
from app.db.store import Store
|
||||
from app.lib import searchlib
|
||||
from unidecode import unidecode
|
||||
|
||||
api = Blueprint("search", __name__, url_prefix="/")
|
||||
|
||||
@ -35,8 +36,8 @@ class DoSearch:
|
||||
:param :str:`query`: the search query.
|
||||
"""
|
||||
self.tracks: list[models.Track] = []
|
||||
self.query = query
|
||||
SearchResults.query = query
|
||||
self.query = unidecode(query)
|
||||
SearchResults.query = self.query
|
||||
|
||||
def search_tracks(self):
|
||||
"""Calls :class:`SearchTracks` which returns the tracks that fuzzily match
|
||||
@ -57,9 +58,8 @@ class DoSearch:
|
||||
"""Calls :class:`SearchArtists` which returns the artists that fuzzily match
|
||||
the search term. Then adds them to the `SearchResults` store.
|
||||
"""
|
||||
# self.artists = utils.Get.get_all_artists()
|
||||
artists = [a.name for a in Store.artists]
|
||||
artists = searchlib.SearchArtists(artists, self.query)()
|
||||
artists = searchlib.SearchArtists(Store.artists, self.query)()
|
||||
SearchResults.artists = artists
|
||||
|
||||
return artists
|
||||
@ -68,7 +68,6 @@ class DoSearch:
|
||||
"""Calls :class:`SearchAlbums` which returns the albums that fuzzily match
|
||||
the search term. Then adds them to the `SearchResults` store.
|
||||
"""
|
||||
# albums = utils.Get.get_all_albums()
|
||||
albums = Store.albums
|
||||
albums = searchlib.SearchAlbums(albums, self.query)()
|
||||
SearchResults.albums = albums
|
||||
@ -179,12 +178,12 @@ def get_top_results():
|
||||
|
||||
DoSearch(query).search_all()
|
||||
|
||||
max = 2
|
||||
max_results = 2
|
||||
return {
|
||||
"tracks": SearchResults.tracks[:max],
|
||||
"albums": SearchResults.albums[:max],
|
||||
"artists": SearchResults.artists[:max],
|
||||
"playlists": SearchResults.playlists[:max],
|
||||
"tracks": SearchResults.tracks[:max_results],
|
||||
"albums": SearchResults.albums[:max_results],
|
||||
"artists": SearchResults.artists[:max_results],
|
||||
"playlists": SearchResults.playlists[:max_results],
|
||||
}
|
||||
|
||||
|
||||
@ -199,20 +198,20 @@ def search_load_more():
|
||||
if s_type == "tracks":
|
||||
t = SearchResults.tracks
|
||||
return {
|
||||
"tracks": t[index: index + SEARCH_COUNT],
|
||||
"tracks": t[index : index + SEARCH_COUNT],
|
||||
"more": len(t) > index + SEARCH_COUNT,
|
||||
}
|
||||
|
||||
elif s_type == "albums":
|
||||
a = SearchResults.albums
|
||||
return {
|
||||
"albums": a[index: index + SEARCH_COUNT],
|
||||
"albums": a[index : index + SEARCH_COUNT],
|
||||
"more": len(a) > index + SEARCH_COUNT,
|
||||
}
|
||||
|
||||
elif s_type == "artists":
|
||||
a = SearchResults.artists
|
||||
return {
|
||||
"artists": a[index: index + SEARCH_COUNT],
|
||||
"artists": a[index : index + SEARCH_COUNT],
|
||||
"more": len(a) > index + SEARCH_COUNT,
|
||||
}
|
||||
|
@ -69,14 +69,8 @@ class Store:
|
||||
Returns a list of tracks by their hashes.
|
||||
"""
|
||||
|
||||
tracks = []
|
||||
|
||||
for trackhash in trackhashes:
|
||||
for track in cls.tracks:
|
||||
if track.trackhash == trackhash:
|
||||
tracks.append(track)
|
||||
|
||||
return tracks
|
||||
trackhashes = " ".join(trackhashes)
|
||||
return [track for track in cls.tracks if track.trackhash in trackhashes]
|
||||
|
||||
@classmethod
|
||||
def remove_track_by_filepath(cls, filepath: str):
|
||||
|
@ -7,7 +7,6 @@ from requests import ReadTimeout
|
||||
|
||||
from app import utils
|
||||
from app.lib.artistlib import CheckArtistImages
|
||||
from app.lib.colorlib import ProcessArtistColors
|
||||
from app.lib.populate import Populate, PopulateCancelledError
|
||||
from app.lib.trackslib import validate_tracks
|
||||
from app.logger import log
|
||||
|
@ -4,6 +4,7 @@ This library contains all the functions related to the search functionality.
|
||||
from typing import List
|
||||
|
||||
from rapidfuzz import fuzz, process
|
||||
from unidecode import unidecode
|
||||
|
||||
from app import models
|
||||
|
||||
@ -16,10 +17,10 @@ class Cutoff:
|
||||
Holds all the default cutoff values.
|
||||
"""
|
||||
|
||||
tracks: int = 60
|
||||
albums: int = 60
|
||||
artists: int = 60
|
||||
playlists: int = 60
|
||||
tracks: int = 90
|
||||
albums: int = 90
|
||||
artists: int = 90
|
||||
playlists: int = 90
|
||||
|
||||
|
||||
class Limit:
|
||||
@ -27,10 +28,10 @@ class Limit:
|
||||
Holds all the default limit values.
|
||||
"""
|
||||
|
||||
tracks: int = 50
|
||||
albums: int = 50
|
||||
artists: int = 50
|
||||
playlists: int = 50
|
||||
tracks: int = 150
|
||||
albums: int = 150
|
||||
artists: int = 150
|
||||
playlists: int = 150
|
||||
|
||||
|
||||
class SearchTracks:
|
||||
@ -43,7 +44,7 @@ class SearchTracks:
|
||||
Gets all songs with a given title.
|
||||
"""
|
||||
|
||||
tracks = [track.og_title for track in self.tracks]
|
||||
tracks = [unidecode(track.og_title).lower() for track in self.tracks]
|
||||
results = process.extract(
|
||||
self.query,
|
||||
tracks,
|
||||
@ -56,7 +57,7 @@ class SearchTracks:
|
||||
|
||||
|
||||
class SearchArtists:
|
||||
def __init__(self, artists: list[str], query: str) -> None:
|
||||
def __init__(self, artists: list[models.Artist], query: str) -> None:
|
||||
self.query = query
|
||||
self.artists = artists
|
||||
|
||||
@ -64,17 +65,18 @@ class SearchArtists:
|
||||
"""
|
||||
Gets all artists with a given name.
|
||||
"""
|
||||
artists = [unidecode(a.name).lower() for a in self.artists]
|
||||
|
||||
results = process.extract(
|
||||
self.query,
|
||||
self.artists,
|
||||
artists,
|
||||
scorer=fuzz.WRatio,
|
||||
score_cutoff=Cutoff.artists,
|
||||
limit=Limit.artists,
|
||||
)
|
||||
|
||||
artists = [a[0] for a in results]
|
||||
return [models.Artist(a) for a in artists]
|
||||
return [self.artists[i[2]] for i in results]
|
||||
|
||||
|
||||
class SearchAlbums:
|
||||
@ -87,7 +89,7 @@ class SearchAlbums:
|
||||
Gets all albums with a given title.
|
||||
"""
|
||||
|
||||
albums = [a.title.lower() for a in self.albums]
|
||||
albums = [unidecode(a.title).lower() for a in self.albums]
|
||||
|
||||
results = process.extract(
|
||||
self.query,
|
||||
|
@ -1,55 +1,54 @@
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
|
||||
|
||||
def date_string_to_time_passed(prev_date: str) -> str:
|
||||
"""
|
||||
Converts a date string to time passed. eg. 2 minutes ago, 1 hour ago, yesterday, 2 days ago, 2 weeks ago, etc.
|
||||
"""
|
||||
|
||||
now = datetime.now()
|
||||
then = datetime.strptime(prev_date, "%Y-%m-%d %H:%M:%S")
|
||||
now = datetime.now(timezone.utc)
|
||||
then = datetime.strptime(prev_date, "%Y-%m-%d %H:%M:%S").replace(
|
||||
tzinfo=timezone.utc
|
||||
)
|
||||
|
||||
diff = now - then
|
||||
days = diff.days
|
||||
seconds = diff.total_seconds()
|
||||
|
||||
if days < 0:
|
||||
if seconds < 0:
|
||||
return "in the future"
|
||||
|
||||
if days == 0:
|
||||
seconds = diff.seconds
|
||||
if seconds < 15:
|
||||
return "now"
|
||||
|
||||
if seconds < 15:
|
||||
return "now"
|
||||
if seconds < 60:
|
||||
return f"{int(seconds)} seconds ago"
|
||||
|
||||
if seconds < 60:
|
||||
return str(seconds) + " seconds ago"
|
||||
if seconds < 3600:
|
||||
return f"{int(seconds // 60)} minutes ago"
|
||||
|
||||
if seconds < 3600:
|
||||
return str(seconds // 60) + " minutes ago"
|
||||
if seconds < 86400:
|
||||
return f"{int(seconds // 3600)} hours ago"
|
||||
|
||||
return str(seconds // 3600) + " hours ago"
|
||||
days = diff.days
|
||||
|
||||
if days == 1:
|
||||
return "yesterday"
|
||||
|
||||
if days < 7:
|
||||
return str(days) + " days ago"
|
||||
return f"{days} days ago"
|
||||
|
||||
if days < 14:
|
||||
return "1 week ago"
|
||||
|
||||
if days < 30:
|
||||
if days < 14:
|
||||
return "1 week ago"
|
||||
return f"{int(days // 7)} weeks ago"
|
||||
|
||||
if days < 60:
|
||||
return "1 month ago"
|
||||
|
||||
return str(days // 7) + " weeks ago"
|
||||
if days < 365:
|
||||
if days < 60:
|
||||
return "1 month ago"
|
||||
return f"{int(days // 30)} months ago"
|
||||
|
||||
return str(days // 30) + " months ago"
|
||||
if days > 365:
|
||||
if days < 730:
|
||||
return "1 year ago"
|
||||
|
||||
return str(days // 365) + " years ago"
|
||||
|
||||
return "I honestly don't know"
|
||||
if days < 730:
|
||||
return "1 year ago"
|
||||
|
||||
return f"{int(days // 365)} years ago"
|
||||
|
43
app/utils.py
43
app/utils.py
@ -11,6 +11,8 @@ import string
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
from operator import attrgetter
|
||||
|
||||
import requests
|
||||
from unidecode import unidecode
|
||||
@ -67,34 +69,37 @@ def run_fast_scandir(_dir: str, full=False) -> tuple[list[str], list[str]]:
|
||||
|
||||
def remove_duplicates(tracks: list[models.Track]) -> list[models.Track]:
|
||||
"""
|
||||
Removes duplicate tracks from a list of tracks.
|
||||
Remove duplicates from a list of Track objects based on the trackhash attribute.
|
||||
Retains objects with the highest bitrate.
|
||||
"""
|
||||
hashes = []
|
||||
hash_to_tracks = defaultdict(list)
|
||||
|
||||
for track in tracks:
|
||||
if track.trackhash not in hashes:
|
||||
hashes.append(track.trackhash)
|
||||
hash_to_tracks[track.trackhash].append(track)
|
||||
|
||||
tracks = sorted(tracks, key=lambda x: x.trackhash)
|
||||
tracks = UseBisection(tracks, "trackhash", hashes)()
|
||||
tracks = []
|
||||
|
||||
return [t for t in tracks if t is not None]
|
||||
for track_group in hash_to_tracks.values():
|
||||
max_bitrate_track = max(track_group, key=attrgetter("bitrate"))
|
||||
tracks.append(max_bitrate_track)
|
||||
|
||||
return tracks
|
||||
|
||||
|
||||
def create_hash(*args: str, decode=False, limit=7) -> str:
|
||||
"""
|
||||
Creates a simple hash for an album
|
||||
"""
|
||||
string = "".join(args)
|
||||
str_ = "".join(args)
|
||||
|
||||
if decode:
|
||||
string = unidecode(string)
|
||||
str_ = unidecode(str_)
|
||||
|
||||
string = string.lower().strip().replace(" ", "")
|
||||
string = "".join(t for t in string if t.isalnum())
|
||||
string = string.encode("utf-8")
|
||||
string = hashlib.sha256(string).hexdigest()
|
||||
return string[-limit:]
|
||||
str_ = str_.lower().strip().replace(" ", "")
|
||||
str_ = "".join(t for t in str_ if t.isalnum())
|
||||
str_ = str_.encode("utf-8")
|
||||
str_ = hashlib.sha256(str_).hexdigest()
|
||||
return str_[-limit:]
|
||||
|
||||
|
||||
def create_folder_hash(*args: str, limit=7) -> str:
|
||||
@ -191,7 +196,7 @@ def get_albumartists(albums: list[models.Album]) -> set[str]:
|
||||
|
||||
|
||||
def get_all_artists(
|
||||
tracks: list[models.Track], albums: list[models.Album]
|
||||
tracks: list[models.Track], albums: list[models.Album]
|
||||
) -> list[models.Artist]:
|
||||
artists_from_tracks = get_artists_from_tracks(tracks)
|
||||
artist_from_albums = get_albumartists(albums)
|
||||
@ -300,7 +305,7 @@ def win_replace_slash(path: str):
|
||||
|
||||
|
||||
def split_artists(src: str, with_and: bool = False):
|
||||
exp = r"\s*(?:and|&|,|;)\s*" if with_and else r"\s*[,;]\s*"
|
||||
exp = r"\s*(?: and |&|,|;)\s*" if with_and else r"\s*[,;]\s*"
|
||||
|
||||
artists = re.split(exp, src)
|
||||
return [a.strip() for a in artists]
|
||||
@ -349,10 +354,10 @@ def remove_prod(title: str) -> str:
|
||||
return title
|
||||
|
||||
# check if title has brackets
|
||||
if re.search(r'[()\[\]]', title):
|
||||
regex = r'\s?(\(|\[)prod\..*?(\)|\])\s?'
|
||||
if re.search(r"[()\[\]]", title):
|
||||
regex = r"\s?(\(|\[)prod\..*?(\)|\])\s?"
|
||||
else:
|
||||
regex = r'\s?\bprod\.\s*\S+'
|
||||
regex = r"\s?\bprod\.\s*\S+"
|
||||
|
||||
# remove the producer string
|
||||
title = re.sub(regex, "", title, flags=re.IGNORECASE)
|
||||
|
Loading…
x
Reference in New Issue
Block a user