Rewrite remove_duplicates to retain the track with the highest bitrate for each trackhash

+ bump fuzzy search cutoff to 90
+ transliterate Unicode characters to ASCII (via unidecode) in fuzzy search queries and searched text
This commit is contained in:
geoffrey45 2023-02-26 09:50:45 +03:00
parent 8e7021186d
commit c352037ccd
6 changed files with 81 additions and 83 deletions

View File

@ -7,6 +7,7 @@ from flask import Blueprint, request
from app import models, utils from app import models, utils
from app.db.store import Store from app.db.store import Store
from app.lib import searchlib from app.lib import searchlib
from unidecode import unidecode
api = Blueprint("search", __name__, url_prefix="/") api = Blueprint("search", __name__, url_prefix="/")
@ -35,8 +36,8 @@ class DoSearch:
:param :str:`query`: the search query. :param :str:`query`: the search query.
""" """
self.tracks: list[models.Track] = [] self.tracks: list[models.Track] = []
self.query = query self.query = unidecode(query)
SearchResults.query = query SearchResults.query = self.query
def search_tracks(self): def search_tracks(self):
"""Calls :class:`SearchTracks` which returns the tracks that fuzzily match """Calls :class:`SearchTracks` which returns the tracks that fuzzily match
@ -57,9 +58,8 @@ class DoSearch:
"""Calls :class:`SearchArtists` which returns the artists that fuzzily match """Calls :class:`SearchArtists` which returns the artists that fuzzily match
the search term. Then adds them to the `SearchResults` store. the search term. Then adds them to the `SearchResults` store.
""" """
# self.artists = utils.Get.get_all_artists()
artists = [a.name for a in Store.artists] artists = [a.name for a in Store.artists]
artists = searchlib.SearchArtists(artists, self.query)() artists = searchlib.SearchArtists(Store.artists, self.query)()
SearchResults.artists = artists SearchResults.artists = artists
return artists return artists
@ -68,7 +68,6 @@ class DoSearch:
"""Calls :class:`SearchAlbums` which returns the albums that fuzzily match """Calls :class:`SearchAlbums` which returns the albums that fuzzily match
the search term. Then adds them to the `SearchResults` store. the search term. Then adds them to the `SearchResults` store.
""" """
# albums = utils.Get.get_all_albums()
albums = Store.albums albums = Store.albums
albums = searchlib.SearchAlbums(albums, self.query)() albums = searchlib.SearchAlbums(albums, self.query)()
SearchResults.albums = albums SearchResults.albums = albums
@ -179,12 +178,12 @@ def get_top_results():
DoSearch(query).search_all() DoSearch(query).search_all()
max = 2 max_results = 2
return { return {
"tracks": SearchResults.tracks[:max], "tracks": SearchResults.tracks[:max_results],
"albums": SearchResults.albums[:max], "albums": SearchResults.albums[:max_results],
"artists": SearchResults.artists[:max], "artists": SearchResults.artists[:max_results],
"playlists": SearchResults.playlists[:max], "playlists": SearchResults.playlists[:max_results],
} }
@ -199,20 +198,20 @@ def search_load_more():
if s_type == "tracks": if s_type == "tracks":
t = SearchResults.tracks t = SearchResults.tracks
return { return {
"tracks": t[index: index + SEARCH_COUNT], "tracks": t[index : index + SEARCH_COUNT],
"more": len(t) > index + SEARCH_COUNT, "more": len(t) > index + SEARCH_COUNT,
} }
elif s_type == "albums": elif s_type == "albums":
a = SearchResults.albums a = SearchResults.albums
return { return {
"albums": a[index: index + SEARCH_COUNT], "albums": a[index : index + SEARCH_COUNT],
"more": len(a) > index + SEARCH_COUNT, "more": len(a) > index + SEARCH_COUNT,
} }
elif s_type == "artists": elif s_type == "artists":
a = SearchResults.artists a = SearchResults.artists
return { return {
"artists": a[index: index + SEARCH_COUNT], "artists": a[index : index + SEARCH_COUNT],
"more": len(a) > index + SEARCH_COUNT, "more": len(a) > index + SEARCH_COUNT,
} }

View File

@ -69,14 +69,8 @@ class Store:
Returns a list of tracks by their hashes. Returns a list of tracks by their hashes.
""" """
tracks = [] trackhashes = " ".join(trackhashes)
return [track for track in cls.tracks if track.trackhash in trackhashes]
for trackhash in trackhashes:
for track in cls.tracks:
if track.trackhash == trackhash:
tracks.append(track)
return tracks
@classmethod @classmethod
def remove_track_by_filepath(cls, filepath: str): def remove_track_by_filepath(cls, filepath: str):

View File

@ -7,7 +7,6 @@ from requests import ReadTimeout
from app import utils from app import utils
from app.lib.artistlib import CheckArtistImages from app.lib.artistlib import CheckArtistImages
from app.lib.colorlib import ProcessArtistColors
from app.lib.populate import Populate, PopulateCancelledError from app.lib.populate import Populate, PopulateCancelledError
from app.lib.trackslib import validate_tracks from app.lib.trackslib import validate_tracks
from app.logger import log from app.logger import log

View File

@ -4,6 +4,7 @@ This library contains all the functions related to the search functionality.
from typing import List from typing import List
from rapidfuzz import fuzz, process from rapidfuzz import fuzz, process
from unidecode import unidecode
from app import models from app import models
@ -16,10 +17,10 @@ class Cutoff:
Holds all the default cutoff values. Holds all the default cutoff values.
""" """
tracks: int = 60 tracks: int = 90
albums: int = 60 albums: int = 90
artists: int = 60 artists: int = 90
playlists: int = 60 playlists: int = 90
class Limit: class Limit:
@ -27,10 +28,10 @@ class Limit:
Holds all the default limit values. Holds all the default limit values.
""" """
tracks: int = 50 tracks: int = 150
albums: int = 50 albums: int = 150
artists: int = 50 artists: int = 150
playlists: int = 50 playlists: int = 150
class SearchTracks: class SearchTracks:
@ -43,7 +44,7 @@ class SearchTracks:
Gets all songs with a given title. Gets all songs with a given title.
""" """
tracks = [track.og_title for track in self.tracks] tracks = [unidecode(track.og_title).lower() for track in self.tracks]
results = process.extract( results = process.extract(
self.query, self.query,
tracks, tracks,
@ -56,7 +57,7 @@ class SearchTracks:
class SearchArtists: class SearchArtists:
def __init__(self, artists: list[str], query: str) -> None: def __init__(self, artists: list[models.Artist], query: str) -> None:
self.query = query self.query = query
self.artists = artists self.artists = artists
@ -64,17 +65,18 @@ class SearchArtists:
""" """
Gets all artists with a given name. Gets all artists with a given name.
""" """
artists = [unidecode(a.name).lower() for a in self.artists]
results = process.extract( results = process.extract(
self.query, self.query,
self.artists, artists,
scorer=fuzz.WRatio, scorer=fuzz.WRatio,
score_cutoff=Cutoff.artists, score_cutoff=Cutoff.artists,
limit=Limit.artists, limit=Limit.artists,
) )
artists = [a[0] for a in results] artists = [a[0] for a in results]
return [models.Artist(a) for a in artists] return [self.artists[i[2]] for i in results]
class SearchAlbums: class SearchAlbums:
@ -87,7 +89,7 @@ class SearchAlbums:
Gets all albums with a given title. Gets all albums with a given title.
""" """
albums = [a.title.lower() for a in self.albums] albums = [unidecode(a.title).lower() for a in self.albums]
results = process.extract( results = process.extract(
self.query, self.query,

View File

@ -1,55 +1,54 @@
from datetime import datetime from datetime import datetime, timezone
def date_string_to_time_passed(prev_date: str) -> str: def date_string_to_time_passed(prev_date: str) -> str:
""" """
Converts a date string to time passed. eg. 2 minutes ago, 1 hour ago, yesterday, 2 days ago, 2 weeks ago, etc. Converts a date string to time passed. eg. 2 minutes ago, 1 hour ago, yesterday, 2 days ago, 2 weeks ago, etc.
""" """
now = datetime.now(timezone.utc)
now = datetime.now() then = datetime.strptime(prev_date, "%Y-%m-%d %H:%M:%S").replace(
then = datetime.strptime(prev_date, "%Y-%m-%d %H:%M:%S") tzinfo=timezone.utc
)
diff = now - then diff = now - then
days = diff.days seconds = diff.total_seconds()
if days < 0: if seconds < 0:
return "in the future" return "in the future"
if days == 0: if seconds < 15:
seconds = diff.seconds return "now"
if seconds < 15: if seconds < 60:
return "now" return f"{int(seconds)} seconds ago"
if seconds < 60: if seconds < 3600:
return str(seconds) + " seconds ago" return f"{int(seconds // 60)} minutes ago"
if seconds < 3600: if seconds < 86400:
return str(seconds // 60) + " minutes ago" return f"{int(seconds // 3600)} hours ago"
return str(seconds // 3600) + " hours ago" days = diff.days
if days == 1: if days == 1:
return "yesterday" return "yesterday"
if days < 7: if days < 7:
return str(days) + " days ago" return f"{days} days ago"
if days < 14:
return "1 week ago"
if days < 30: if days < 30:
if days < 14: return f"{int(days // 7)} weeks ago"
return "1 week ago"
if days < 60:
return "1 month ago"
return str(days // 7) + " weeks ago"
if days < 365: if days < 365:
if days < 60: return f"{int(days // 30)} months ago"
return "1 month ago"
return str(days // 30) + " months ago" if days < 730:
if days > 365: return "1 year ago"
if days < 730:
return "1 year ago"
return str(days // 365) + " years ago"
return "I honestly don't know"
return f"{int(days // 365)} years ago"

View File

@ -11,6 +11,8 @@ import string
import threading import threading
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from collections import defaultdict
from operator import attrgetter
import requests import requests
from unidecode import unidecode from unidecode import unidecode
@ -67,34 +69,37 @@ def run_fast_scandir(_dir: str, full=False) -> tuple[list[str], list[str]]:
def remove_duplicates(tracks: list[models.Track]) -> list[models.Track]: def remove_duplicates(tracks: list[models.Track]) -> list[models.Track]:
""" """
Removes duplicate tracks from a list of tracks. Remove duplicates from a list of Track objects based on the trackhash attribute.
Retains objects with the highest bitrate.
""" """
hashes = [] hash_to_tracks = defaultdict(list)
for track in tracks: for track in tracks:
if track.trackhash not in hashes: hash_to_tracks[track.trackhash].append(track)
hashes.append(track.trackhash)
tracks = sorted(tracks, key=lambda x: x.trackhash) tracks = []
tracks = UseBisection(tracks, "trackhash", hashes)()
return [t for t in tracks if t is not None] for track_group in hash_to_tracks.values():
max_bitrate_track = max(track_group, key=attrgetter("bitrate"))
tracks.append(max_bitrate_track)
return tracks
def create_hash(*args: str, decode=False, limit=7) -> str: def create_hash(*args: str, decode=False, limit=7) -> str:
""" """
Creates a simple hash for an album Creates a simple hash for an album
""" """
string = "".join(args) str_ = "".join(args)
if decode: if decode:
string = unidecode(string) str_ = unidecode(str_)
string = string.lower().strip().replace(" ", "") str_ = str_.lower().strip().replace(" ", "")
string = "".join(t for t in string if t.isalnum()) str_ = "".join(t for t in str_ if t.isalnum())
string = string.encode("utf-8") str_ = str_.encode("utf-8")
string = hashlib.sha256(string).hexdigest() str_ = hashlib.sha256(str_).hexdigest()
return string[-limit:] return str_[-limit:]
def create_folder_hash(*args: str, limit=7) -> str: def create_folder_hash(*args: str, limit=7) -> str:
@ -191,7 +196,7 @@ def get_albumartists(albums: list[models.Album]) -> set[str]:
def get_all_artists( def get_all_artists(
tracks: list[models.Track], albums: list[models.Album] tracks: list[models.Track], albums: list[models.Album]
) -> list[models.Artist]: ) -> list[models.Artist]:
artists_from_tracks = get_artists_from_tracks(tracks) artists_from_tracks = get_artists_from_tracks(tracks)
artist_from_albums = get_albumartists(albums) artist_from_albums = get_albumartists(albums)
@ -300,7 +305,7 @@ def win_replace_slash(path: str):
def split_artists(src: str, with_and: bool = False): def split_artists(src: str, with_and: bool = False):
exp = r"\s*(?:and|&|,|;)\s*" if with_and else r"\s*[,;]\s*" exp = r"\s*(?: and |&|,|;)\s*" if with_and else r"\s*[,;]\s*"
artists = re.split(exp, src) artists = re.split(exp, src)
return [a.strip() for a in artists] return [a.strip() for a in artists]
@ -349,10 +354,10 @@ def remove_prod(title: str) -> str:
return title return title
# check if title has brackets # check if title has brackets
if re.search(r'[()\[\]]', title): if re.search(r"[()\[\]]", title):
regex = r'\s?(\(|\[)prod\..*?(\)|\])\s?' regex = r"\s?(\(|\[)prod\..*?(\)|\])\s?"
else: else:
regex = r'\s?\bprod\.\s*\S+' regex = r"\s?\bprod\.\s*\S+"
# remove the producer string # remove the producer string
title = re.sub(regex, "", title, flags=re.IGNORECASE) title = re.sub(regex, "", title, flags=re.IGNORECASE)