Rewrite remove_duplicates to retain the track with the highest bitrate

+ bump fuzzy search cutoff to 90
+ transliterate Unicode to ASCII (unidecode) in fuzzy search texts
This commit is contained in:
geoffrey45 2023-02-26 09:50:45 +03:00
parent 8e7021186d
commit c352037ccd
6 changed files with 81 additions and 83 deletions

View File

@ -7,6 +7,7 @@ from flask import Blueprint, request
from app import models, utils
from app.db.store import Store
from app.lib import searchlib
from unidecode import unidecode
api = Blueprint("search", __name__, url_prefix="/")
@ -35,8 +36,8 @@ class DoSearch:
:param :str:`query`: the search query.
"""
self.tracks: list[models.Track] = []
self.query = query
SearchResults.query = query
self.query = unidecode(query)
SearchResults.query = self.query
def search_tracks(self):
"""Calls :class:`SearchTracks` which returns the tracks that fuzzily match
@ -57,9 +58,8 @@ class DoSearch:
"""Calls :class:`SearchArtists` which returns the artists that fuzzily match
the search term. Then adds them to the `SearchResults` store.
"""
# self.artists = utils.Get.get_all_artists()
artists = [a.name for a in Store.artists]
artists = searchlib.SearchArtists(artists, self.query)()
artists = searchlib.SearchArtists(Store.artists, self.query)()
SearchResults.artists = artists
return artists
@ -68,7 +68,6 @@ class DoSearch:
"""Calls :class:`SearchAlbums` which returns the albums that fuzzily match
the search term. Then adds them to the `SearchResults` store.
"""
# albums = utils.Get.get_all_albums()
albums = Store.albums
albums = searchlib.SearchAlbums(albums, self.query)()
SearchResults.albums = albums
@ -179,12 +178,12 @@ def get_top_results():
DoSearch(query).search_all()
max = 2
max_results = 2
return {
"tracks": SearchResults.tracks[:max],
"albums": SearchResults.albums[:max],
"artists": SearchResults.artists[:max],
"playlists": SearchResults.playlists[:max],
"tracks": SearchResults.tracks[:max_results],
"albums": SearchResults.albums[:max_results],
"artists": SearchResults.artists[:max_results],
"playlists": SearchResults.playlists[:max_results],
}
@ -199,20 +198,20 @@ def search_load_more():
if s_type == "tracks":
t = SearchResults.tracks
return {
"tracks": t[index: index + SEARCH_COUNT],
"tracks": t[index : index + SEARCH_COUNT],
"more": len(t) > index + SEARCH_COUNT,
}
elif s_type == "albums":
a = SearchResults.albums
return {
"albums": a[index: index + SEARCH_COUNT],
"albums": a[index : index + SEARCH_COUNT],
"more": len(a) > index + SEARCH_COUNT,
}
elif s_type == "artists":
a = SearchResults.artists
return {
"artists": a[index: index + SEARCH_COUNT],
"artists": a[index : index + SEARCH_COUNT],
"more": len(a) > index + SEARCH_COUNT,
}

View File

@ -69,14 +69,8 @@ class Store:
Returns a list of tracks by their hashes.
"""
tracks = []
for trackhash in trackhashes:
for track in cls.tracks:
if track.trackhash == trackhash:
tracks.append(track)
return tracks
trackhashes = " ".join(trackhashes)
return [track for track in cls.tracks if track.trackhash in trackhashes]
@classmethod
def remove_track_by_filepath(cls, filepath: str):

View File

@ -7,7 +7,6 @@ from requests import ReadTimeout
from app import utils
from app.lib.artistlib import CheckArtistImages
from app.lib.colorlib import ProcessArtistColors
from app.lib.populate import Populate, PopulateCancelledError
from app.lib.trackslib import validate_tracks
from app.logger import log

View File

@ -4,6 +4,7 @@ This library contains all the functions related to the search functionality.
from typing import List
from rapidfuzz import fuzz, process
from unidecode import unidecode
from app import models
@ -16,10 +17,10 @@ class Cutoff:
Holds all the default cutoff values.
"""
tracks: int = 60
albums: int = 60
artists: int = 60
playlists: int = 60
tracks: int = 90
albums: int = 90
artists: int = 90
playlists: int = 90
class Limit:
@ -27,10 +28,10 @@ class Limit:
Holds all the default limit values.
"""
tracks: int = 50
albums: int = 50
artists: int = 50
playlists: int = 50
tracks: int = 150
albums: int = 150
artists: int = 150
playlists: int = 150
class SearchTracks:
@ -43,7 +44,7 @@ class SearchTracks:
Gets all songs with a given title.
"""
tracks = [track.og_title for track in self.tracks]
tracks = [unidecode(track.og_title).lower() for track in self.tracks]
results = process.extract(
self.query,
tracks,
@ -56,7 +57,7 @@ class SearchTracks:
class SearchArtists:
def __init__(self, artists: list[str], query: str) -> None:
def __init__(self, artists: list[models.Artist], query: str) -> None:
self.query = query
self.artists = artists
@ -64,17 +65,18 @@ class SearchArtists:
"""
Gets all artists with a given name.
"""
artists = [unidecode(a.name).lower() for a in self.artists]
results = process.extract(
self.query,
self.artists,
artists,
scorer=fuzz.WRatio,
score_cutoff=Cutoff.artists,
limit=Limit.artists,
)
artists = [a[0] for a in results]
return [models.Artist(a) for a in artists]
return [self.artists[i[2]] for i in results]
class SearchAlbums:
@ -87,7 +89,7 @@ class SearchAlbums:
Gets all albums with a given title.
"""
albums = [a.title.lower() for a in self.albums]
albums = [unidecode(a.title).lower() for a in self.albums]
results = process.extract(
self.query,

View File

@ -1,55 +1,54 @@
from datetime import datetime
from datetime import datetime, timezone
def date_string_to_time_passed(prev_date: str) -> str:
"""
Converts a date string to time passed. eg. 2 minutes ago, 1 hour ago, yesterday, 2 days ago, 2 weeks ago, etc.
"""
now = datetime.now()
then = datetime.strptime(prev_date, "%Y-%m-%d %H:%M:%S")
now = datetime.now(timezone.utc)
then = datetime.strptime(prev_date, "%Y-%m-%d %H:%M:%S").replace(
tzinfo=timezone.utc
)
diff = now - then
days = diff.days
seconds = diff.total_seconds()
if days < 0:
if seconds < 0:
return "in the future"
if days == 0:
seconds = diff.seconds
if seconds < 15:
return "now"
if seconds < 15:
return "now"
if seconds < 60:
return f"{int(seconds)} seconds ago"
if seconds < 60:
return str(seconds) + " seconds ago"
if seconds < 3600:
return f"{int(seconds // 60)} minutes ago"
if seconds < 3600:
return str(seconds // 60) + " minutes ago"
if seconds < 86400:
return f"{int(seconds // 3600)} hours ago"
return str(seconds // 3600) + " hours ago"
days = diff.days
if days == 1:
return "yesterday"
if days < 7:
return str(days) + " days ago"
return f"{days} days ago"
if days < 14:
return "1 week ago"
if days < 30:
if days < 14:
return "1 week ago"
return f"{int(days // 7)} weeks ago"
if days < 60:
return "1 month ago"
return str(days // 7) + " weeks ago"
if days < 365:
if days < 60:
return "1 month ago"
return f"{int(days // 30)} months ago"
return str(days // 30) + " months ago"
if days > 365:
if days < 730:
return "1 year ago"
return str(days // 365) + " years ago"
return "I honestly don't know"
if days < 730:
return "1 year ago"
return f"{int(days // 365)} years ago"

View File

@ -11,6 +11,8 @@ import string
import threading
from datetime import datetime
from pathlib import Path
from collections import defaultdict
from operator import attrgetter
import requests
from unidecode import unidecode
@ -67,34 +69,37 @@ def run_fast_scandir(_dir: str, full=False) -> tuple[list[str], list[str]]:
def remove_duplicates(tracks: list[models.Track]) -> list[models.Track]:
"""
Removes duplicate tracks from a list of tracks.
Remove duplicates from a list of Track objects based on the trackhash attribute.
Retains objects with the highest bitrate.
"""
hashes = []
hash_to_tracks = defaultdict(list)
for track in tracks:
if track.trackhash not in hashes:
hashes.append(track.trackhash)
hash_to_tracks[track.trackhash].append(track)
tracks = sorted(tracks, key=lambda x: x.trackhash)
tracks = UseBisection(tracks, "trackhash", hashes)()
tracks = []
return [t for t in tracks if t is not None]
for track_group in hash_to_tracks.values():
max_bitrate_track = max(track_group, key=attrgetter("bitrate"))
tracks.append(max_bitrate_track)
return tracks
def create_hash(*args: str, decode=False, limit=7) -> str:
"""
Creates a simple hash for an album
"""
string = "".join(args)
str_ = "".join(args)
if decode:
string = unidecode(string)
str_ = unidecode(str_)
string = string.lower().strip().replace(" ", "")
string = "".join(t for t in string if t.isalnum())
string = string.encode("utf-8")
string = hashlib.sha256(string).hexdigest()
return string[-limit:]
str_ = str_.lower().strip().replace(" ", "")
str_ = "".join(t for t in str_ if t.isalnum())
str_ = str_.encode("utf-8")
str_ = hashlib.sha256(str_).hexdigest()
return str_[-limit:]
def create_folder_hash(*args: str, limit=7) -> str:
@ -191,7 +196,7 @@ def get_albumartists(albums: list[models.Album]) -> set[str]:
def get_all_artists(
tracks: list[models.Track], albums: list[models.Album]
tracks: list[models.Track], albums: list[models.Album]
) -> list[models.Artist]:
artists_from_tracks = get_artists_from_tracks(tracks)
artist_from_albums = get_albumartists(albums)
@ -300,7 +305,7 @@ def win_replace_slash(path: str):
def split_artists(src: str, with_and: bool = False):
exp = r"\s*(?:and|&|,|;)\s*" if with_and else r"\s*[,;]\s*"
exp = r"\s*(?: and |&|,|;)\s*" if with_and else r"\s*[,;]\s*"
artists = re.split(exp, src)
return [a.strip() for a in artists]
@ -349,10 +354,10 @@ def remove_prod(title: str) -> str:
return title
# check if title has brackets
if re.search(r'[()\[\]]', title):
regex = r'\s?(\(|\[)prod\..*?(\)|\])\s?'
if re.search(r"[()\[\]]", title):
regex = r"\s?(\(|\[)prod\..*?(\)|\])\s?"
else:
regex = r'\s?\bprod\.\s*\S+'
regex = r"\s?\bprod\.\s*\S+"
# remove the producer string
title = re.sub(regex, "", title, flags=re.IGNORECASE)