rewrite remove_duplicates to retain the track with the highest bitrate for each trackhash

+ bump fuzzy search cutoff to 90 + transliterate non-ASCII characters to ASCII (unidecode) in fuzzy search texts
2025-06-07 03:35:35 +00:00 · 2023-02-26 09:50:45 +03:00 · 2023-02-26 09:50:45 +03:00 · c352037ccd
commit c352037ccd
parent 8e7021186d
6 changed files with 81 additions and 83 deletions
--- a/app/api/search.py
+++ b/app/api/search.py
@ -7,6 +7,7 @@ from flask import Blueprint, request
 from app import models, utils
 from app.db.store import Store
 from app.lib import searchlib
+from unidecode import unidecode

 api = Blueprint("search", __name__, url_prefix="/")

@ -35,8 +36,8 @@ class DoSearch:
        :param :str:`query`: the search query.
        """
        self.tracks: list[models.Track] = []
-        self.query = query
-        SearchResults.query = query
+        self.query = unidecode(query)
+        SearchResults.query = self.query

    def search_tracks(self):
        """Calls :class:`SearchTracks` which returns the tracks that fuzzily match
@ -57,9 +58,8 @@ class DoSearch:
        """Calls :class:`SearchArtists` which returns the artists that fuzzily match
        the search term. Then adds them to the `SearchResults` store.
        """
-        # self.artists = utils.Get.get_all_artists()
        artists = [a.name for a in Store.artists]
-        artists = searchlib.SearchArtists(artists, self.query)()
+        artists = searchlib.SearchArtists(Store.artists, self.query)()
        SearchResults.artists = artists

        return artists
@ -68,7 +68,6 @@ class DoSearch:
        """Calls :class:`SearchAlbums` which returns the albums that fuzzily match
        the search term. Then adds them to the `SearchResults` store.
        """
-        # albums = utils.Get.get_all_albums()
        albums = Store.albums
        albums = searchlib.SearchAlbums(albums, self.query)()
        SearchResults.albums = albums
@ -179,12 +178,12 @@ def get_top_results():

    DoSearch(query).search_all()

-    max = 2
+    max_results = 2
    return {
-        "tracks": SearchResults.tracks[:max],
-        "albums": SearchResults.albums[:max],
-        "artists": SearchResults.artists[:max],
-        "playlists": SearchResults.playlists[:max],
+        "tracks": SearchResults.tracks[:max_results],
+        "albums": SearchResults.albums[:max_results],
+        "artists": SearchResults.artists[:max_results],
+        "playlists": SearchResults.playlists[:max_results],
    }


@ -199,20 +198,20 @@ def search_load_more():
    if s_type == "tracks":
        t = SearchResults.tracks
        return {
-            "tracks": t[index: index + SEARCH_COUNT],
+            "tracks": t[index : index + SEARCH_COUNT],
            "more": len(t) > index + SEARCH_COUNT,
        }

    elif s_type == "albums":
        a = SearchResults.albums
        return {
-            "albums": a[index: index + SEARCH_COUNT],
+            "albums": a[index : index + SEARCH_COUNT],
            "more": len(a) > index + SEARCH_COUNT,
        }

    elif s_type == "artists":
        a = SearchResults.artists
        return {
-            "artists": a[index: index + SEARCH_COUNT],
+            "artists": a[index : index + SEARCH_COUNT],
            "more": len(a) > index + SEARCH_COUNT,
        }
--- a/app/db/store.py
+++ b/app/db/store.py
@ -69,14 +69,8 @@ class Store:
        Returns a list of tracks by their hashes.
        """

-        tracks = []
-
-        for trackhash in trackhashes:
-            for track in cls.tracks:
-                if track.trackhash == trackhash:
-                    tracks.append(track)
-
-        return tracks
+        trackhashes = " ".join(trackhashes)
+        return [track for track in cls.tracks if track.trackhash in trackhashes]

    @classmethod
    def remove_track_by_filepath(cls, filepath: str):
--- a/app/functions.py
+++ b/app/functions.py
@ -7,7 +7,6 @@ from requests import ReadTimeout

 from app import utils
 from app.lib.artistlib import CheckArtistImages
-from app.lib.colorlib import ProcessArtistColors
 from app.lib.populate import Populate, PopulateCancelledError
 from app.lib.trackslib import validate_tracks
 from app.logger import log
--- a/app/lib/searchlib.py
+++ b/app/lib/searchlib.py
@ -4,6 +4,7 @@ This library contains all the functions related to the search functionality.
 from typing import List

 from rapidfuzz import fuzz, process
+from unidecode import unidecode

 from app import models

@ -16,10 +17,10 @@ class Cutoff:
    Holds all the default cutoff values.
    """

-    tracks: int = 60
-    albums: int = 60
-    artists: int = 60
-    playlists: int = 60
+    tracks: int = 90
+    albums: int = 90
+    artists: int = 90
+    playlists: int = 90


 class Limit:
@ -27,10 +28,10 @@ class Limit:
    Holds all the default limit values.
    """

-    tracks: int = 50
-    albums: int = 50
-    artists: int = 50
-    playlists: int = 50
+    tracks: int = 150
+    albums: int = 150
+    artists: int = 150
+    playlists: int = 150


 class SearchTracks:
@ -43,7 +44,7 @@ class SearchTracks:
        Gets all songs with a given title.
        """

-        tracks = [track.og_title for track in self.tracks]
+        tracks = [unidecode(track.og_title).lower() for track in self.tracks]
        results = process.extract(
            self.query,
            tracks,
@ -56,7 +57,7 @@ class SearchTracks:


 class SearchArtists:
-    def __init__(self, artists: list[str], query: str) -> None:
+    def __init__(self, artists: list[models.Artist], query: str) -> None:
        self.query = query
        self.artists = artists

@ -64,17 +65,18 @@ class SearchArtists:
        """
        Gets all artists with a given name.
        """
+        artists = [unidecode(a.name).lower() for a in self.artists]

        results = process.extract(
            self.query,
-            self.artists,
+            artists,
            scorer=fuzz.WRatio,
            score_cutoff=Cutoff.artists,
            limit=Limit.artists,
        )

        artists = [a[0] for a in results]
-        return [models.Artist(a) for a in artists]
+        return [self.artists[i[2]] for i in results]


 class SearchAlbums:
@ -87,7 +89,7 @@ class SearchAlbums:
        Gets all albums with a given title.
        """

-        albums = [a.title.lower() for a in self.albums]
+        albums = [unidecode(a.title).lower() for a in self.albums]

        results = process.extract(
            self.query,
--- a/app/serializer.py
+++ b/app/serializer.py
@ -1,55 +1,54 @@
-from datetime import datetime
+from datetime import datetime, timezone


 def date_string_to_time_passed(prev_date: str) -> str:
    """
    Converts a date string to time passed. eg. 2 minutes ago, 1 hour ago, yesterday, 2 days ago, 2 weeks ago, etc.
    """
-
-    now = datetime.now()
-    then = datetime.strptime(prev_date, "%Y-%m-%d %H:%M:%S")
+    now = datetime.now(timezone.utc)
+    then = datetime.strptime(prev_date, "%Y-%m-%d %H:%M:%S").replace(
+        tzinfo=timezone.utc
+    )

    diff = now - then
-    days = diff.days
+    seconds = diff.total_seconds()

-    if days < 0:
+    if seconds < 0:
        return "in the future"

-    if days == 0:
-        seconds = diff.seconds
+    if seconds < 15:
+        return "now"

-        if seconds < 15:
-            return "now"
+    if seconds < 60:
+        return f"{int(seconds)} seconds ago"

-        if seconds < 60:
-            return str(seconds) + " seconds ago"
+    if seconds < 3600:
+        return f"{int(seconds // 60)} minutes ago"

-        if seconds < 3600:
-            return str(seconds // 60) + " minutes ago"
+    if seconds < 86400:
+        return f"{int(seconds // 3600)} hours ago"

-        return str(seconds // 3600) + " hours ago"
+    days = diff.days

    if days == 1:
        return "yesterday"

    if days < 7:
-        return str(days) + " days ago"
+        return f"{days} days ago"
+
+    if days < 14:
+        return "1 week ago"

    if days < 30:
-        if days < 14:
-            return "1 week ago"
+        return f"{int(days // 7)} weeks ago"
+
+    if days < 60:
+        return "1 month ago"

-        return str(days // 7) + " weeks ago"
    if days < 365:
-        if days < 60:
-            return "1 month ago"
+        return f"{int(days // 30)} months ago"

-        return str(days // 30) + " months ago"
-    if days > 365:
-        if days < 730:
-            return "1 year ago"
-
-        return str(days // 365) + " years ago"
-
-    return "I honestly don't know"
+    if days < 730:
+        return "1 year ago"

+    return f"{int(days // 365)} years ago"
--- a/app/utils.py
+++ b/app/utils.py
@ -11,6 +11,8 @@ import string
 import threading
 from datetime import datetime
 from pathlib import Path
+from collections import defaultdict
+from operator import attrgetter

 import requests
 from unidecode import unidecode
@ -67,34 +69,37 @@ def run_fast_scandir(_dir: str, full=False) -> tuple[list[str], list[str]]:

 def remove_duplicates(tracks: list[models.Track]) -> list[models.Track]:
    """
-    Removes duplicate tracks from a list of tracks.
+    Remove duplicates from a list of Track objects based on the trackhash attribute.
+    Retains objects with the highest bitrate.
    """
-    hashes = []
+    hash_to_tracks = defaultdict(list)

    for track in tracks:
-        if track.trackhash not in hashes:
-            hashes.append(track.trackhash)
+        hash_to_tracks[track.trackhash].append(track)

-    tracks = sorted(tracks, key=lambda x: x.trackhash)
-    tracks = UseBisection(tracks, "trackhash", hashes)()
+    tracks = []

-    return [t for t in tracks if t is not None]
+    for track_group in hash_to_tracks.values():
+        max_bitrate_track = max(track_group, key=attrgetter("bitrate"))
+        tracks.append(max_bitrate_track)
+
+    return tracks


 def create_hash(*args: str, decode=False, limit=7) -> str:
    """
    Creates a simple hash for an album
    """
-    string = "".join(args)
+    str_ = "".join(args)

    if decode:
-        string = unidecode(string)
+        str_ = unidecode(str_)

-    string = string.lower().strip().replace(" ", "")
-    string = "".join(t for t in string if t.isalnum())
-    string = string.encode("utf-8")
-    string = hashlib.sha256(string).hexdigest()
-    return string[-limit:]
+    str_ = str_.lower().strip().replace(" ", "")
+    str_ = "".join(t for t in str_ if t.isalnum())
+    str_ = str_.encode("utf-8")
+    str_ = hashlib.sha256(str_).hexdigest()
+    return str_[-limit:]


 def create_folder_hash(*args: str, limit=7) -> str:
@ -191,7 +196,7 @@ def get_albumartists(albums: list[models.Album]) -> set[str]:


 def get_all_artists(
-        tracks: list[models.Track], albums: list[models.Album]
+    tracks: list[models.Track], albums: list[models.Album]
 ) -> list[models.Artist]:
    artists_from_tracks = get_artists_from_tracks(tracks)
    artist_from_albums = get_albumartists(albums)
@ -300,7 +305,7 @@ def win_replace_slash(path: str):


 def split_artists(src: str, with_and: bool = False):
-    exp = r"\s*(?:and|&|,|;)\s*" if with_and else r"\s*[,;]\s*"
+    exp = r"\s*(?: and |&|,|;)\s*" if with_and else r"\s*[,;]\s*"

    artists = re.split(exp, src)
    return [a.strip() for a in artists]
@ -349,10 +354,10 @@ def remove_prod(title: str) -> str:
        return title

    # check if title has brackets
-    if re.search(r'[()\[\]]', title):
-        regex = r'\s?(\(|\[)prod\..*?(\)|\])\s?'
+    if re.search(r"[()\[\]]", title):
+        regex = r"\s?(\(|\[)prod\..*?(\)|\])\s?"
    else:
-        regex = r'\s?\bprod\.\s*\S+'
+        regex = r"\s?\bprod\.\s*\S+"

    # remove the producer string
    title = re.sub(regex, "", title, flags=re.IGNORECASE)