rewrite remove_duplicates to efficiently support removing duplicates from album tracks

+ remove CLI flags for options that are now available on the client settings page
+ misc
This commit is contained in:
mungai-njoroge 2023-08-29 20:04:30 +03:00
parent 26e36ba36f
commit 13475b0630
16 changed files with 118 additions and 95 deletions

View File

@ -50,6 +50,19 @@ Usage: swingmusic [options]
| | | | | |
| --build | Build the application (in development) | | --build | Build the application (in development) |
| Option | Short | Description |
|--------------------------|-------|-------------------------------------------------------|
| `--help` | `-h` | Show help message |
| `--version` | `-v` | Show the app version |
| `--host` | | Set the host |
| `--port` | | Set the port |
| `--config` | | Set the config path |
| `--no-periodic-scan`| `-nps` | Disable periodic scan |
| `--scan-interval` | `-psi` | Set the periodic scan interval in seconds. Default is 600 seconds (10 minutes) |
| `--build` | | Build the application (in development) |
To stream your music across your local network, use the `--host` flag to run the app in all ports. Like this: To stream your music across your local network, use the `--host` flag to run the app in all ports. Like this:
```sh ```sh

View File

@ -3,8 +3,6 @@ Contains all the album routes.
""" """
import random import random
from dataclasses import asdict
from typing import Any
from flask import Blueprint, request from flask import Blueprint, request
@ -18,7 +16,6 @@ from app.serializers.track import serialize_track
from app.store.albums import AlbumStore from app.store.albums import AlbumStore
from app.store.tracks import TrackStore from app.store.tracks import TrackStore
from app.utils.hashing import create_hash from app.utils.hashing import create_hash
from app.utils.remove_duplicates import remove_duplicates
get_albums_by_albumartist = adb.get_albums_by_albumartist get_albums_by_albumartist = adb.get_albums_by_albumartist
check_is_fav = favdb.check_is_favorite check_is_fav = favdb.check_is_favorite
@ -67,13 +64,12 @@ def get_album_tracks_and_info():
return list(genres) return list(genres)
album.genres = get_album_genres(tracks) album.genres = get_album_genres(tracks)
tracks = remove_duplicates(tracks)
album.count = len(tracks) album.count = len(tracks)
album.get_date_from_tracks(tracks) album.get_date_from_tracks(tracks)
try: try:
album.duration = sum((t.duration for t in tracks)) album.duration = sum(t.duration for t in tracks)
except AttributeError: except AttributeError:
album.duration = 0 album.duration = 0

View File

@ -4,7 +4,7 @@ from app.db.sqlite.settings import SettingsSQLMethods as sdb
from app.lib import populate from app.lib import populate
from app.lib.watchdogg import Watcher as WatchDog from app.lib.watchdogg import Watcher as WatchDog
from app.logger import log from app.logger import log
from app.settings import ParserFlags, Paths, set_flag from app.settings import Paths, SessionVarKeys, set_flag
from app.store.albums import AlbumStore from app.store.albums import AlbumStore
from app.store.artists import ArtistStore from app.store.artists import ArtistStore
from app.store.tracks import TrackStore from app.store.tracks import TrackStore
@ -143,12 +143,12 @@ def get_root_dirs():
# maps settings to their parser flags # maps settings to their parser flags
mapp = { mapp = {
"artist_separators": ParserFlags.ARTIST_SEPARATORS, "artist_separators": SessionVarKeys.ARTIST_SEPARATORS,
"extract_feat": ParserFlags.EXTRACT_FEAT, "extract_feat": SessionVarKeys.EXTRACT_FEAT,
"remove_prod": ParserFlags.REMOVE_PROD, "remove_prod": SessionVarKeys.REMOVE_PROD,
"clean_album_title": ParserFlags.CLEAN_ALBUM_TITLE, "clean_album_title": SessionVarKeys.CLEAN_ALBUM_TITLE,
"remove_remaster": ParserFlags.REMOVE_REMASTER_FROM_TRACK, "remove_remaster": SessionVarKeys.REMOVE_REMASTER_FROM_TRACK,
"merge_albums": ParserFlags.MERGE_ALBUM_VERSIONS, "merge_albums": SessionVarKeys.MERGE_ALBUM_VERSIONS,
} }

View File

@ -3,14 +3,14 @@ Handles arguments passed to the program.
""" """
import os.path import os.path
import sys import sys
from configparser import ConfigParser from configparser import ConfigParser
import PyInstaller.__main__ as bundler import PyInstaller.__main__ as bundler
from app import settings from app import settings
from app.logger import log
from app.print_help import HELP_MESSAGE from app.print_help import HELP_MESSAGE
from app.utils.wintools import is_windows from app.utils.wintools import is_windows
from app.logger import log
from app.utils.xdg_utils import get_xdg_config_dir from app.utils.xdg_utils import get_xdg_config_dir
# from app.api.imgserver import set_app_dir # from app.api.imgserver import set_app_dir
@ -29,10 +29,6 @@ class HandleArgs:
self.handle_port() self.handle_port()
self.handle_config_path() self.handle_config_path()
self.handle_no_feat()
self.handle_remove_prod()
self.handle_cleaning_albums()
self.handle_cleaning_tracks()
self.handle_periodic_scan() self.handle_periodic_scan()
self.handle_periodic_scan_interval() self.handle_periodic_scan_interval()
@ -122,31 +118,10 @@ class HandleArgs:
settings.Paths.set_config_dir(get_xdg_config_dir()) settings.Paths.set_config_dir(get_xdg_config_dir())
@staticmethod
def handle_no_feat():
# if ArgsEnum.no_feat in ARGS:
if any((a in ARGS for a in ALLARGS.show_feat)):
settings.FromFlags.EXTRACT_FEAT = False
@staticmethod
def handle_remove_prod():
if any((a in ARGS for a in ALLARGS.show_prod)):
settings.FromFlags.REMOVE_PROD = False
@staticmethod
def handle_cleaning_albums():
if any((a in ARGS for a in ALLARGS.dont_clean_albums)):
settings.FromFlags.CLEAN_ALBUM_TITLE = False
@staticmethod
def handle_cleaning_tracks():
if any((a in ARGS for a in ALLARGS.dont_clean_tracks)):
settings.FromFlags.REMOVE_REMASTER_FROM_TRACK = False
@staticmethod @staticmethod
def handle_periodic_scan(): def handle_periodic_scan():
if any((a in ARGS for a in ALLARGS.no_periodic_scan)): if any((a in ARGS for a in ALLARGS.no_periodic_scan)):
settings.FromFlags.DO_PERIODIC_SCANS = False settings.SessionVars.DO_PERIODIC_SCANS = False
@staticmethod @staticmethod
def handle_periodic_scan_interval(): def handle_periodic_scan_interval():
@ -161,8 +136,6 @@ class HandleArgs:
print("ERROR: Interval not specified") print("ERROR: Interval not specified")
sys.exit(0) sys.exit(0)
# psi = 0
try: try:
psi = int(interval) psi = int(interval)
except ValueError: except ValueError:
@ -173,7 +146,7 @@ class HandleArgs:
print("WADAFUCK ARE YOU TRYING?") print("WADAFUCK ARE YOU TRYING?")
sys.exit(0) sys.exit(0)
settings.FromFlags.PERIODIC_SCAN_INTERVAL = psi settings.SessionVars.PERIODIC_SCAN_INTERVAL = psi
@staticmethod @staticmethod
def handle_help(): def handle_help():

View File

@ -1,8 +1,9 @@
from pprint import pprint from pprint import pprint
from typing import Any from typing import Any
from app.db.sqlite.utils import SQLiteManager from app.db.sqlite.utils import SQLiteManager
from app.settings import SessionVars
from app.utils.wintools import win_replace_slash from app.utils.wintools import win_replace_slash
from app.settings import FromFlags
class SettingsSQLMethods: class SettingsSQLMethods:
@ -138,11 +139,11 @@ def load_settings():
separators = db_separators.split(",") separators = db_separators.split(",")
separators = set(separators) separators = set(separators)
FromFlags.ARTIST_SEPARATORS = separators SessionVars.ARTIST_SEPARATORS = separators
# boolean settings # boolean settings
FromFlags.EXTRACT_FEAT = bool(s[1]) SessionVars.EXTRACT_FEAT = bool(s[1])
FromFlags.REMOVE_PROD = bool(s[2]) SessionVars.REMOVE_PROD = bool(s[2])
FromFlags.CLEAN_ALBUM_TITLE = bool(s[3]) SessionVars.CLEAN_ALBUM_TITLE = bool(s[3])
FromFlags.REMOVE_REMASTER_FROM_TRACK = bool(s[4]) SessionVars.REMOVE_REMASTER_FROM_TRACK = bool(s[4])
FromFlags.MERGE_ALBUM_VERSIONS = bool(s[5]) SessionVars.MERGE_ALBUM_VERSIONS = bool(s[5])

View File

@ -30,6 +30,12 @@ def validate_albums():
AlbumStore.remove_album(album) AlbumStore.remove_album(album)
bar() bar()
def remove_duplicate_on_merge_versions(tracks: list[Track]) -> list[Track]:
    """
    Removes duplicate tracks when merging versions of the same album.

    NOTE(review): this is an unimplemented placeholder. The body is a bare
    `pass`, so the function currently returns None rather than the
    `list[Track]` promised by the return annotation.
    """
    pass
def sort_by_track_no(tracks: list[Track]) -> list[dict[str, Any]]: def sort_by_track_no(tracks: list[Track]) -> list[dict[str, Any]]:
tracks = [asdict(t) for t in tracks] tracks = [asdict(t) for t in tracks]

View File

@ -1,13 +1,13 @@
import dataclasses import dataclasses
from dataclasses import dataclass
import datetime import datetime
from dataclasses import dataclass
from app.settings import SessionVarKeys, get_flag
from .track import Track
from .artist import Artist
from ..utils.hashing import create_hash from ..utils.hashing import create_hash
from ..utils.parsers import parse_feat_from_title, get_base_title_and_versions from ..utils.parsers import get_base_title_and_versions, parse_feat_from_title
from .artist import Artist
from app.settings import get_flag, ParserFlags from .track import Track
@dataclass(slots=True) @dataclass(slots=True)
@ -44,7 +44,7 @@ class Album:
self.image = self.albumhash + ".webp" self.image = self.albumhash + ".webp"
# Fetch album artists from title # Fetch album artists from title
if get_flag(ParserFlags.EXTRACT_FEAT): if get_flag(SessionVarKeys.EXTRACT_FEAT):
featured, self.title = parse_feat_from_title(self.title) featured, self.title = parse_feat_from_title(self.title)
if len(featured) > 0: if len(featured) > 0:
@ -58,8 +58,8 @@ class Album:
TrackStore.append_track_artists(self.albumhash, featured, self.title) TrackStore.append_track_artists(self.albumhash, featured, self.title)
# Handle album version data # Handle album version data
if get_flag(ParserFlags.CLEAN_ALBUM_TITLE): if get_flag(SessionVarKeys.CLEAN_ALBUM_TITLE):
get_versions = not get_flag(ParserFlags.MERGE_ALBUM_VERSIONS) get_versions = not get_flag(SessionVarKeys.MERGE_ALBUM_VERSIONS)
self.title, self.versions = get_base_title_and_versions( self.title, self.versions = get_base_title_and_versions(
self.title, get_versions=get_versions self.title, get_versions=get_versions

View File

@ -1,6 +1,6 @@
from dataclasses import dataclass from dataclasses import dataclass
from app.settings import ParserFlags, get_flag from app.settings import SessionVarKeys, get_flag
from app.utils.hashing import create_hash from app.utils.hashing import create_hash
from app.utils.parsers import ( from app.utils.parsers import (
clean_title, clean_title,
@ -41,6 +41,10 @@ class Track:
artist_hashes: str = "" artist_hashes: str = ""
is_favorite: bool = False is_favorite: bool = False
# temporary attributes
_pos: int = 0 # for sorting tracks by disc and track number
_ati: str = "" # (album track identifier) for removing duplicates when merging album versions
og_title: str = "" og_title: str = ""
og_album: str = "" og_album: str = ""
@ -53,31 +57,31 @@ class Track:
artists = split_artists(self.artists) artists = split_artists(self.artists)
new_title = self.title new_title = self.title
if get_flag(ParserFlags.EXTRACT_FEAT): if get_flag(SessionVarKeys.EXTRACT_FEAT):
featured, new_title = parse_feat_from_title(self.title) featured, new_title = parse_feat_from_title(self.title)
original_lower = "-".join([create_hash(a) for a in artists]) original_lower = "-".join([create_hash(a) for a in artists])
artists.extend( artists.extend(
[a for a in featured if create_hash(a) not in original_lower] [a for a in featured if create_hash(a) not in original_lower]
) )
if get_flag(ParserFlags.REMOVE_PROD): if get_flag(SessionVarKeys.REMOVE_PROD):
new_title = remove_prod(new_title) new_title = remove_prod(new_title)
# if track is a single # if track is a single
if self.og_title == self.album: if self.og_title == self.album:
self.rename_album(new_title) self.rename_album(new_title)
if get_flag(ParserFlags.REMOVE_REMASTER_FROM_TRACK): if get_flag(SessionVarKeys.REMOVE_REMASTER_FROM_TRACK):
new_title = clean_title(new_title) new_title = clean_title(new_title)
self.title = new_title self.title = new_title
if get_flag(ParserFlags.CLEAN_ALBUM_TITLE): if get_flag(SessionVarKeys.CLEAN_ALBUM_TITLE):
self.album, _ = get_base_title_and_versions( self.album, _ = get_base_title_and_versions(
self.album, get_versions=False self.album, get_versions=False
) )
if get_flag(ParserFlags.MERGE_ALBUM_VERSIONS): if get_flag(SessionVarKeys.MERGE_ALBUM_VERSIONS):
self.recreate_albumhash() self.recreate_albumhash()
self.artist_hashes = "-".join(create_hash(a, decode=True) for a in artists) self.artist_hashes = "-".join(create_hash(a, decode=True) for a in artists)

View File

@ -4,7 +4,7 @@ This module contains functions for the server
import time import time
from app.lib.populate import Populate, PopulateCancelledError from app.lib.populate import Populate, PopulateCancelledError
from app.settings import ParserFlags, get_flag, get_scan_sleep_time from app.settings import SessionVarKeys, get_flag, get_scan_sleep_time
from app.utils.generators import get_random_str from app.utils.generators import get_random_str
from app.utils.threading import background from app.utils.threading import background
@ -20,7 +20,7 @@ def run_periodic_scans():
run_periodic_scan = True run_periodic_scan = True
while run_periodic_scan: while run_periodic_scan:
run_periodic_scan = get_flag(ParserFlags.DO_PERIODIC_SCANS) run_periodic_scan = get_flag(SessionVarKeys.DO_PERIODIC_SCANS)
try: try:
Populate(instance_key=get_random_str()) Populate(instance_key=get_random_str())

View File

@ -10,19 +10,14 @@ Usage: swingmusic [options]
Options: Options:
{', '.join(args.help)}: Show this help message {', '.join(args.help)}: Show this help message
{', '.join(args.version)}: Show the app version {', '.join(args.version)}: Show the app version
{args.host}: Set the host {args.host}: Set the host
{args.port}: Set the port {args.port}: Set the port
{args.config}: Set the config path {args.config}: Set the config path
{', '.join(args.show_feat)}: Do not extract featured artists from the song title
{', '.join(args.show_prod)}: Do not hide producers in the song title
{', '.join(args.dont_clean_albums)}: Don't clean album titles. Cleaning is done by removing information in
parentheses and showing it separately
{', '.join(args.dont_clean_tracks)}: Don't remove remaster information from track titles
{', '.join(args.no_periodic_scan)}: Disable periodic scan {', '.join(args.no_periodic_scan)}: Disable periodic scan
{', '.join(args.periodic_scan_interval)}: Set the periodic scan interval in seconds. Default is 300 seconds (5 {', '.join(args.periodic_scan_interval)}: Set the periodic scan interval in seconds. Default is 300 seconds (5
minutes) minutes)
{args.build}: Build the application (in development) {args.build}: Build the application (in development)
""" """

View File

@ -21,7 +21,7 @@ def serialize_track(track: Track, to_remove: set = {}, remove_disc=True) -> dict
props.remove("disc") props.remove("disc")
props.remove("track") props.remove("track")
props.update(key for key in album_dict.keys() if key.startswith("is_")) props.update(key for key in album_dict.keys() if key.startswith(("is_", "_")))
props.remove("is_favorite") props.remove("is_favorite")
for key in props: for key in props:

View File

@ -150,7 +150,11 @@ class ALLARGS:
version = ("--version", "-v") version = ("--version", "-v")
class FromFlags: class SessionVars:
"""
Variables that can be altered per session.
"""
EXTRACT_FEAT = True EXTRACT_FEAT = True
""" """
Whether to extract the featured artists from the song title. Whether to extract the featured artists from the song title.
@ -165,14 +169,17 @@ class FromFlags:
REMOVE_REMASTER_FROM_TRACK = True REMOVE_REMASTER_FROM_TRACK = True
DO_PERIODIC_SCANS = True DO_PERIODIC_SCANS = True
PERIODIC_SCAN_INTERVAL = 300 # seconds PERIODIC_SCAN_INTERVAL = 600 # 10 minutes
"""
The interval between periodic scans in seconds.
"""
MERGE_ALBUM_VERSIONS = False MERGE_ALBUM_VERSIONS = False
ARTIST_SEPARATORS = set() ARTIST_SEPARATORS = set()
# TODO: Find a way to eliminate this class without breaking typings # TODO: Find a way to eliminate this class without breaking typings
class ParserFlags: class SessionVarKeys:
EXTRACT_FEAT = "EXTRACT_FEAT" EXTRACT_FEAT = "EXTRACT_FEAT"
REMOVE_PROD = "REMOVE_PROD" REMOVE_PROD = "REMOVE_PROD"
CLEAN_ALBUM_TITLE = "CLEAN_ALBUM_TITLE" CLEAN_ALBUM_TITLE = "CLEAN_ALBUM_TITLE"
@ -183,16 +190,16 @@ class ParserFlags:
ARTIST_SEPARATORS = "ARTIST_SEPARATORS" ARTIST_SEPARATORS = "ARTIST_SEPARATORS"
def get_flag(flag: ParserFlags) -> bool: def get_flag(key: SessionVarKeys) -> bool:
return getattr(FromFlags, flag) return getattr(SessionVars, key)
def set_flag(flag: ParserFlags, value: Any): def set_flag(key: SessionVarKeys, value: Any):
setattr(FromFlags, flag, value) setattr(SessionVars, key, value)
def get_scan_sleep_time() -> int: def get_scan_sleep_time() -> int:
return FromFlags.PERIODIC_SCAN_INTERVAL return SessionVars.PERIODIC_SCAN_INTERVAL
class TCOLOR: class TCOLOR:

View File

@ -1,6 +1,7 @@
import os import os
from app.settings import TCOLOR, Release, FLASKVARS, Paths, get_flag, ParserFlags from app.settings import (FLASKVARS, TCOLOR, Paths, Release, SessionVarKeys,
get_flag)
from app.utils.network import get_ip from app.utils.network import get_ip
@ -30,11 +31,11 @@ def log_startup_info():
to_print = [ to_print = [
[ [
"Extract featured artists from titles", "Extract featured artists from titles",
get_flag(ParserFlags.EXTRACT_FEAT) get_flag(SessionVarKeys.EXTRACT_FEAT)
], ],
[ [
"Remove prod. from titles", "Remove prod. from titles",
get_flag(ParserFlags.REMOVE_PROD) get_flag(SessionVarKeys.REMOVE_PROD)
] ]
] ]

View File

@ -157,7 +157,7 @@ class TrackStore:
Returns all tracks matching the given album hash. Returns all tracks matching the given album hash.
""" """
tracks = [t for t in cls.tracks if t.albumhash == album_hash] tracks = [t for t in cls.tracks if t.albumhash == album_hash]
return remove_duplicates(tracks) return remove_duplicates(tracks, is_album_tracks=True)
@classmethod @classmethod
def get_tracks_by_artisthash(cls, artisthash: str): def get_tracks_by_artisthash(cls, artisthash: str):

View File

@ -1,14 +1,14 @@
import re import re
from app.enums.album_versions import AlbumVersionEnum from app.enums.album_versions import AlbumVersionEnum
from app.settings import get_flag, ParserFlags from app.settings import SessionVarKeys, get_flag
def split_artists(src: str): def split_artists(src: str):
""" """
Splits a string of artists into a list of artists. Splits a string of artists into a list of artists.
""" """
separators: set = get_flag(ParserFlags.ARTIST_SEPARATORS) separators: set = get_flag(SessionVarKeys.ARTIST_SEPARATORS)
separators = separators.union({","}) separators = separators.union({","})
for sep in separators: for sep in separators:

View File

@ -2,21 +2,48 @@ from collections import defaultdict
from operator import attrgetter from operator import attrgetter
from app.models import Track from app.models import Track
from app.utils.hashing import create_hash
def remove_duplicates(tracks: list[Track]) -> list[Track]: def remove_duplicates(tracks: list[Track], is_album_tracks=False) -> list[Track]:
""" """
Remove duplicates from a list of Track objects based on the trackhash attribute. Remove duplicates from a list of Track objects based on the trackhash attribute.
Retain objects with the highest bitrate. Retain objects with the highest bitrate.
""" """
hash_to_tracks = defaultdict(list) tracks_dict = defaultdict(list)
# if is_album_tracks, sort by disc and track number
if is_album_tracks:
for t in tracks:
# _pos is used for sorting tracks by disc and track number
t._pos = int(f"{t.disc}{str(t.track).zfill(3)}")
# _ati is used to remove duplicates when merging album versions
t._ati = f"{t._pos}{create_hash(t.title)}"
# create groups of tracks with the same _ati
for track in tracks:
tracks_dict[track._ati].append(track)
tracks = []
# pick the track with max bitrate for each group
for track_group in tracks_dict.values():
max_bitrate_track = max(track_group, key=attrgetter("bitrate"))
tracks.append(max_bitrate_track)
return sorted(tracks, key=lambda t: t._pos)
# else, sort by trackhash
for track in tracks: for track in tracks:
hash_to_tracks[track.trackhash].append(track) # create groups of tracks with the same trackhash
tracks_dict[track.trackhash].append(track)
tracks = [] tracks = []
for track_group in hash_to_tracks.values(): # pick the track with max bitrate for each trackhash group
for track_group in tracks_dict.values():
max_bitrate_track = max(track_group, key=attrgetter("bitrate")) max_bitrate_track = max(track_group, key=attrgetter("bitrate"))
tracks.append(max_bitrate_track) tracks.append(max_bitrate_track)