mirror of
https://github.com/tcsenpai/swingmusic.git
synced 2025-06-06 03:05:35 +00:00
43 lines
1.2 KiB
Python
43 lines
1.2 KiB
Python
import hashlib
|
|
|
|
from unidecode import unidecode
|
|
|
|
|
|
def create_hash(*args: str, decode: bool = False, limit: int = 10) -> str:
    """
    Create a short, case-insensitive hash that ignores non-alphanumeric characters.

    Example use case:
    - Creating computable IDs for duplicate artists. eg. Juice WRLD and
      Juice Wrld should have the same ID.

    Each argument is lowercased, stripped, and reduced to its alphanumeric
    characters. An argument that has no alphanumeric characters at all
    (e.g. "!!!") is kept as-is, minus spaces, so it still contributes to the
    hash. The normalized arguments are concatenated and hashed with SHA-1.

    :param args: The arguments to hash.
    :param decode: Whether to pass the normalized text through ``unidecode``
        before hashing.
    :param limit: The number of characters to return. Exactly ``limit``
        characters are returned for any ``limit`` from 0 up to the length of
        the hex digest (40); odd values are honoured exactly.
    :return: The first ``limit // 2`` characters of the hex digest followed by
        its last ``limit - limit // 2`` characters.
    """

    def normalize(token: str) -> str:
        # Lowercase, trim and drop spaces, then keep only alphanumerics.
        token = token.lower().strip().replace(" ", "")
        alnum_only = "".join(char for char in token if char.isalnum())

        # A token made up solely of symbols would otherwise vanish from the
        # hash input, so fall back to the space-stripped token.
        return alnum_only or token

    text = "".join(normalize(arg) for arg in args)

    if decode:
        text = unidecode(text)

    # NOTE: SHA-1 is used here (an earlier revision used SHA-256).
    digest = hashlib.sha1(text.encode("utf-8")).hexdigest()

    # Take characters from both ends of the digest rather than one end only.
    # This is to avoid collisions.
    head_len = limit // 2
    # The tail gets the extra character when ``limit`` is odd. Computing the
    # length explicitly avoids the ``-limit // 2`` pitfall: unary minus binds
    # tighter than ``//`` and floor division rounds negative values down,
    # which previously yielded ``limit + 1`` characters for odd limits.
    tail_len = limit - head_len

    # ``digest[-0:]`` is the whole string, so an empty tail must be handled
    # explicitly.
    tail = digest[-tail_len:] if tail_len else ""

    return digest[:head_len] + tail
|