Check for duplicates

This commit is contained in:
2026-05-10 23:02:59 +02:00
parent 2034bd5f2b
commit 09afe40f04
3 changed files with 97 additions and 8 deletions
+7 -6
View File
@@ -198,15 +198,16 @@ def delete_book(cfg: CalibreConfig, book_id: int) -> tuple[bool, str]:
def find_duplicate_groups(books: list[dict]) -> list[list[dict]]:
"""Group books by normalised title; return only groups with 2+ entries."""
"""Group books by normalised title+author; return only groups with 2+ entries."""
from collections import defaultdict
groups: dict[str, list[dict]] = defaultdict(list)
for book in books:
title = book.get("title", "")
# Normalise: lowercase, strip punctuation and extra whitespace — no word removal
key = re.sub(r"[^\w\s]", " ", title.lower())
key = re.sub(r"\s+", " ", key).strip()
if key:
title = re.sub(r"[^\w\s]", " ", book.get("title", "").lower())
title = re.sub(r"\s+", " ", title).strip()
authors = re.sub(r"[^\w\s]", " ", book.get("authors", "").lower())
authors = re.sub(r"\s+", " ", authors).strip()
key = f"{title}||{authors}"
if title:
groups[key].append(book)
return sorted(
[g for g in groups.values() if len(g) > 1],