check for duplicates

This commit is contained in:
2026-05-10 21:23:04 +02:00
parent 5dc01b002e
commit ef8dc6b838
+11 -6
View File
@@ -167,16 +167,19 @@ def fetch_all_books(cfg: CalibreConfig) -> list[dict]:
while True: while True:
resp = client._session.get( resp = client._session.get(
f"{cfg.url}/ajax/listbooks", f"{cfg.url}/ajax/listbooks",
params={"start": start, "length": page_size, "sort": "title", "order": "asc"}, params={"draw": 1, "start": start, "length": page_size, "sort": "title", "order": "asc"},
timeout=30, timeout=30,
) )
resp.raise_for_status() resp.raise_for_status()
data = resp.json() data = resp.json()
rows = data.get("rows", []) # Calibre-Web uses DataTables format: "data"/"recordsTotal", older versions use "rows"/"total_count"
rows = data.get("rows") or data.get("data") or []
total = data.get("total_count") or data.get("recordsTotal") or data.get("recordsFiltered") or 0
all_books.extend(rows) all_books.extend(rows)
if start + page_size >= data.get("total_count", 0): log.info("Books fetched: %d so far (page start=%d, page_size=%d, total=%d)", len(all_books), start, len(rows), total)
if not rows or len(all_books) >= total:
break break
start += page_size start += len(rows)
return all_books return all_books
@@ -199,8 +202,10 @@ def find_duplicate_groups(books: list[dict]) -> list[list[dict]]:
from collections import defaultdict from collections import defaultdict
groups: dict[str, list[dict]] = defaultdict(list) groups: dict[str, list[dict]] = defaultdict(list)
for book in books: for book in books:
words = _normalize_words(book.get("title", "")) title = book.get("title", "")
key = " ".join(sorted(words)) # Normalise: lowercase, strip punctuation and extra whitespace — no word removal
key = re.sub(r"[^\w\s]", " ", title.lower())
key = re.sub(r"\s+", " ", key).strip()
if key: if key:
groups[key].append(book) groups[key].append(book)
return sorted( return sorted(