Check for duplicate books
This commit is contained in:
+11
-6
@@ -167,16 +167,19 @@ def fetch_all_books(cfg: CalibreConfig) -> list[dict]:
|
||||
while True:
|
||||
resp = client._session.get(
|
||||
f"{cfg.url}/ajax/listbooks",
|
||||
params={"start": start, "length": page_size, "sort": "title", "order": "asc"},
|
||||
params={"draw": 1, "start": start, "length": page_size, "sort": "title", "order": "asc"},
|
||||
timeout=30,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
rows = data.get("rows", [])
|
||||
# Calibre-Web uses the DataTables format ("data"/"recordsTotal"); older versions use "rows"/"total_count".
|
||||
rows = data.get("rows") or data.get("data") or []
|
||||
total = data.get("total_count") or data.get("recordsTotal") or data.get("recordsFiltered") or 0
|
||||
all_books.extend(rows)
|
||||
if start + page_size >= data.get("total_count", 0):
|
||||
log.info("Books fetched: %d so far (page start=%d, page_size=%d, total=%d)", len(all_books), start, len(rows), total)
|
||||
if not rows or len(all_books) >= total:
|
||||
break
|
||||
start += page_size
|
||||
start += len(rows)
|
||||
return all_books
|
||||
|
||||
|
||||
@@ -199,8 +202,10 @@ def find_duplicate_groups(books: list[dict]) -> list[list[dict]]:
|
||||
from collections import defaultdict
|
||||
groups: dict[str, list[dict]] = defaultdict(list)
|
||||
for book in books:
|
||||
words = _normalize_words(book.get("title", ""))
|
||||
key = " ".join(sorted(words))
|
||||
title = book.get("title", "")
|
||||
# Normalise: lowercase, strip punctuation and extra whitespace — no word removal
|
||||
key = re.sub(r"[^\w\s]", " ", title.lower())
|
||||
key = re.sub(r"\s+", " ", key).strip()
|
||||
if key:
|
||||
groups[key].append(book)
|
||||
return sorted(
|
||||
|
||||
Reference in New Issue
Block a user