diff --git a/uploader.py b/uploader.py index dcb3559..b965f7a 100644 --- a/uploader.py +++ b/uploader.py @@ -167,16 +167,19 @@ def fetch_all_books(cfg: CalibreConfig) -> list[dict]: while True: resp = client._session.get( f"{cfg.url}/ajax/listbooks", - params={"start": start, "length": page_size, "sort": "title", "order": "asc"}, + params={"draw": 1, "start": start, "length": page_size, "sort": "title", "order": "asc"}, timeout=30, ) resp.raise_for_status() data = resp.json() - rows = data.get("rows", []) + # Calibre-Web uses DataTables format: "data"/"recordsTotal", older versions use "rows"/"total_count" + rows = data.get("rows") or data.get("data") or [] + total = data.get("total_count") or data.get("recordsTotal") or data.get("recordsFiltered") or 0 all_books.extend(rows) - if start + page_size >= data.get("total_count", 0): + log.info("Books fetched: %d so far (page start=%d, page_size=%d, total=%d)", len(all_books), start, len(rows), total) + if not rows or len(all_books) >= total: break - start += page_size + start += len(rows) return all_books @@ -199,8 +202,10 @@ def find_duplicate_groups(books: list[dict]) -> list[list[dict]]: from collections import defaultdict groups: dict[str, list[dict]] = defaultdict(list) for book in books: - words = _normalize_words(book.get("title", "")) - key = " ".join(sorted(words)) + title = book.get("title", "") + # Normalise: lowercase, strip punctuation and extra whitespace — no word removal + key = re.sub(r"[^\w\s]", " ", title.lower()) + key = re.sub(r"\s+", " ", key).strip() if key: groups[key].append(book) return sorted(