From 6a43394404387bb490f1bbbf9ac349cd8982737c Mon Sep 17 00:00:00 2001 From: grymphen Date: Tue, 12 May 2026 12:47:07 +0200 Subject: [PATCH] check for doubles --- uploader.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/uploader.py b/uploader.py index 49e5d25..bc3a832 100644 --- a/uploader.py +++ b/uploader.py @@ -162,13 +162,14 @@ def fetch_all_books(cfg: CalibreConfig) -> list[dict]: client = CalibreClient(cfg) client._ensure_auth() all_books: list[dict] = [] - page_size = 100 + seen_ids: set = set() + page_size = 1000 start = 0 while True: resp = client._session.get( f"{cfg.url}/ajax/listbooks", params={"draw": 1, "start": start, "length": page_size, "sort": "title", "order": "asc"}, - timeout=30, + timeout=60, ) resp.raise_for_status() data = resp.json() @@ -181,20 +182,19 @@ def fetch_all_books(cfg: CalibreConfig) -> list[dict]: data.get("recordsTotal") or data.get("total_count") or data.get("total") or data.get("totalNotFiltered") or 0 ) - all_books.extend(rows) - log.info("Books fetched: %d / %d", len(all_books), total) - if not rows or len(all_books) >= total: + new_in_page = 0 + for b in rows: + bid = b.get("id") + if bid not in seen_ids: + seen_ids.add(bid) + all_books.append(b) + new_in_page += 1 + log.info("Books fetched: %d / %d (page gave %d new)", len(all_books), total, new_in_page) + # Stop when: empty page, no new books (start is being ignored), or we've seen everything + if not rows or new_in_page == 0 or len(all_books) >= total: break start += len(rows) - # Deduplicate by ID in case of page-boundary overlap in the API response - seen: set[int] = set() - unique: list[dict] = [] - for b in all_books: - bid = b.get("id") - if bid is None or bid not in seen: - seen.add(bid) - unique.append(b) - return unique + return all_books def delete_book(cfg: CalibreConfig, book_id: int, client: "CalibreClient | None" = None) -> tuple[bool, str]: