check for doubles
This commit is contained in:
+69
-8
@@ -158,21 +158,20 @@ class CalibreClient:
|
||||
|
||||
|
||||
def fetch_all_books(cfg: CalibreConfig) -> list[dict]:
|
||||
"""Fetch every book from Calibre-Web via /ajax/listbooks. Returns raw row dicts."""
|
||||
"""Fetch every book from Calibre-Web. Tries /ajax/listbooks first; falls back to OPDS if pagination is broken."""
|
||||
client = CalibreClient(cfg)
|
||||
client._ensure_auth()
|
||||
all_books: list[dict] = []
|
||||
seen_ids: set = set()
|
||||
page_size = 1000
|
||||
start = 0
|
||||
reported_total = 0
|
||||
while True:
|
||||
resp = client._session.get(
|
||||
f"{cfg.url}/ajax/listbooks",
|
||||
params={
|
||||
"draw": 1,
|
||||
# DataTables 1.10+ names
|
||||
"start": start, "length": page_size,
|
||||
# DataTables 1.9.x names (older Calibre-Web)
|
||||
"iDisplayStart": start, "iDisplayLength": page_size,
|
||||
},
|
||||
timeout=60,
|
||||
@@ -182,9 +181,8 @@ def fetch_all_books(cfg: CalibreConfig) -> list[dict]:
|
||||
if start == 0:
|
||||
non_list = {k: v for k, v in data.items() if not isinstance(v, list)}
|
||||
log.info("listbooks page-0 meta fields: %s", non_list)
|
||||
# Calibre-Web uses DataTables format: "data"/"recordsTotal", older versions use "rows"/"total_count"
|
||||
rows = data.get("rows") or data.get("data") or []
|
||||
total = (
|
||||
reported_total = (
|
||||
data.get("recordsTotal") or data.get("total_count") or
|
||||
data.get("total") or data.get("totalNotFiltered") or 0
|
||||
)
|
||||
@@ -195,14 +193,77 @@ def fetch_all_books(cfg: CalibreConfig) -> list[dict]:
|
||||
seen_ids.add(bid)
|
||||
all_books.append(b)
|
||||
new_in_page += 1
|
||||
log.info("Books fetched: %d / %d (page gave %d new)", len(all_books), total, new_in_page)
|
||||
# Stop when: empty page, no new books (start is being ignored), or we've seen everything
|
||||
if not rows or new_in_page == 0 or len(all_books) >= total:
|
||||
log.info("Books fetched: %d / %d (page gave %d new)", len(all_books), reported_total, new_in_page)
|
||||
if not rows or new_in_page == 0 or len(all_books) >= reported_total:
|
||||
break
|
||||
start += len(rows)
|
||||
|
||||
# If we got far fewer books than reported, listbooks pagination is broken — use OPDS instead
|
||||
if reported_total > 0 and len(all_books) < reported_total // 2:
|
||||
log.warning(
|
||||
"listbooks pagination broken (%d/%d books retrieved). Falling back to OPDS.",
|
||||
len(all_books), reported_total,
|
||||
)
|
||||
return _fetch_all_books_opds(cfg)
|
||||
return all_books
|
||||
|
||||
|
||||
def _opds_local_name(tag: str) -> str:
    """Return an XML tag name with any leading ``{namespace}`` prefix removed."""
    return tag.rsplit("}", 1)[-1]


def _opds_entry_to_book(entry) -> dict | None:
    """Convert one OPDS ``<entry>`` element into a book dict.

    The book id is taken from the first child ``<link>`` whose ``href``
    contains ``/download/<id>/``. Returns ``None`` when no such link exists
    (or the id is 0), because the entry then cannot be identified.
    """
    title = ""
    author_parts: list[str] = []
    book_id: int | None = None

    for child in entry:
        ctag = _opds_local_name(child.tag)
        if ctag == "title":
            title = child.text or ""
        elif ctag == "author":
            author_parts.extend(
                gc.text or "" for gc in child if _opds_local_name(gc.tag) == "name"
            )
        elif ctag == "link" and book_id is None:
            # Only the first download link determines the id.
            m = re.search(r"/download/(\d+)/", child.get("href", ""))
            if m:
                book_id = int(m.group(1))

    if not book_id:
        return None
    return {"id": book_id, "title": title, "authors": " & ".join(author_parts)}


def _fetch_all_books_opds(cfg: CalibreConfig) -> list[dict]:
    """Fetch all books via the OPDS catalog, following next-page links.

    Starts at ``{cfg.url}/opds/new`` and follows each feed's ``rel="next"``
    link, authenticating every request with HTTP basic auth built from
    ``cfg.user`` / ``cfg.password``.

    Returns:
        A list of ``{"id", "title", "authors"}`` dicts, de-duplicated by id.
        Multiple authors are joined with ``" & "``.

    Paging stops (returning whatever was collected so far) on a non-OK HTTP
    response, an XML parse error, a page without entries, a missing next
    link, or a next link that points to a page that was already fetched.
    """
    import xml.etree.ElementTree as ET

    books: list[dict] = []
    seen_ids: set[int] = set()
    visited_urls: set[str] = set()
    url: str | None = f"{cfg.url}/opds/new"
    auth = (cfg.user, cfg.password)

    # The context manager closes the session's connection pool on every exit path.
    with requests.Session() as session:
        while url:
            # A feed whose "next" link cycles would otherwise loop forever,
            # since repeated (duplicate) entries still count as a non-empty page.
            if url in visited_urls:
                log.warning("OPDS next link points to an already fetched page — %s", url)
                break
            visited_urls.add(url)

            resp = session.get(url, auth=auth, timeout=30)
            if not resp.ok:
                log.warning("OPDS fetch failed HTTP %s — %s", resp.status_code, url)
                break
            try:
                root = ET.fromstring(resp.content)
            except ET.ParseError as exc:
                log.warning("OPDS XML parse error: %s", exc)
                break

            next_url: str | None = None
            entries_this_page = 0
            for elem in root:
                local = _opds_local_name(elem.tag)
                if local == "link" and elem.get("rel") == "next":
                    href = elem.get("href", "")
                    # An empty href would resolve to the bare base URL, which
                    # is not a feed page; treat it as "no next page".
                    if href:
                        next_url = href if href.startswith("http") else f"{cfg.url}{href}"
                elif local == "entry":
                    entries_this_page += 1
                    book = _opds_entry_to_book(elem)
                    if book is not None and book["id"] not in seen_ids:
                        seen_ids.add(book["id"])
                        books.append(book)

            log.info("OPDS fetched: %d books total (page had %d entries)", len(books), entries_this_page)
            if not entries_this_page:
                break
            url = next_url

    return books
|
||||
|
||||
|
||||
def delete_book(cfg: CalibreConfig, book_id: int, client: "CalibreClient | None" = None) -> tuple[bool, str]:
|
||||
"""Delete a book from Calibre-Web by ID. Pass a pre-authenticated client to avoid re-auth overhead."""
|
||||
if client is None:
|
||||
|
||||
Reference in New Issue
Block a user