Check for duplicates
This commit is contained in:
+11
-6
@@ -167,16 +167,19 @@ def fetch_all_books(cfg: CalibreConfig) -> list[dict]:
|
|||||||
while True:
|
while True:
|
||||||
resp = client._session.get(
|
resp = client._session.get(
|
||||||
f"{cfg.url}/ajax/listbooks",
|
f"{cfg.url}/ajax/listbooks",
|
||||||
params={"start": start, "length": page_size, "sort": "title", "order": "asc"},
|
params={"draw": 1, "start": start, "length": page_size, "sort": "title", "order": "asc"},
|
||||||
timeout=30,
|
timeout=30,
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
rows = data.get("rows", [])
|
# Calibre-Web uses DataTables format: "data"/"recordsTotal", older versions use "rows"/"total_count"
|
||||||
|
rows = data.get("rows") or data.get("data") or []
|
||||||
|
total = data.get("total_count") or data.get("recordsTotal") or data.get("recordsFiltered") or 0
|
||||||
all_books.extend(rows)
|
all_books.extend(rows)
|
||||||
if start + page_size >= data.get("total_count", 0):
|
log.info("Books fetched: %d so far (page start=%d, page_size=%d, total=%d)", len(all_books), start, len(rows), total)
|
||||||
|
if not rows or len(all_books) >= total:
|
||||||
break
|
break
|
||||||
start += page_size
|
start += len(rows)
|
||||||
return all_books
|
return all_books
|
||||||
|
|
||||||
|
|
||||||
@@ -199,8 +202,10 @@ def find_duplicate_groups(books: list[dict]) -> list[list[dict]]:
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
groups: dict[str, list[dict]] = defaultdict(list)
|
groups: dict[str, list[dict]] = defaultdict(list)
|
||||||
for book in books:
|
for book in books:
|
||||||
words = _normalize_words(book.get("title", ""))
|
title = book.get("title", "")
|
||||||
key = " ".join(sorted(words))
|
# Normalise: lowercase, strip punctuation and extra whitespace — no word removal
|
||||||
|
key = re.sub(r"[^\w\s]", " ", title.lower())
|
||||||
|
key = re.sub(r"\s+", " ", key).strip()
|
||||||
if key:
|
if key:
|
||||||
groups[key].append(book)
|
groups[key].append(book)
|
||||||
return sorted(
|
return sorted(
|
||||||
|
|||||||
Reference in New Issue
Block a user