diff --git a/main.py b/main.py
index c6c0d93..4d5c52a 100644
--- a/main.py
+++ b/main.py
@@ -214,6 +214,66 @@ async def delete_book_api(book_id: int):
     return {"ok": ok, "message": message}
 
 
+# Progress record of the background dedup job, returned by the status endpoint.
+# It is only ever mutated in place (never rebound), so no `global` is needed.
+_dedup_state: dict = {"running": False, "deleted": 0, "failed": 0, "total": 0, "done": False, "error": None}
+
+
+def _run_dedup():
+    """Delete all but the lowest-id book in every duplicate group.
+
+    Meant to run as a background task: progress counters, the completion
+    flag and any error message are written into ``_dedup_state``.
+    Returns nothing; exceptions are logged and recorded, not propagated.
+    """
+    try:
+        cfg = config.load()
+        log.info("Dedup: fetching all books ...")
+        books = fetch_all_books(cfg.calibre)
+        groups = find_duplicate_groups(books)
+        # Within each group keep the first book when ordered by id; queue the rest.
+        to_delete = [b for group in groups for b in sorted(group, key=lambda x: x.get("id", 0))[1:]]
+        _dedup_state.update({"total": len(to_delete), "deleted": 0, "failed": 0})
+        log.info("Dedup: %d duplicate(s) to delete across %d group(s)", len(to_delete), len(groups))
+        for book in to_delete:
+            ok, msg = delete_book(cfg.calibre, book["id"])
+            if ok:
+                _dedup_state["deleted"] += 1
+                # Report progress only when the deleted count has just reached a
+                # multiple of 10, so failed deletions do not repeat the same line.
+                if _dedup_state["deleted"] % 10 == 0:
+                    log.info("Dedup progress: %d / %d deleted", _dedup_state["deleted"], _dedup_state["total"])
+            else:
+                _dedup_state["failed"] += 1
+                log.warning("Dedup: failed to delete book %d: %s", book["id"], msg)
+        log.info("Dedup done: %d deleted, %d failed", _dedup_state["deleted"], _dedup_state["failed"])
+    except Exception as e:
+        # Boundary of the background task: keep the traceback in the log and
+        # expose only the message through the status record.
+        log.exception("Dedup error: %s", e)
+        _dedup_state["error"] = str(e)
+    finally:
+        _dedup_state["running"] = False
+        _dedup_state["done"] = True
+
+
+@app.post("/api/delete_duplicates")
+async def delete_duplicates_api(background_tasks: BackgroundTasks):
+    """Schedule the dedup job in the background unless one is already running."""
+    if _dedup_state["running"]:
+        return {"ok": False, "message": "Already running"}
+    # Reset counters and flags left over from any previous run before scheduling.
+    _dedup_state.update({"running": True, "deleted": 0, "failed": 0, "total": 0, "done": False, "error": None})
+    background_tasks.add_task(_run_dedup)
+    return {"ok": True, "message": "Started"}
+
+
+@app.get("/api/delete_duplicates/status")
+async def delete_duplicates_status():
+    """Return the current dedup progress record."""
+    return _dedup_state
+
+
 @app.get("/api/debug/calibre_books")
 async def debug_calibre_books():
     """Show raw Calibre-Web listbooks response shape 
so we can identify field names.""" diff --git a/templates/duplicates.html b/templates/duplicates.html index 542ac3c..09afc6e 100644 --- a/templates/duplicates.html +++ b/templates/duplicates.html @@ -4,16 +4,23 @@ {% block content %} + + {% if error %}
Could not fetch books from Calibre-Web: {{ error }}
{% else %}

Scanned {{ total_books }} book(s) — {% if groups %} - found {{ groups|length }} duplicate group(s). - Books are grouped by normalised title. Keep the one you want and delete the rest. + found {{ groups|length }} duplicate group(s) (same title + author). + The oldest copy (lowest ID) is kept when deleting all. {% else %} no duplicates found. {% endif %}
@@ -77,5 +84,64 @@ async function deleteBook(id, btn) {
     status.textContent = 'Error: ' + e;
   }
 }
+
+// Start the server-side bulk delete, then poll its status endpoint every 2 s
+// and mirror the progress into #dedup-progress until it reports done or error.
+async function deleteAll(btn) {
+  if (!confirm('Delete all duplicates from Calibre-Web, keeping the oldest copy of each title+author? This cannot be undone.')) return;
+  btn.disabled = true;
+  btn.textContent = 'Starting…';
+  const progress = document.getElementById('dedup-progress');
+  progress.style.display = '';
+  progress.textContent = 'Fetching book list from Calibre-Web…';
+
+  // Show a failure and give the button back so the user can retry.
+  const fail = (reason) => {
+    progress.textContent = 'Error: ' + reason;
+    progress.className = 'alert alert-warning';
+    btn.disabled = false;
+    btn.textContent = 'Delete all duplicates (keep oldest)';
+  };
+
+  try {
+    const started = await fetch('/api/delete_duplicates', {method: 'POST'});
+    // The JSON body is not inspected: if a run is already in progress, polling simply follows it.
+    if (!started.ok) throw new Error('HTTP ' + started.status);
+  } catch (e) {
+    fail(e);
+    return;
+  }
+
+  const poll = setInterval(async () => {
+    let s;
+    try {
+      const r = await fetch('/api/delete_duplicates/status');
+      if (!r.ok) throw new Error('HTTP ' + r.status);
+      s = await r.json();
+    } catch (e) {
+      // A failed poll must stop the timer and release the button instead of rejecting unhandled.
+      clearInterval(poll);
+      fail(e);
+      return;
+    }
+    if (s.error) {
+      clearInterval(poll);
+      fail(s.error);
+      return;
+    }
+    if (s.total > 0) {
+      progress.textContent = `Deleting… ${s.deleted} / ${s.total} deleted, ${s.failed} failed`;
+    } else {
+      progress.textContent = 'Scanning for duplicates…';
+    }
+    if (s.done) {
+      clearInterval(poll);
+      progress.textContent = `Done — ${s.deleted} book(s) deleted, ${s.failed} failed. Reload to refresh the list.`;
+      btn.textContent = 'Reload';
+      btn.disabled = false;
+      btn.onclick = () => location.reload();
+    }
+  }, 2000);
+}
 {% endblock %}
diff --git a/uploader.py b/uploader.py
index 2bcc79f..fdf1463 100644
--- a/uploader.py
+++ b/uploader.py
@@ -198,15 +198,16 @@ def delete_book(cfg: CalibreConfig, book_id: int) -> tuple[bool, str]:
 
 
 def find_duplicate_groups(books: list[dict]) -> list[list[dict]]:
-    """Group books by normalised title; return only groups with 2+ entries."""
+    """Group books by normalised title+author; return only groups with 2+ entries."""
     from collections import defaultdict
     groups: dict[str, list[dict]] = defaultdict(list)
     for book in books:
-        title = book.get("title", "")
-        # Normalise: lowercase, strip punctuation and extra whitespace — no word removal
-        key = re.sub(r"[^\w\s]", " ", title.lower())
-        key = re.sub(r"\s+", " ", key).strip()
-        if key:
+        title = re.sub(r"[^\w\s]", " ", book.get("title", "").lower())
+        title = re.sub(r"\s+", " ", title).strip()
+        authors = re.sub(r"[^\w\s]", " ", book.get("authors", "").lower())
+        authors = re.sub(r"\s+", " ", authors).strip()
+        key = f"{title}||{authors}"
+        if title:
             groups[key].append(book)
     return sorted(
         [g for g in groups.values() if len(g) > 1],