check for doubles
This commit is contained in:
@@ -214,6 +214,51 @@ async def delete_book_api(book_id: int):
|
|||||||
return {"ok": ok, "message": message}
|
return {"ok": ok, "message": message}
|
||||||
|
|
||||||
|
|
||||||
|
_dedup_state: dict = {"running": False, "deleted": 0, "failed": 0, "total": 0, "done": False, "error": None}
|
||||||
|
|
||||||
|
|
||||||
|
def _run_dedup():
    """Delete every duplicate book, keeping the lowest-id copy of each group.

    Intended to be queued as a background task by the
    ``/api/delete_duplicates`` endpoint. Progress is published by mutating
    the module-level ``_dedup_state`` dict in place (no ``global`` statement
    is needed because the name is never rebound), which is the same object
    the status endpoint returns.

    Any exception is logged with its traceback and recorded as a string in
    ``_dedup_state["error"]``; ``running`` is always cleared and ``done``
    always set on exit.
    """
    try:
        cfg = config.load()
        log.info("Dedup: fetching all books ...")
        books = fetch_all_books(cfg.calibre)
        groups = find_duplicate_groups(books)
        # Within each group the lowest id sorts first and is kept; every
        # other member is queued for deletion.
        to_delete = [
            book
            for group in groups
            for book in sorted(group, key=lambda b: b.get("id", 0))[1:]
        ]
        _dedup_state.update({"total": len(to_delete), "deleted": 0, "failed": 0})
        log.info("Dedup: %d duplicate(s) to delete across %d group(s)", len(to_delete), len(groups))
        for processed, book in enumerate(to_delete, start=1):
            # The sort key above tolerates a missing id, so tolerate it here
            # too: count it as a failure instead of aborting the whole run.
            book_id = book.get("id")
            if book_id is None:
                ok, msg = False, "book has no id"
            else:
                ok, msg = delete_book(cfg.calibre, book_id)
            if ok:
                _dedup_state["deleted"] += 1
            else:
                _dedup_state["failed"] += 1
                # %s rather than %d so a missing (None) id can still be logged.
                log.warning("Dedup: failed to delete book %s: %s", book_id, msg)
            # Gate on books processed, not on the deleted count: the deleted
            # count stays at a multiple of 10 (including 0) across failures
            # and would otherwise re-log the same progress line each time.
            if processed % 10 == 0:
                log.info("Dedup progress: %d / %d deleted", _dedup_state["deleted"], _dedup_state["total"])
        log.info("Dedup done: %d deleted, %d failed", _dedup_state["deleted"], _dedup_state["failed"])
    except Exception as e:
        # Top-level boundary of the background job: keep the traceback in the
        # log and surface the message to the status endpoint.
        log.exception("Dedup error: %s", e)
        _dedup_state["error"] = str(e)
    finally:
        _dedup_state["running"] = False
        _dedup_state["done"] = True
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/delete_duplicates")
async def delete_duplicates_api(background_tasks: BackgroundTasks):
    """Queue the duplicate-removal job unless one is already in progress.

    Returns a JSON-serialisable dict with ``ok`` and ``message`` keys.
    """
    if not _dedup_state["running"]:
        # Wipe the counters and flags left by any previous run, and mark the
        # job as running before it is queued.
        fresh_state = {
            "running": True,
            "deleted": 0,
            "failed": 0,
            "total": 0,
            "done": False,
            "error": None,
        }
        _dedup_state.update(fresh_state)
        background_tasks.add_task(_run_dedup)
        return {"ok": True, "message": "Started"}

    return {"ok": False, "message": "Already running"}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/delete_duplicates/status")
async def delete_duplicates_status():
    """Report the progress of the duplicate-removal job.

    Hands back the module-level ``_dedup_state`` dict itself (not a copy).
    """
    current_state = _dedup_state
    return current_state
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/debug/calibre_books")
|
@app.get("/api/debug/calibre_books")
|
||||||
async def debug_calibre_books():
|
async def debug_calibre_books():
|
||||||
"""Show raw Calibre-Web listbooks response shape so we can identify field names."""
|
"""Show raw Calibre-Web listbooks response shape so we can identify field names."""
|
||||||
|
|||||||
@@ -4,7 +4,14 @@
|
|||||||
{% block content %}
|
{% block content %}
|
||||||
<div class="page-header">
|
<div class="page-header">
|
||||||
<h1>Duplicate books in Calibre-Web</h1>
|
<h1>Duplicate books in Calibre-Web</h1>
|
||||||
|
{% if groups %}
|
||||||
|
<div class="header-actions">
|
||||||
|
<button class="btn btn-danger" onclick="deleteAll(this)">Delete all duplicates (keep oldest)</button>
|
||||||
</div>
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="dedup-progress" style="display:none" class="alert alert-success"></div>
|
||||||
|
|
||||||
{% if error %}
|
{% if error %}
|
||||||
<div class="alert alert-warning">Could not fetch books from Calibre-Web: {{ error }}</div>
|
<div class="alert alert-warning">Could not fetch books from Calibre-Web: {{ error }}</div>
|
||||||
@@ -12,8 +19,8 @@
|
|||||||
<p class="muted small" style="margin-bottom:1.5rem">
|
<p class="muted small" style="margin-bottom:1.5rem">
|
||||||
Scanned <strong>{{ total_books }}</strong> book(s) —
|
Scanned <strong>{{ total_books }}</strong> book(s) —
|
||||||
{% if groups %}
|
{% if groups %}
|
||||||
found <strong>{{ groups|length }}</strong> duplicate group(s).
|
found <strong>{{ groups|length }}</strong> duplicate group(s) (same title + author).
|
||||||
Books are grouped by normalised title. Keep the one you want and delete the rest.
|
The oldest copy (lowest ID) is kept when deleting all.
|
||||||
{% else %}
|
{% else %}
|
||||||
no duplicates found.
|
no duplicates found.
|
||||||
{% endif %}
|
{% endif %}
|
||||||
@@ -77,5 +84,41 @@ async function deleteBook(id, btn) {
|
|||||||
status.textContent = 'Error: ' + e;
|
status.textContent = 'Error: ' + e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Ask the server to delete all duplicate books, then poll the status
 * endpoint every two seconds and mirror its progress into #dedup-progress.
 *
 * On success the button turns into a "Reload" button; on any failure
 * (start request, status request, or an error reported by the job) the
 * message is shown and the button is re-enabled so the user can retry.
 *
 * @param {HTMLButtonElement} btn - the button that triggered the action.
 */
async function deleteAll(btn) {
  if (!confirm('Delete all duplicates from Calibre-Web, keeping the oldest copy of each title+author? This cannot be undone.')) return;

  const idleLabel = 'Delete all duplicates (keep oldest)';
  const progress = document.getElementById('dedup-progress');

  // Show a failure and hand the button back in its initial state.
  const fail = (reason) => {
    progress.textContent = 'Error: ' + reason;
    progress.className = 'alert alert-warning';
    btn.disabled = false;
    btn.textContent = idleLabel;
  };

  btn.disabled = true;
  btn.textContent = 'Starting…';
  // Restore the success styling in case a previous attempt ended in an error.
  progress.className = 'alert alert-success';
  progress.style.display = '';
  progress.textContent = 'Fetching book list from Calibre-Web…';

  try {
    const started = await fetch('/api/delete_duplicates', {method: 'POST'});
    // An "Already running" reply is still HTTP 200; polling then simply
    // attaches to the job that is in progress. Only transport/HTTP failures
    // abort here.
    if (!started.ok) throw new Error('HTTP ' + started.status);
  } catch (e) {
    fail(e.message || String(e));
    return;
  }

  // Chained timeouts instead of setInterval: the next poll is scheduled only
  // after the previous one has completed, so slow responses never overlap
  // and a failed poll stops the loop instead of rejecting forever.
  const poll = async () => {
    let s;
    try {
      const r = await fetch('/api/delete_duplicates/status');
      if (!r.ok) throw new Error('HTTP ' + r.status);
      s = await r.json();
    } catch (e) {
      fail(e.message || String(e));
      return;
    }

    if (s.error) {
      fail(s.error);
      return;
    }

    if (s.done) {
      progress.textContent = `Done — ${s.deleted} book(s) deleted, ${s.failed} failed. Reload to refresh the list.`;
      btn.textContent = 'Reload';
      btn.disabled = false;
      btn.onclick = () => location.reload();
      return;
    }

    if (s.total > 0) {
      progress.textContent = `Deleting… ${s.deleted} / ${s.total} deleted, ${s.failed} failed`;
    } else {
      progress.textContent = 'Scanning for duplicates…';
    }
    setTimeout(poll, 2000);
  };

  setTimeout(poll, 2000);
}
|
||||||
</script>
|
</script>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|||||||
+7
-6
@@ -198,15 +198,16 @@ def delete_book(cfg: CalibreConfig, book_id: int) -> tuple[bool, str]:
|
|||||||
|
|
||||||
|
|
||||||
def find_duplicate_groups(books: list[dict]) -> list[list[dict]]:
|
def find_duplicate_groups(books: list[dict]) -> list[list[dict]]:
|
||||||
"""Group books by normalised title; return only groups with 2+ entries."""
|
"""Group books by normalised title+author; return only groups with 2+ entries."""
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
groups: dict[str, list[dict]] = defaultdict(list)
|
groups: dict[str, list[dict]] = defaultdict(list)
|
||||||
for book in books:
|
for book in books:
|
||||||
title = book.get("title", "")
|
title = re.sub(r"[^\w\s]", " ", book.get("title", "").lower())
|
||||||
# Normalise: lowercase, strip punctuation and extra whitespace — no word removal
|
title = re.sub(r"\s+", " ", title).strip()
|
||||||
key = re.sub(r"[^\w\s]", " ", title.lower())
|
authors = re.sub(r"[^\w\s]", " ", book.get("authors", "").lower())
|
||||||
key = re.sub(r"\s+", " ", key).strip()
|
authors = re.sub(r"\s+", " ", authors).strip()
|
||||||
if key:
|
key = f"{title}||{authors}"
|
||||||
|
if title:
|
||||||
groups[key].append(book)
|
groups[key].append(book)
|
||||||
return sorted(
|
return sorted(
|
||||||
[g for g in groups.values() if len(g) > 1],
|
[g for g in groups.values() if len(g) > 1],
|
||||||
|
|||||||
Reference in New Issue
Block a user