Check for duplicate books

This commit is contained in:
2026-05-10 21:13:01 +02:00
parent 877c4c9d67
commit a7ab2db9f2
4 changed files with 163 additions and 0 deletions
+29
View File
@@ -13,6 +13,7 @@ import db
import sftp as sftp_module import sftp as sftp_module
import sync import sync
import uploader import uploader
from uploader import delete_book, fetch_all_books, find_duplicate_groups
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s%(message)s") logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s%(message)s")
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -185,6 +186,34 @@ async def test_calibre():
return {"ok": ok, "message": message} return {"ok": ok, "message": message}
# --- Duplicates ---
@app.get("/duplicates", response_class=HTMLResponse)
async def duplicates_page(request: Request):
    """Render the page listing duplicate books found in Calibre-Web.

    Loads the configuration, fetches every book with ``fetch_all_books``,
    groups them with ``find_duplicate_groups`` and renders
    ``duplicates.html`` with the context keys ``groups``, ``total_books``
    and ``error``.

    Any exception raised while fetching or grouping is logged and reported
    to the template through ``error`` instead of propagating; ``groups``
    then stays empty.
    """
    cfg = config.load()
    error = None
    groups: list = []
    total_books = 0
    try:
        # NOTE(review): fetch_all_books looks synchronous; if so it blocks
        # the event loop for the duration of the scan — confirm.
        books = fetch_all_books(cfg.calibre)
        total_books = len(books)
        groups = find_duplicate_groups(books)
    except Exception as e:
        # Keep the traceback in the server log; the page only shows a summary.
        log.exception("Could not build the duplicate list")
        # str(e) is empty for exceptions raised without a message; an empty
        # string is falsy in the template and would render as
        # "no duplicates found", so fall back to the exception class name.
        error = str(e) or type(e).__name__
    return templates.TemplateResponse(request, "duplicates.html", {
        "groups": groups,
        "total_books": total_books,
        "error": error,
    })
@app.post("/api/delete_book/{book_id}")
async def delete_book_api(book_id: int):
    """Delete one book from Calibre-Web and report the outcome as JSON.

    Returns ``{"ok": bool, "message": str}``. An exception raised by
    ``delete_book`` is logged and turned into ``ok: False`` so the caller
    always receives the same JSON shape rather than an error page.
    """
    cfg = config.load()
    try:
        ok, message = delete_book(cfg.calibre, book_id)
    except Exception as e:
        log.exception("Deleting book %s failed", book_id)
        # Fall back to the class name when the exception carries no message.
        ok, message = False, str(e) or type(e).__name__
    return {"ok": ok, "message": message}
# --- Data reset --- # --- Data reset ---
@app.post("/settings/reset-sync-data") @app.post("/settings/reset-sync-data")
+1
View File
@@ -11,6 +11,7 @@
<a href="/" class="brand">CalibreSync</a> <a href="/" class="brand">CalibreSync</a>
<a href="/">Dashboard</a> <a href="/">Dashboard</a>
<a href="/books">Books</a> <a href="/books">Books</a>
<a href="/duplicates">Duplicates</a>
<a href="/settings">Settings</a> <a href="/settings">Settings</a>
</nav> </nav>
<main> <main>
+81
View File
@@ -0,0 +1,81 @@
{% extends "base.html" %}
{% block title %}Duplicates — CalibreSync{% endblock %}
{% block content %}
<div class="page-header">
<h1>Duplicate books in Calibre-Web</h1>
</div>
{# Show the error passed in by the handler if there is one; otherwise the scan summary followed by one table per duplicate group. #}
{% if error %}
<div class="alert alert-warning">Could not fetch books from Calibre-Web: {{ error }}</div>
{% else %}
<p class="muted small" style="margin-bottom:1.5rem">
Scanned <strong>{{ total_books }}</strong> book(s) —
{% if groups %}
found <strong>{{ groups|length }}</strong> duplicate group(s).
Books are grouped by normalised title. Keep the one you want and delete the rest.
{% else %}
no duplicates found.
{% endif %}
</p>
{# Each group is a list of book rows; the title of its first row is used as the group heading. #}
{% for group in groups %}
<div class="form-section" style="margin-bottom:1rem">
<h3 style="margin-top:0">{{ group[0].title }}</h3>
<table>
<thead>
<tr>
<th>ID</th>
<th>Title</th>
<th>Authors</th>
<th>Format</th>
<th></th>
</tr>
</thead>
<tbody>
{# The row-<id> and status-<id> element ids are looked up by deleteBook() in the script at the bottom of this template. #}
{% for book in group %}
<tr id="row-{{ book.id }}">
<td class="mono muted">{{ book.id }}</td>
<td>{{ book.title }}</td>
<td>{{ book.authors }}</td>
<td>{{ book.format or "—" }}</td>
<td>
<button class="btn btn-danger" style="padding:0.2rem 0.7rem;font-size:0.85rem"
onclick="deleteBook({{ book.id }}, this)">Delete</button>
<span id="status-{{ book.id }}" class="muted small" style="margin-left:0.5rem"></span>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% endfor %}
{% endif %}
<script>
// Ask for confirmation, POST to /api/delete_book/<id>, and reflect the outcome
// in the book's table row.
//   id  - book id; used in the request URL and in the row-/status- element ids
//   btn - the clicked button; disabled while the request is in flight
async function deleteBook(id, btn) {
  if (!confirm('Delete book ID ' + id + ' from Calibre-Web?')) return;
  const status = document.getElementById('status-' + id);

  // Re-enable the button and show msg in the error colour.
  const fail = (msg) => {
    btn.disabled = false;
    btn.textContent = 'Delete';
    status.textContent = msg;
    status.style.color = 'var(--error, #f87171)';
  };

  btn.disabled = true;
  btn.textContent = 'Deleting…';
  // Clear whatever a previous attempt left behind.
  status.textContent = '';
  try {
    const r = await fetch('/api/delete_book/' + id, {method: 'POST'});
    // A non-2xx reply is not guaranteed to carry JSON; check before parsing
    // so the user sees the HTTP status instead of a JSON parse error.
    if (!r.ok) {
      fail('Failed: HTTP ' + r.status);
      return;
    }
    const data = await r.json();
    if (data.ok) {
      document.getElementById('row-' + id).style.opacity = '0.35';
      btn.textContent = 'Deleted';
      // Reset the colour in case an earlier failed attempt turned it red.
      status.style.color = '';
      status.textContent = '✓';
    } else {
      fail('Failed: ' + data.message);
    }
  } catch (e) {
    fail('Error: ' + e);
  }
}
</script>
{% endblock %}
+52
View File
@@ -157,6 +157,58 @@ class CalibreClient:
return "error" return "error"
def fetch_all_books(cfg: CalibreConfig) -> list[dict]:
    """Fetch every book from Calibre-Web via /ajax/listbooks. Returns raw row dicts.

    Pages through the endpoint 100 rows at a time, sorted by title, and
    concatenates the ``rows`` of every response. Paging stops when a page
    comes back empty or when the number of rows fetched reaches the
    ``total_count`` reported by the server.

    Errors from authentication, the HTTP request, ``raise_for_status()`` or
    JSON decoding are not caught here and propagate to the caller.
    """
    client = CalibreClient(cfg)
    client._ensure_auth()
    all_books: list[dict] = []
    page_size = 100
    start = 0
    # NOTE(review): upstream Calibre-Web appears to read `offset`/`limit` and
    # to report `total`/`totalNotFiltered` for this endpoint. If the deployed
    # version does too, `total_count` is always missing and only the first
    # page is ever returned — confirm the parameter and key names.
    while True:
        resp = client._session.get(
            f"{cfg.url}/ajax/listbooks",
            params={"start": start, "length": page_size, "sort": "title", "order": "asc"},
            timeout=30,
        )
        resp.raise_for_status()
        data = resp.json()
        rows = data.get("rows") or []
        if not rows:
            # An empty page means there is nothing further to read, whatever
            # total the server reports.
            break
        all_books.extend(rows)
        # Advance by what was actually received so no rows are skipped if the
        # server returns fewer rows per page than requested.
        start += len(rows)
        if start >= data.get("total_count", 0):
            break
    return all_books
def delete_book(cfg: CalibreConfig, book_id: int) -> tuple[bool, str]:
    """Ask Calibre-Web to delete the book with the given ID.

    POSTs to ``{cfg.url}/delete/{book_id}`` through an authenticated client
    session, sending the client's CSRF token as ``csrf_token`` when it has
    one and an empty form otherwise.

    Returns ``(True, "Deleted")`` when the response is OK, otherwise
    ``(False, "HTTP <status code>")``.
    """
    calibre = CalibreClient(cfg)
    calibre._ensure_auth()

    # Only include the token field when the client actually holds a token.
    form = {}
    if calibre._upload_csrf:
        form["csrf_token"] = calibre._upload_csrf

    response = calibre._session.post(
        f"{cfg.url}/delete/{book_id}",
        data=form,
        timeout=30,
    )
    if not response.ok:
        return False, f"HTTP {response.status_code}"
    return True, "Deleted"
def find_duplicate_groups(books: list[dict]) -> list[list[dict]]:
    """Group books by normalised title; return only groups with 2+ entries.

    The grouping key is the output of ``_normalize_words`` for the title,
    sorted and joined with spaces, so word order in the title does not
    affect grouping. Books whose key is empty are left out.

    The returned groups are ordered by the lower-cased title of their first
    book; within a group, books keep the order they had in ``books``.
    """
    from collections import defaultdict

    groups: dict[str, list[dict]] = defaultdict(list)
    for book in books:
        # `or ""` also covers a title that is present but null, which
        # `.get("title", "")` would pass through as None.
        words = _normalize_words(book.get("title") or "")
        key = " ".join(sorted(words))
        if key:
            groups[key].append(book)
    return sorted(
        (g for g in groups.values() if len(g) > 1),
        key=lambda g: (g[0].get("title") or "").lower(),
    )
def test_connection(cfg: CalibreConfig) -> tuple[bool, str]: def test_connection(cfg: CalibreConfig) -> tuple[bool, str]:
try: try:
client = CalibreClient(cfg) client = CalibreClient(cfg)