cwa import
This commit is contained in:
@@ -14,18 +14,11 @@ class SFTPConfig:
|
||||
remote_path: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
class CalibreConfig:
|
||||
url: str = ""
|
||||
user: str = ""
|
||||
password: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
class AppConfig:
|
||||
sftp: SFTPConfig = field(default_factory=SFTPConfig)
|
||||
calibre: CalibreConfig = field(default_factory=CalibreConfig)
|
||||
local_work_dir: str = "/tmp/calibresync"
|
||||
work_dir: str = "/tmp/calibresync"
|
||||
import_dir: str = ""
|
||||
|
||||
|
||||
def load() -> AppConfig:
|
||||
@@ -40,12 +33,8 @@ def load() -> AppConfig:
|
||||
password=s.get("sftp_password", ""),
|
||||
remote_path=s.get("sftp_remote_path", ""),
|
||||
),
|
||||
calibre=CalibreConfig(
|
||||
url=s.get("calibre_url", "").rstrip("/"),
|
||||
user=s.get("calibre_user", ""),
|
||||
password=s.get("calibre_pass", ""),
|
||||
),
|
||||
local_work_dir=s.get("local_work_dir", "/tmp/calibresync"),
|
||||
work_dir=s.get("work_dir", "/tmp/calibresync"),
|
||||
import_dir=s.get("import_dir", ""),
|
||||
)
|
||||
|
||||
|
||||
@@ -53,8 +42,8 @@ def save(form: dict) -> None:
|
||||
keys = [
|
||||
"sftp_host", "sftp_port", "sftp_user", "sftp_auth_method",
|
||||
"sftp_password", "sftp_remote_path",
|
||||
"calibre_url", "calibre_user", "calibre_pass",
|
||||
"local_work_dir", "scheduler_interval_minutes", "sync_batch_size",
|
||||
"work_dir", "import_dir",
|
||||
"scheduler_interval_minutes", "sync_batch_size",
|
||||
]
|
||||
for key in keys:
|
||||
if key in form and form[key] is not None:
|
||||
|
||||
@@ -47,26 +47,16 @@ def init_db() -> None:
|
||||
error_msg TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS uploaded_books (
|
||||
id INTEGER PRIMARY KEY,
|
||||
filename TEXT NOT NULL,
|
||||
file_hash TEXT UNIQUE NOT NULL,
|
||||
zip_source TEXT,
|
||||
uploaded_at TEXT,
|
||||
status TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS sync_runs (
|
||||
id INTEGER PRIMARY KEY,
|
||||
started_at TEXT NOT NULL,
|
||||
finished_at TEXT,
|
||||
zips_found INTEGER DEFAULT 0,
|
||||
zips_new INTEGER DEFAULT 0,
|
||||
books_uploaded INTEGER DEFAULT 0,
|
||||
books_skipped INTEGER DEFAULT 0,
|
||||
id INTEGER PRIMARY KEY,
|
||||
started_at TEXT NOT NULL,
|
||||
finished_at TEXT,
|
||||
zips_found INTEGER DEFAULT 0,
|
||||
zips_new INTEGER DEFAULT 0,
|
||||
books_imported INTEGER DEFAULT 0,
|
||||
books_errored INTEGER DEFAULT 0,
|
||||
status TEXT DEFAULT 'running',
|
||||
error_msg TEXT
|
||||
status TEXT DEFAULT 'running',
|
||||
error_msg TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS remote_zip_cache (
|
||||
@@ -171,40 +161,6 @@ def get_recent_zips(limit: int = 50) -> list[sqlite3.Row]:
|
||||
).fetchall()
|
||||
|
||||
|
||||
# --- Uploaded books ---
|
||||
|
||||
def is_book_uploaded(file_hash: str) -> bool:
|
||||
with get_db() as conn:
|
||||
row = conn.execute(
|
||||
"SELECT id FROM uploaded_books WHERE file_hash = ? AND status IN ('uploaded', 'skipped_duplicate')",
|
||||
(file_hash,),
|
||||
).fetchone()
|
||||
return row is not None
|
||||
|
||||
|
||||
def record_book(filename: str, file_hash: str, zip_source: str, status: str) -> None:
|
||||
with get_db() as conn:
|
||||
conn.execute(
|
||||
"""INSERT INTO uploaded_books (filename, file_hash, zip_source, uploaded_at, status)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
ON CONFLICT(file_hash) DO UPDATE SET status = excluded.status""",
|
||||
(filename, file_hash, zip_source, _now(), status),
|
||||
)
|
||||
|
||||
|
||||
def get_books(limit: int = 200, offset: int = 0) -> list[sqlite3.Row]:
|
||||
with get_db() as conn:
|
||||
return conn.execute(
|
||||
"SELECT * FROM uploaded_books ORDER BY uploaded_at DESC LIMIT ? OFFSET ?",
|
||||
(limit, offset),
|
||||
).fetchall()
|
||||
|
||||
|
||||
def get_books_count() -> int:
|
||||
with get_db() as conn:
|
||||
return conn.execute("SELECT COUNT(*) FROM uploaded_books").fetchone()[0]
|
||||
|
||||
|
||||
# --- Sync runs ---
|
||||
|
||||
def start_sync_run() -> int:
|
||||
@@ -233,35 +189,28 @@ def get_recent_runs(limit: int = 10) -> list[sqlite3.Row]:
|
||||
|
||||
def get_stats() -> dict:
|
||||
with get_db() as conn:
|
||||
total_books = conn.execute("SELECT COUNT(*) FROM uploaded_books").fetchone()[0]
|
||||
uploaded = conn.execute(
|
||||
"SELECT COUNT(*) FROM uploaded_books WHERE status = 'uploaded'"
|
||||
).fetchone()[0]
|
||||
skipped = conn.execute(
|
||||
"SELECT COUNT(*) FROM uploaded_books WHERE status = 'skipped_duplicate'"
|
||||
).fetchone()[0]
|
||||
total_zips = conn.execute("SELECT COUNT(*) FROM processed_zips").fetchone()[0]
|
||||
total_imported = conn.execute(
|
||||
"SELECT COALESCE(SUM(books_imported), 0) FROM sync_runs"
|
||||
).fetchone()[0]
|
||||
last_run = conn.execute(
|
||||
"SELECT started_at, status FROM sync_runs ORDER BY started_at DESC LIMIT 1"
|
||||
).fetchone()
|
||||
return {
|
||||
"total_books": total_books,
|
||||
"uploaded": uploaded,
|
||||
"skipped": skipped,
|
||||
"total_zips": total_zips,
|
||||
"total_imported": total_imported,
|
||||
"last_run": dict(last_run) if last_run else None,
|
||||
}
|
||||
|
||||
|
||||
def clear_sync_data() -> dict:
|
||||
"""Delete all processed_zips, uploaded_books, and sync_runs rows. Settings are kept.
|
||||
"""Delete all processed_zips and sync_runs rows. Settings are kept.
|
||||
Also resets the remote scan timestamp so the next sync does a full rescan."""
|
||||
with get_db() as conn:
|
||||
zips = conn.execute("DELETE FROM processed_zips").rowcount
|
||||
books = conn.execute("DELETE FROM uploaded_books").rowcount
|
||||
runs = conn.execute("DELETE FROM sync_runs").rowcount
|
||||
conn.execute("DELETE FROM settings WHERE key = 'remote_cache_last_scan'")
|
||||
return {"zips": zips, "books": books, "runs": runs}
|
||||
return {"zips": zips, "runs": runs}
|
||||
|
||||
|
||||
def _now() -> str:
|
||||
|
||||
@@ -6,6 +6,8 @@ services:
|
||||
volumes:
|
||||
# Persists the SQLite database and settings across container restarts
|
||||
- ./data:/app/data
|
||||
# CWA import folder — set the host path to match your CWA ingest directory
|
||||
- /path/to/cwa-import:/cwa-import
|
||||
# Optional: mount your SSH private key read-only instead of pasting it in the UI
|
||||
# - ~/.ssh/id_rsa:/run/secrets/ssh_key:ro
|
||||
restart: unless-stopped
|
||||
|
||||
@@ -12,8 +12,6 @@ import config
|
||||
import db
|
||||
import sftp as sftp_module
|
||||
import sync
|
||||
import uploader
|
||||
from uploader import CalibreClient, delete_book, fetch_all_books, find_duplicate_groups
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s — %(message)s")
|
||||
log = logging.getLogger(__name__)
|
||||
@@ -77,23 +75,6 @@ async def dashboard(request: Request):
|
||||
})
|
||||
|
||||
|
||||
# --- Books ---
|
||||
|
||||
@app.get("/books", response_class=HTMLResponse)
|
||||
async def books_page(request: Request, page: int = 1):
|
||||
per_page = 50
|
||||
offset = (page - 1) * per_page
|
||||
books = [dict(b) for b in db.get_books(limit=per_page, offset=offset)]
|
||||
total = db.get_books_count()
|
||||
pages = max(1, (total + per_page - 1) // per_page)
|
||||
return templates.TemplateResponse(request, "books.html", {
|
||||
"books": books,
|
||||
"page": page,
|
||||
"pages": pages,
|
||||
"total": total,
|
||||
})
|
||||
|
||||
|
||||
# --- Settings ---
|
||||
|
||||
@app.get("/settings", response_class=HTMLResponse)
|
||||
@@ -117,10 +98,8 @@ async def save_settings(
|
||||
sftp_key: str = Form(""),
|
||||
sftp_password: str = Form(""),
|
||||
sftp_remote_path: str = Form(""),
|
||||
calibre_url: str = Form(""),
|
||||
calibre_user: str = Form(""),
|
||||
calibre_pass: str = Form(""),
|
||||
local_work_dir: str = Form("/tmp/calibresync"),
|
||||
work_dir: str = Form("/tmp/calibresync"),
|
||||
import_dir: str = Form(""),
|
||||
scheduler_interval_minutes: str = Form("0"),
|
||||
sync_batch_size: str = Form("0"),
|
||||
):
|
||||
@@ -132,10 +111,8 @@ async def save_settings(
|
||||
"sftp_key": sftp_key,
|
||||
"sftp_password": sftp_password,
|
||||
"sftp_remote_path": sftp_remote_path,
|
||||
"calibre_url": calibre_url,
|
||||
"calibre_user": calibre_user,
|
||||
"calibre_pass": calibre_pass,
|
||||
"local_work_dir": local_work_dir,
|
||||
"work_dir": work_dir,
|
||||
"import_dir": import_dir,
|
||||
"scheduler_interval_minutes": scheduler_interval_minutes,
|
||||
"sync_batch_size": sync_batch_size,
|
||||
})
|
||||
@@ -179,111 +156,6 @@ async def test_ssh():
|
||||
return {"ok": ok, "message": message}
|
||||
|
||||
|
||||
@app.get("/api/test/calibre")
|
||||
async def test_calibre():
|
||||
cfg = config.load()
|
||||
ok, message = uploader.test_connection(cfg.calibre)
|
||||
return {"ok": ok, "message": message}
|
||||
|
||||
|
||||
# --- Duplicates ---
|
||||
|
||||
@app.get("/duplicates", response_class=HTMLResponse)
|
||||
async def duplicates_page(request: Request):
|
||||
cfg = config.load()
|
||||
error = None
|
||||
groups: list = []
|
||||
total_books = 0
|
||||
try:
|
||||
books = fetch_all_books(cfg.calibre)
|
||||
total_books = len(books)
|
||||
groups = find_duplicate_groups(books)
|
||||
except Exception as e:
|
||||
error = str(e)
|
||||
return templates.TemplateResponse(request, "duplicates.html", {
|
||||
"groups": groups,
|
||||
"total_books": total_books,
|
||||
"error": error,
|
||||
})
|
||||
|
||||
|
||||
@app.post("/api/delete_book/{book_id}")
|
||||
async def delete_book_api(book_id: int):
|
||||
cfg = config.load()
|
||||
ok, message = delete_book(cfg.calibre, book_id)
|
||||
return {"ok": ok, "message": message}
|
||||
|
||||
|
||||
_dedup_state: dict = {"running": False, "deleted": 0, "failed": 0, "total": 0, "done": False, "error": None}
|
||||
|
||||
|
||||
def _run_dedup():
|
||||
global _dedup_state
|
||||
try:
|
||||
cfg = config.load()
|
||||
log.info("Dedup: fetching all books ...")
|
||||
client = CalibreClient(cfg.calibre)
|
||||
client._ensure_auth()
|
||||
books = fetch_all_books(cfg.calibre)
|
||||
groups = find_duplicate_groups(books)
|
||||
to_delete = [b for group in groups for b in sorted(group, key=lambda x: x.get("id", 0))[1:]]
|
||||
_dedup_state.update({"total": len(to_delete), "deleted": 0, "failed": 0})
|
||||
log.info("Dedup: %d duplicate(s) to delete across %d group(s)", len(to_delete), len(groups))
|
||||
for book in to_delete:
|
||||
ok, msg = delete_book(cfg.calibre, book["id"], client)
|
||||
if ok:
|
||||
_dedup_state["deleted"] += 1
|
||||
else:
|
||||
_dedup_state["failed"] += 1
|
||||
log.warning("Dedup: failed to delete book %d: %s", book["id"], msg)
|
||||
if _dedup_state["deleted"] % 10 == 0:
|
||||
log.info("Dedup progress: %d / %d deleted", _dedup_state["deleted"], _dedup_state["total"])
|
||||
log.info("Dedup done: %d deleted, %d failed", _dedup_state["deleted"], _dedup_state["failed"])
|
||||
except Exception as e:
|
||||
log.error("Dedup error: %s", e)
|
||||
_dedup_state["error"] = str(e)
|
||||
finally:
|
||||
_dedup_state["running"] = False
|
||||
_dedup_state["done"] = True
|
||||
|
||||
|
||||
@app.post("/api/delete_duplicates")
|
||||
async def delete_duplicates_api(background_tasks: BackgroundTasks):
|
||||
if _dedup_state["running"]:
|
||||
return {"ok": False, "message": "Already running"}
|
||||
_dedup_state.update({"running": True, "deleted": 0, "failed": 0, "total": 0, "done": False, "error": None})
|
||||
background_tasks.add_task(_run_dedup)
|
||||
return {"ok": True, "message": "Started"}
|
||||
|
||||
|
||||
@app.get("/api/delete_duplicates/status")
|
||||
async def delete_duplicates_status():
|
||||
return _dedup_state
|
||||
|
||||
|
||||
@app.get("/api/debug/calibre_books")
|
||||
async def debug_calibre_books():
|
||||
"""Show raw Calibre-Web listbooks response shape so we can identify field names."""
|
||||
cfg = config.load()
|
||||
from uploader import CalibreClient
|
||||
client = CalibreClient(cfg.calibre)
|
||||
client._ensure_auth()
|
||||
resp = client._session.get(
|
||||
f"{cfg.calibre.url}/ajax/listbooks",
|
||||
params={"draw": 1, "start": 0, "length": 5, "sort": "title", "order": "asc"},
|
||||
timeout=30,
|
||||
)
|
||||
data = resp.json()
|
||||
non_list = {k: v for k, v in data.items() if not isinstance(v, list)}
|
||||
list_keys = {k: len(v) for k, v in data.items() if isinstance(v, list)}
|
||||
return {
|
||||
"http_status": resp.status_code,
|
||||
"top_level_keys": list(data.keys()),
|
||||
"non_list_fields": non_list,
|
||||
"list_fields_lengths": list_keys,
|
||||
}
|
||||
|
||||
|
||||
# --- Data reset ---
|
||||
|
||||
@app.post("/settings/reset-sync-data")
|
||||
|
||||
@@ -4,5 +4,4 @@ jinja2
|
||||
python-multipart
|
||||
paramiko
|
||||
rarfile
|
||||
requests
|
||||
apscheduler
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
import shutil
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
@@ -7,7 +8,6 @@ import config
|
||||
import db
|
||||
import extractor
|
||||
import sftp as sftp_module
|
||||
from uploader import CalibreClient, CalibreUnavailableError
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@@ -33,22 +33,23 @@ def run_sync(limit: int | None = None) -> None:
|
||||
|
||||
_running = True
|
||||
run_id = db.start_sync_run()
|
||||
counters = dict(zips_found=0, zips_new=0, books_uploaded=0, books_skipped=0, books_errored=0)
|
||||
counters = dict(zips_found=0, zips_new=0, books_imported=0, books_errored=0)
|
||||
|
||||
try:
|
||||
log.info("Sync started (limit=%s)", limit)
|
||||
cfg = config.load()
|
||||
_validate_config(cfg)
|
||||
log.info("Config OK — work dir: %s", cfg.local_work_dir)
|
||||
log.info("Config OK — work dir: %s, import dir: %s", cfg.work_dir, cfg.import_dir)
|
||||
|
||||
work_dir = Path(cfg.local_work_dir)
|
||||
work_dir = Path(cfg.work_dir)
|
||||
work_dir.mkdir(parents=True, exist_ok=True)
|
||||
log.info("Work dir ready: %s", work_dir)
|
||||
|
||||
import_dir = Path(cfg.import_dir)
|
||||
import_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
log.info("Connecting to SFTP %s@%s:%s ...", cfg.sftp.user, cfg.sftp.host, cfg.sftp.port)
|
||||
new_zips = sftp_module.list_new_zips(cfg.sftp, max_results=limit)
|
||||
counters["zips_found"] = len(new_zips)
|
||||
|
||||
counters["zips_new"] = len(new_zips)
|
||||
|
||||
if not new_zips:
|
||||
@@ -56,21 +57,11 @@ def run_sync(limit: int | None = None) -> None:
|
||||
db.finish_sync_run(run_id, status="success", **counters)
|
||||
return
|
||||
|
||||
# Determine chunk size; 0 means process everything in one chunk
|
||||
batch_size = int(db.get_setting("sync_batch_size", "0") or "0")
|
||||
if batch_size <= 0:
|
||||
batch_size = len(new_zips)
|
||||
|
||||
total_batches = -(-len(new_zips) // batch_size) # ceiling division
|
||||
client = CalibreClient(cfg.calibre)
|
||||
|
||||
# Pre-load existing book titles so duplicate detection doesn't need per-book OPDS searches
|
||||
try:
|
||||
from uploader import fetch_all_books
|
||||
existing = fetch_all_books(cfg.calibre)
|
||||
client.preload_existing_titles(existing)
|
||||
except Exception as exc:
|
||||
log.warning("Could not pre-load existing books (%s) — will fall back to per-book OPDS search", exc)
|
||||
total_batches = -(-len(new_zips) // batch_size)
|
||||
|
||||
for batch_num, i in enumerate(range(0, len(new_zips), batch_size), start=1):
|
||||
chunk = new_zips[i : i + batch_size]
|
||||
@@ -89,34 +80,21 @@ def run_sync(limit: int | None = None) -> None:
|
||||
books = extractor.extract(local_zip, work_dir / "extracted")
|
||||
log.info("Extract done in %.1fs — %d book(s)", time.monotonic() - t1, len(books))
|
||||
|
||||
books_errored_this_zip = 0
|
||||
for book in books:
|
||||
t2 = time.monotonic()
|
||||
status = client.upload(book, zip_source=remote_zip.remote_path)
|
||||
log.info("Upload '%s' → %s (%.1fs)", book.name, status, time.monotonic() - t2)
|
||||
time.sleep(2)
|
||||
if status == "uploaded":
|
||||
counters["books_uploaded"] += 1
|
||||
elif status == "skipped_duplicate":
|
||||
counters["books_skipped"] += 1
|
||||
dest = import_dir / book.name
|
||||
if dest.exists():
|
||||
log.info("Skipping '%s' — already exists in import dir", book.name)
|
||||
else:
|
||||
counters["books_errored"] += 1
|
||||
books_errored_this_zip += 1
|
||||
|
||||
if books_errored_this_zip:
|
||||
zip_status = "error"
|
||||
zip_error = f"{books_errored_this_zip} book upload(s) failed — will retry next sync"
|
||||
shutil.move(str(book), str(dest))
|
||||
log.info("Moved '%s' → %s", book.name, import_dir)
|
||||
counters["books_imported"] += 1
|
||||
|
||||
extractor.cleanup(work_dir / "extracted" / local_zip.stem)
|
||||
except CalibreUnavailableError as e:
|
||||
log.error("Calibre-Web unavailable — aborting sync run: %s", e)
|
||||
db.mark_zip_processed(remote_zip.remote_path, remote_zip.file_size, "error", str(e))
|
||||
db.finish_sync_run(run_id, status="error", error_msg=str(e), **counters)
|
||||
return
|
||||
except Exception as e:
|
||||
log.error("Error processing %s: %s", remote_zip.remote_path, e)
|
||||
zip_status = "error"
|
||||
zip_error = str(e)
|
||||
counters["books_errored"] += 1
|
||||
finally:
|
||||
if local_zip and local_zip.exists():
|
||||
extractor.cleanup(local_zip)
|
||||
@@ -126,9 +104,8 @@ def run_sync(limit: int | None = None) -> None:
|
||||
|
||||
db.finish_sync_run(run_id, status="success", **counters)
|
||||
log.info(
|
||||
"Sync complete. Total zips: %d, Uploaded: %d, Skipped: %d, Errors: %d",
|
||||
counters["zips_new"], counters["books_uploaded"],
|
||||
counters["books_skipped"], counters["books_errored"],
|
||||
"Sync complete. Total zips: %d, Imported: %d, Errors: %d",
|
||||
counters["zips_new"], counters["books_imported"], counters["books_errored"],
|
||||
)
|
||||
except Exception as e:
|
||||
log.exception("Sync run failed: %s", e)
|
||||
@@ -150,9 +127,7 @@ def _validate_config(cfg) -> None:
|
||||
missing.append("SSH private key")
|
||||
if cfg.sftp.auth_method == "password" and not cfg.sftp.password:
|
||||
missing.append("SSH password")
|
||||
if not cfg.calibre.url:
|
||||
missing.append("Calibre-Web URL")
|
||||
if not cfg.calibre.user:
|
||||
missing.append("Calibre-Web username")
|
||||
if not cfg.import_dir:
|
||||
missing.append("CWA import folder")
|
||||
if missing:
|
||||
raise ValueError(f"Missing configuration: {', '.join(missing)}")
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
{% extends "base.html" %}
|
||||
{% block title %}Books — CalibreSync{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="page-header">
|
||||
<h1>Books <span class="muted">({{ total }})</span></h1>
|
||||
</div>
|
||||
|
||||
{% if books %}
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Filename</th>
|
||||
<th>Status</th>
|
||||
<th>Source zip</th>
|
||||
<th>Uploaded</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for b in books %}
|
||||
<tr>
|
||||
<td>{{ b.filename }}</td>
|
||||
<td><span class="badge badge-{{ b.status }}">{{ b.status }}</span></td>
|
||||
<td class="mono small muted">{{ b.zip_source or "—" }}</td>
|
||||
<td>{{ b.uploaded_at[:19].replace("T"," ") if b.uploaded_at else "—" }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
{% if pages > 1 %}
|
||||
<div class="pagination">
|
||||
{% if page > 1 %}
|
||||
<a href="/books?page={{ page - 1 }}">« Prev</a>
|
||||
{% endif %}
|
||||
<span>Page {{ page }} of {{ pages }}</span>
|
||||
{% if page < pages %}
|
||||
<a href="/books?page={{ page + 1 }}">Next »</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% else %}
|
||||
<p class="muted">No books recorded yet.</p>
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
@@ -1,124 +0,0 @@
|
||||
{% extends "base.html" %}
|
||||
{% block title %}Duplicates — CalibreSync{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="page-header">
|
||||
<h1>Duplicate books in Calibre-Web</h1>
|
||||
{% if groups %}
|
||||
<div class="header-actions">
|
||||
<button class="btn btn-danger" onclick="deleteAll(this)">Delete all duplicates (keep oldest)</button>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div id="dedup-progress" style="display:none" class="alert alert-success"></div>
|
||||
|
||||
{% if error %}
|
||||
<div class="alert alert-warning">Could not fetch books from Calibre-Web: {{ error }}</div>
|
||||
{% else %}
|
||||
<p class="muted small" style="margin-bottom:1.5rem">
|
||||
Scanned <strong>{{ total_books }}</strong> book(s) —
|
||||
{% if groups %}
|
||||
found <strong>{{ groups|length }}</strong> duplicate group(s) (same title + author).
|
||||
The oldest copy (lowest ID) is kept when deleting all.
|
||||
{% else %}
|
||||
no duplicates found.
|
||||
{% endif %}
|
||||
</p>
|
||||
|
||||
{% for group in groups %}
|
||||
<div class="form-section" style="margin-bottom:1rem">
|
||||
<h3 style="margin-top:0">{{ group[0].title }}</h3>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>Title</th>
|
||||
<th>Authors</th>
|
||||
<th>Format</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for book in group %}
|
||||
<tr id="row-{{ book.id }}">
|
||||
<td class="mono muted">{{ book.id }}</td>
|
||||
<td>{{ book.title }}</td>
|
||||
<td>{{ book.authors }}</td>
|
||||
<td>{{ book.format or "—" }}</td>
|
||||
<td>
|
||||
<button class="btn btn-danger" style="padding:0.2rem 0.7rem;font-size:0.85rem"
|
||||
onclick="deleteBook({{ book.id }}, this)">Delete</button>
|
||||
<span id="status-{{ book.id }}" class="muted small" style="margin-left:0.5rem"></span>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
|
||||
<script>
|
||||
async function deleteBook(id, btn) {
|
||||
if (!confirm('Delete book ID ' + id + ' from Calibre-Web?')) return;
|
||||
btn.disabled = true;
|
||||
btn.textContent = 'Deleting…';
|
||||
const status = document.getElementById('status-' + id);
|
||||
try {
|
||||
const r = await fetch('/api/delete_book/' + id, {method: 'POST'});
|
||||
const data = await r.json();
|
||||
if (data.ok) {
|
||||
document.getElementById('row-' + id).style.opacity = '0.35';
|
||||
btn.textContent = 'Deleted';
|
||||
status.textContent = '✓';
|
||||
} else {
|
||||
btn.disabled = false;
|
||||
btn.textContent = 'Delete';
|
||||
status.textContent = 'Failed: ' + data.message;
|
||||
status.style.color = 'var(--error, #f87171)';
|
||||
}
|
||||
} catch (e) {
|
||||
btn.disabled = false;
|
||||
btn.textContent = 'Delete';
|
||||
status.textContent = 'Error: ' + e;
|
||||
}
|
||||
}
|
||||
|
||||
async function deleteAll(btn) {
|
||||
if (!confirm('Delete all duplicates from Calibre-Web, keeping the oldest copy of each title+author? This cannot be undone.')) return;
|
||||
btn.disabled = true;
|
||||
btn.textContent = 'Starting…';
|
||||
const progress = document.getElementById('dedup-progress');
|
||||
progress.style.display = '';
|
||||
progress.textContent = 'Fetching book list from Calibre-Web…';
|
||||
|
||||
await fetch('/api/delete_duplicates', {method: 'POST'});
|
||||
|
||||
const poll = setInterval(async () => {
|
||||
const r = await fetch('/api/delete_duplicates/status');
|
||||
const s = await r.json();
|
||||
if (s.error) {
|
||||
clearInterval(poll);
|
||||
progress.textContent = 'Error: ' + s.error;
|
||||
progress.className = 'alert alert-warning';
|
||||
btn.disabled = false;
|
||||
btn.textContent = 'Delete all duplicates (keep oldest)';
|
||||
return;
|
||||
}
|
||||
if (s.total > 0) {
|
||||
progress.textContent = `Deleting… ${s.deleted} / ${s.total} deleted, ${s.failed} failed`;
|
||||
} else {
|
||||
progress.textContent = 'Scanning for duplicates…';
|
||||
}
|
||||
if (s.done) {
|
||||
clearInterval(poll);
|
||||
progress.textContent = `Done — ${s.deleted} book(s) deleted, ${s.failed} failed. Reload to refresh the list.`;
|
||||
btn.textContent = 'Reload';
|
||||
btn.disabled = false;
|
||||
btn.onclick = () => location.reload();
|
||||
}
|
||||
}, 2000);
|
||||
}
|
||||
</script>
|
||||
{% endblock %}
|
||||
+4
-14
@@ -59,16 +59,8 @@
|
||||
<div class="stat-label">Zip archives processed</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<div class="stat-value">{{ stats.uploaded }}</div>
|
||||
<div class="stat-label">Books uploaded</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<div class="stat-value">{{ stats.skipped }}</div>
|
||||
<div class="stat-label">Duplicates skipped</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<div class="stat-value">{{ stats.total_books }}</div>
|
||||
<div class="stat-label">Total book records</div>
|
||||
<div class="stat-value">{{ stats.total_imported }}</div>
|
||||
<div class="stat-label">Books imported</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -81,8 +73,7 @@
|
||||
<th>Finished</th>
|
||||
<th>Status</th>
|
||||
<th>New zips</th>
|
||||
<th>Uploaded</th>
|
||||
<th>Skipped</th>
|
||||
<th>Imported</th>
|
||||
<th>Errors</th>
|
||||
</tr>
|
||||
</thead>
|
||||
@@ -93,8 +84,7 @@
|
||||
<td>{{ r.finished_at[:19].replace("T"," ") if r.finished_at else "—" }}</td>
|
||||
<td><span class="badge badge-{{ r.status }}">{{ r.status }}</span></td>
|
||||
<td>{{ r.zips_new }}</td>
|
||||
<td>{{ r.books_uploaded }}</td>
|
||||
<td>{{ r.books_skipped }}</td>
|
||||
<td>{{ r.books_imported }}</td>
|
||||
<td>{{ r.books_errored }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
|
||||
+11
-30
@@ -84,39 +84,20 @@
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="form-section">
|
||||
<h2>Calibre-Web</h2>
|
||||
|
||||
<div class="form-row">
|
||||
<label for="calibre_url">URL</label>
|
||||
<input id="calibre_url" name="calibre_url" type="url" placeholder="http://localhost:8083"
|
||||
value="{{ s.get('calibre_url','') }}">
|
||||
</div>
|
||||
|
||||
<div class="form-row">
|
||||
<label for="calibre_user">Username</label>
|
||||
<input id="calibre_user" name="calibre_user" type="text" value="{{ s.get('calibre_user','') }}">
|
||||
</div>
|
||||
|
||||
<div class="form-row">
|
||||
<label for="calibre_pass">Password</label>
|
||||
<input id="calibre_pass" name="calibre_pass" type="password"
|
||||
value="{{ s.get('calibre_pass','') }}">
|
||||
</div>
|
||||
|
||||
<div class="form-row">
|
||||
<button type="button" class="btn btn-secondary" onclick="testConn('calibre', this)">Test Calibre-Web connection</button>
|
||||
<p id="test-calibre-result" class="test-result"></p>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="form-section">
|
||||
<h2>Local</h2>
|
||||
|
||||
<div class="form-row">
|
||||
<label for="local_work_dir">Work directory</label>
|
||||
<input id="local_work_dir" name="local_work_dir" type="text" placeholder="/tmp/calibresync"
|
||||
value="{{ s.get('local_work_dir','/tmp/calibresync') }}">
|
||||
<label for="import_dir">CWA import folder</label>
|
||||
<input id="import_dir" name="import_dir" type="text" placeholder="/mnt/cwa-import"
|
||||
value="{{ s.get('import_dir','') }}">
|
||||
<p class="muted small">Folder watched by Calibre-Web-Automated. Extracted epub/pdf files are moved here flat.</p>
|
||||
</div>
|
||||
|
||||
<div class="form-row">
|
||||
<label for="work_dir">Temp work directory</label>
|
||||
<input id="work_dir" name="work_dir" type="text" placeholder="/tmp/calibresync"
|
||||
value="{{ s.get('work_dir','/tmp/calibresync') }}">
|
||||
<p class="muted small">Temporary storage for downloaded zips and extracted files. Cleaned up after each run.</p>
|
||||
</div>
|
||||
</section>
|
||||
@@ -182,7 +163,7 @@ async function testConn(type, btn) {
|
||||
result.className = "test-result test-fail";
|
||||
} finally {
|
||||
btn.disabled = false;
|
||||
btn.textContent = type === "ssh" ? "Test SSH connection" : "Test Calibre-Web connection";
|
||||
btn.textContent = "Test SSH connection";
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
-413
@@ -1,413 +0,0 @@
|
||||
import hashlib
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
import unicodedata
|
||||
from pathlib import Path
|
||||
from urllib.parse import quote
|
||||
|
||||
import requests
|
||||
|
||||
import db
|
||||
from config import CalibreConfig
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
MIME_TYPES = {
|
||||
".epub": "application/epub+zip",
|
||||
".pdf": "application/pdf",
|
||||
}
|
||||
|
||||
# Words stripped before comparing titles — release-group tags, language codes, format names, etc.
|
||||
_JUNK_WORDS = {
|
||||
"retail", "epub", "ebook", "pdf", "mobi", "azw3", "decipher",
|
||||
"swedish", "english", "danish", "norwegian", "finnish", "german", "french",
|
||||
"the", "a", "an", "och", "und", "les", "der", "die", "das",
|
||||
}
|
||||
|
||||
|
||||
class CalibreUnavailableError(RuntimeError):
|
||||
"""Raised when Calibre-Web returns repeated 502/503/504 — sync run should abort."""
|
||||
|
||||
|
||||
class CalibreClient:
|
||||
def __init__(self, cfg: CalibreConfig):
|
||||
self._cfg = cfg
|
||||
self._session = requests.Session()
|
||||
self._authenticated = False
|
||||
self._upload_csrf: str | None = None
|
||||
self._consecutive_failures = 0
|
||||
# Pre-loaded title word-sets for fast duplicate detection (set by preload_existing_titles)
|
||||
self._existing_title_sets: list[frozenset[str]] | None = None
|
||||
|
||||
def preload_existing_titles(self, books: list[dict]) -> None:
|
||||
"""Build an in-memory index of normalised title keywords from a pre-fetched book list."""
|
||||
self._existing_title_sets = [
|
||||
frozenset(_normalize_words(b.get("title", "")))
|
||||
for b in books
|
||||
if b.get("title")
|
||||
]
|
||||
log.info("Pre-loaded %d existing book titles for duplicate detection", len(self._existing_title_sets))
|
||||
|
||||
def _ensure_auth(self) -> None:
|
||||
if self._authenticated:
|
||||
return
|
||||
login_url = f"{self._cfg.url}/login"
|
||||
page = self._session.get(login_url, timeout=30)
|
||||
page.raise_for_status()
|
||||
csrf = _extract_csrf(page.text)
|
||||
|
||||
data = {"username": self._cfg.user, "password": self._cfg.password}
|
||||
if csrf:
|
||||
data["csrf_token"] = csrf
|
||||
|
||||
resp = self._session.post(login_url, data=data, allow_redirects=True, timeout=30)
|
||||
resp.raise_for_status()
|
||||
if resp.url.rstrip("/").endswith("/login"):
|
||||
raise RuntimeError("Calibre-Web authentication failed — check credentials")
|
||||
self._authenticated = True
|
||||
self._upload_csrf = _extract_csrf(resp.text) or csrf
|
||||
log.info("Authenticated to Calibre-Web at %s", self._cfg.url)
|
||||
|
||||
def _exists_in_calibre(self, filename: str) -> bool:
|
||||
"""Check whether a book already exists in Calibre-Web. Returns True if likely duplicate."""
|
||||
keywords = _keywords_from_filename(filename)
|
||||
if len(keywords) < 2:
|
||||
return False
|
||||
our_words = set(keywords)
|
||||
|
||||
# Fast path: check pre-loaded title index (available when sync pre-fetches all books)
|
||||
if self._existing_title_sets is not None:
|
||||
for their_words in self._existing_title_sets:
|
||||
if not their_words:
|
||||
continue
|
||||
overlap = len(our_words & their_words)
|
||||
# Match if: 3+ words in common, OR 60%+ of filename keywords match the title,
|
||||
# OR 60%+ of the stored title's words appear in the filename keywords.
|
||||
# The third condition catches short titles drowned out by filename noise.
|
||||
if (overlap >= 3
|
||||
or (overlap / len(our_words) >= 0.6)
|
||||
or (len(their_words) >= 2 and overlap / len(their_words) >= 0.6)):
|
||||
log.info("Duplicate (preloaded index): '%s'", filename)
|
||||
return True
|
||||
return False
|
||||
|
||||
# Slow path fallback: OPDS search (used when no index is available)
|
||||
query = " ".join(keywords[:6])
|
||||
try:
|
||||
resp = self._session.get(
|
||||
f"{self._cfg.url}/opds/search/{quote(query, safe='')}",
|
||||
auth=(self._cfg.user, self._cfg.password),
|
||||
timeout=15,
|
||||
)
|
||||
if resp.status_code == 404:
|
||||
return False
|
||||
calibre_titles = _parse_opds_titles(resp.text)
|
||||
if not calibre_titles:
|
||||
return False
|
||||
|
||||
for title in calibre_titles:
|
||||
their_words = set(_normalize_words(title))
|
||||
if not their_words:
|
||||
continue
|
||||
overlap = len(our_words & their_words)
|
||||
if (overlap >= 3
|
||||
or (overlap / len(our_words) >= 0.6)
|
||||
or (len(their_words) >= 2 and overlap / len(their_words) >= 0.6)):
|
||||
log.info("Duplicate (OPDS search): '%s'", filename)
|
||||
return True
|
||||
except Exception as e:
|
||||
log.warning("OPDS search failed for '%s': %s — proceeding with upload", filename, e)
|
||||
return False
|
||||
|
||||
def upload(self, book_path: Path, zip_source: str) -> str:
    """Upload a book file. Returns status: 'uploaded' | 'skipped_duplicate' | 'error'.

    The file is hashed first and skipped when that hash is already recorded
    in the local DB; otherwise a title lookup against Calibre-Web is done
    before the actual POST to ``/upload``. Every outcome is written back via
    ``db.record_book`` with the matching status string.

    Args:
        book_path: Local path of the book file to upload.
        zip_source: Passed through unchanged to ``db.record_book``
            (presumably the archive the book came from — confirm with caller).

    Returns:
        ``"uploaded"``, ``"skipped_duplicate"`` or ``"error"``.

    Raises:
        CalibreUnavailableError: when a 502/503/504 survives all three
            attempts and this is the third book in a row to fail that way.
    """
    file_hash = _sha256(book_path)

    # Primary guard: hash already in our DB
    if db.is_book_uploaded(file_hash):
        log.info("Skipping (already uploaded): %s", book_path.name)
        db.record_book(book_path.name, file_hash, zip_source, "skipped_duplicate")
        return "skipped_duplicate"

    try:
        self._ensure_auth()

        # Secondary guard: title search in Calibre-Web (catches pre-existing books)
        if self._exists_in_calibre(book_path.name):
            log.info("Skipping (exists in Calibre-Web): %s", book_path.name)
            db.record_book(book_path.name, file_hash, zip_source, "skipped_duplicate")
            return "skipped_duplicate"

        # Unknown extensions are sent as a generic binary stream.
        mime = MIME_TYPES.get(book_path.suffix.lower(), "application/octet-stream")
        # Up to three attempts per book (attempt is 1-based for the log messages).
        for attempt in range(1, 4):
            try:
                # The file is re-opened on every attempt so each POST reads from the start.
                with book_path.open("rb") as fh:
                    resp = self._session.post(
                        f"{self._cfg.url}/upload",
                        files={"btn-upload": (book_path.name, fh, mime)},
                        data={"csrf_token": self._upload_csrf} if self._upload_csrf else {},
                        timeout=120,
                    )
                if not resp.ok:
                    # Log the first 300 characters of the body before raising.
                    log.error("Upload HTTP %s (attempt %d/3) — body: %s", resp.status_code, attempt, resp.text[:300])
                resp.raise_for_status()
                log.info("Uploaded: %s", book_path.name)
                # A success resets the consecutive-failure counter.
                self._consecutive_failures = 0
                db.record_book(book_path.name, file_hash, zip_source, "uploaded")
                # Add to in-session index so a later zip with the same title is skipped
                if self._existing_title_sets is not None:
                    kw = frozenset(_keywords_from_filename(book_path.name))
                    if kw:
                        self._existing_title_sets.append(kw)
                return "uploaded"
            except requests.HTTPError:
                # Gateway-style errors: wait and retry; other statuses fall through below.
                if resp.status_code in (502, 503, 504):
                    if attempt < 3:
                        log.warning("HTTP %s on attempt %d/3 — retrying in 180s ...", resp.status_code, attempt)
                        time.sleep(180)
                        continue
                    # All retries exhausted
                    self._consecutive_failures += 1
                    if self._consecutive_failures >= 3:
                        raise CalibreUnavailableError(
                            f"Calibre-Web returned {resp.status_code} on {self._consecutive_failures} "
                            "consecutive books — aborting sync run"
                        )
                    break
                # A 400 is treated as a stale CSRF token, but only once (first attempt).
                if resp.status_code == 400 and attempt == 1:
                    log.warning("HTTP 400 — CSRF token likely expired, re-authenticating ...")
                    self._authenticated = False
                    self._upload_csrf = None
                    self._ensure_auth()
                    continue
                # Any other HTTP error: give up on this book without retrying.
                break

        # Reached only when the loop ended via break or ran out of attempts.
        db.record_book(book_path.name, file_hash, zip_source, "error")
        return "error"
    except CalibreUnavailableError:
        # Record the failure, then let the abort signal propagate to the caller.
        db.record_book(book_path.name, file_hash, zip_source, "error")
        raise
    except Exception as e:
        # Anything else (auth, network, file errors raised inside the try) is a per-book error.
        log.error("Upload failed for %s: %s", book_path.name, e)
        db.record_book(book_path.name, file_hash, zip_source, "error")
        return "error"
|
||||
|
||||
|
||||
def fetch_all_books(cfg: CalibreConfig) -> list[dict]:
    """Fetch every book from Calibre-Web.

    Pages through ``/ajax/listbooks`` in chunks of 1000, de-duplicating rows
    by their ``id``. If the endpoint reports a total but fewer than half of
    that many books were collected, pagination is considered broken and the
    OPDS catalogue is used instead.

    Args:
        cfg: Connection settings; ``cfg.url`` is used as the base URL.

    Returns:
        The list of book row dicts as returned by the server (or the OPDS
        fallback result).

    Raises:
        requests.HTTPError: if a ``/ajax/listbooks`` request returns an
            error status (via ``raise_for_status``).
    """
    client = CalibreClient(cfg)
    client._ensure_auth()
    all_books: list[dict] = []
    seen_ids: set = set()
    page_size = 1000
    start = 0
    reported_total = 0
    while True:
        resp = client._session.get(
            f"{cfg.url}/ajax/listbooks",
            params={
                "draw": 1,
                # Both parameter spellings are sent for the same offset/length.
                "start": start, "length": page_size,
                "iDisplayStart": start, "iDisplayLength": page_size,
            },
            timeout=60,
        )
        resp.raise_for_status()
        data = resp.json()
        if start == 0:
            # Log the scalar metadata once to help diagnose which total field is in use.
            non_list = {k: v for k, v in data.items() if not isinstance(v, list)}
            log.info("listbooks page-0 meta fields: %s", non_list)
        rows = data.get("rows") or data.get("data") or []
        reported_total = (
            data.get("recordsTotal") or data.get("total_count") or
            data.get("total") or data.get("totalNotFiltered") or 0
        )
        new_in_page = 0
        for b in rows:
            bid = b.get("id")
            if bid not in seen_ids:
                seen_ids.add(bid)
                all_books.append(b)
                new_in_page += 1
        log.info("Books fetched: %d / %d (page gave %d new)", len(all_books), reported_total, new_in_page)
        # An empty page, or a page of only already-seen ids, means there is nothing more to get.
        if not rows or new_in_page == 0:
            break
        # Only trust the total as a stop condition when the server actually
        # reported one; with an unknown total (0) the comparison would always
        # be true and paging would stop after the first page.
        if reported_total and len(all_books) >= reported_total:
            break
        start += len(rows)

    # If we got far fewer books than reported, listbooks pagination is broken — use OPDS instead
    if reported_total > 0 and len(all_books) < reported_total // 2:
        log.warning(
            "listbooks pagination broken (%d/%d books retrieved). Falling back to OPDS.",
            len(all_books), reported_total,
        )
        return _fetch_all_books_opds(cfg)
    return all_books
|
||||
|
||||
|
||||
def _fetch_all_books_opds(cfg: CalibreConfig) -> list[dict]:
    """Fetch all books via OPDS catalog, following next-page links.

    Starts at ``{cfg.url}/opds/new`` and walks ``rel="next"`` links. Each
    ``entry`` contributes ``{"id", "title", "authors"}``, where the id is
    taken from the first ``/download/<id>/`` link in the entry and authors
    are joined with ``" & "``. Entries without a download id, or with an id
    already seen, are ignored.

    Paging stops on an HTTP error, an XML parse error, a page without
    entries, a missing next link, or a next link that points to a page that
    was already fetched.

    Args:
        cfg: Connection settings (``url``, ``user``, ``password``).

    Returns:
        The collected book dicts (possibly partial if paging stopped early).
    """
    import xml.etree.ElementTree as ET

    def _local_name(tag: str) -> str:
        # Drop the "{namespace}" prefix ElementTree puts on qualified tags.
        return tag.split("}")[-1] if "}" in tag else tag

    books: list[dict] = []
    seen_ids: set = set()
    visited_urls: set = set()
    url: str | None = f"{cfg.url}/opds/new"
    auth = (cfg.user, cfg.password)

    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        while url:
            if url in visited_urls:
                # A feed whose "next" link loops back would otherwise never terminate.
                log.warning("OPDS next link loops back to %s — stopping", url)
                break
            visited_urls.add(url)

            resp = session.get(url, auth=auth, timeout=30)
            if not resp.ok:
                log.warning("OPDS fetch failed HTTP %s — %s", resp.status_code, url)
                break
            try:
                root = ET.fromstring(resp.content)
            except ET.ParseError as exc:
                log.warning("OPDS XML parse error: %s", exc)
                break

            next_url: str | None = None
            entries_this_page = 0
            for elem in root:
                local = _local_name(elem.tag)
                if local == "link" and elem.get("rel") == "next":
                    href = elem.get("href", "")
                    # An empty href would resolve to the site root, not a feed page.
                    if href:
                        next_url = href if href.startswith("http") else f"{cfg.url}{href}"
                elif local == "entry":
                    entries_this_page += 1
                    title = ""
                    author_parts: list[str] = []
                    book_id: int | None = None
                    for child in elem:
                        ctag = _local_name(child.tag)
                        if ctag == "title":
                            title = child.text or ""
                        elif ctag == "author":
                            for gc in child:
                                if _local_name(gc.tag) == "name":
                                    author_parts.append(gc.text or "")
                        elif ctag == "link":
                            # Only the first download link in an entry supplies the id.
                            m = re.search(r"/download/(\d+)/", child.get("href", ""))
                            if m and book_id is None:
                                book_id = int(m.group(1))
                    if book_id and book_id not in seen_ids:
                        seen_ids.add(book_id)
                        books.append({"id": book_id, "title": title, "authors": " & ".join(author_parts)})

            log.info("OPDS fetched: %d books total (page had %d entries)", len(books), entries_this_page)
            if not entries_this_page:
                break
            url = next_url

    return books
|
||||
|
||||
|
||||
def delete_book(cfg: CalibreConfig, book_id: int, client: "CalibreClient | None" = None) -> tuple[bool, str]:
    """Delete a book from Calibre-Web by ID.

    Pass a pre-authenticated client to avoid re-auth overhead; when none is
    given a new client is created and authenticated here.

    The delete is POSTed to ``/delete/<book_id>`` with the client's CSRF
    token. If the client has no token yet, one is looked up best-effort on
    the book detail page. A 400 response on the first attempt is treated as
    an expired token: the client re-authenticates and the delete is retried
    once.

    Args:
        cfg: Connection settings; ``cfg.url`` is used as the base URL.
        book_id: Calibre-Web id of the book to delete.
        client: Optional client to reuse across several deletions.

    Returns:
        ``(True, "Deleted")`` on success, otherwise ``(False, reason)`` where
        the reason is ``"HTTP <status>"`` or, when the retry also got a 400,
        ``"HTTP 400 after re-auth retry"``.
    """
    if client is None:
        client = CalibreClient(cfg)
        client._ensure_auth()
    csrf = client._upload_csrf
    if not csrf:
        # Try to fetch a CSRF token from the book detail page
        try:
            page = client._session.get(f"{cfg.url}/book/{book_id}", timeout=15)
            csrf = _extract_csrf(page.text)
            client._upload_csrf = csrf
        except Exception as exc:
            # Best-effort only: the delete is still attempted without a token,
            # but the reason is no longer silently discarded.
            log.debug("delete_book: could not pre-fetch CSRF token for book %d: %s", book_id, exc)
    for attempt in range(2):
        resp = client._session.post(
            f"{cfg.url}/delete/{book_id}",
            data={"csrf_token": csrf} if csrf else {},
            timeout=30,
        )
        if resp.ok:
            return True, "Deleted"
        if resp.status_code != 400:
            return False, f"HTTP {resp.status_code}"
        if attempt == 0:
            # CSRF token likely expired; re-authenticate and retry once
            log.info("delete_book: 400 on book %d — refreshing CSRF and retrying", book_id)
            client._authenticated = False
            client._upload_csrf = None
            client._ensure_auth()
            csrf = client._upload_csrf
    # Both attempts were answered with 400.
    return False, "HTTP 400 after re-auth retry"
|
||||
|
||||
|
||||
def find_duplicate_groups(books: list[dict]) -> list[list[dict]]:
    """Group books by normalised title+author; return only groups with 2+ entries.

    Normalisation lower-cases the text, replaces every character that is
    neither a word character nor whitespace with a space, and collapses runs
    of whitespace. Books whose normalised title is empty are ignored. A
    missing or ``None`` title/authors value is treated as an empty string.

    Args:
        books: Book dicts; the ``"title"`` and ``"authors"`` keys are read.

    Returns:
        Groups of two or more books sharing the same normalised title and
        authors, sorted by the lower-cased title of each group's first book.
    """
    from collections import defaultdict

    def _normalise(value: object) -> str:
        # "or ''" covers keys that are present but hold None.
        text = re.sub(r"[^\w\s]", " ", str(value or "").lower())
        return re.sub(r"\s+", " ", text).strip()

    groups: dict[str, list[dict]] = defaultdict(list)
    for book in books:
        title = _normalise(book.get("title"))
        if not title:
            continue
        authors = _normalise(book.get("authors"))
        groups[f"{title}||{authors}"].append(book)

    return sorted(
        (g for g in groups.values() if len(g) > 1),
        key=lambda g: str(g[0].get("title") or "").lower(),
    )
|
||||
|
||||
|
||||
def test_connection(cfg: CalibreConfig) -> tuple[bool, str]:
    """Check that the given settings can authenticate against Calibre-Web.

    Returns ``(True, message)`` when authentication succeeds, otherwise
    ``(False, error_text)`` where the text is ``str()`` of the exception
    that was raised.
    """
    try:
        CalibreClient(cfg)._ensure_auth()
        message = f"Authenticated to {cfg.url} as '{cfg.user}'."
    except Exception as exc:
        # Any failure is reported to the caller as text rather than raised.
        return False, str(exc)
    return True, message
|
||||
|
||||
|
||||
# --- Helpers ---
|
||||
|
||||
def _ascii_fold(s: str) -> str:
|
||||
"""Strip accents: 'världens' → 'varldens', 'väg' → 'vag'."""
|
||||
return "".join(c for c in unicodedata.normalize("NFKD", s) if unicodedata.category(c) != "Mn")
|
||||
|
||||
|
||||
def _keywords_from_filename(filename: str) -> list[str]:
    """Extract meaningful words from a release-style filename for OPDS search.

    The file stem is lower-cased and accent-folded, ``.``, ``_`` and ``-``
    become spaces, and remaining punctuation is removed. Words listed in
    ``_JUNK_WORDS``, purely numeric words and single-character words are
    dropped; the rest are returned in their original order.
    """
    base = _ascii_fold(Path(filename).stem.lower())
    spaced = re.sub(r"[._\-]", " ", base)
    cleaned = re.sub(r"[^\w\s]", "", spaced)

    keywords: list[str] = []
    for word in cleaned.split():
        if word in _JUNK_WORDS:
            continue
        # Four-digit words (e.g. years) and any other all-digit words are skipped.
        if re.match(r"^\d{4}$", word) or re.match(r"^\d+$", word):
            continue
        if len(word) <= 1:
            continue
        keywords.append(word)
    return keywords
|
||||
|
||||
|
||||
def _normalize_words(title: str) -> list[str]:
    """Normalize a Calibre-Web title into comparable words.

    Lower-cases and accent-folds the title, removes punctuation, then keeps
    every word that is longer than one character and not in ``_JUNK_WORDS``.
    """
    folded = _ascii_fold(title.lower())
    # Punctuation is deleted (not replaced by a space), so "spider-man" becomes "spiderman".
    stripped = re.sub(r"[^\w\s]", "", folded)

    words: list[str] = []
    for word in stripped.split():
        if word in _JUNK_WORDS or len(word) <= 1:
            continue
        words.append(word)
    return words
|
||||
|
||||
|
||||
def _parse_opds_titles(xml: str) -> list[str]:
|
||||
"""Extract book titles from an OPDS Atom feed, skipping the feed title itself."""
|
||||
# Grab all <title> elements; the first is the feed title ("Search results"), rest are books
|
||||
titles = re.findall(r"<title>([^<]+)</title>", xml)
|
||||
return titles[1:] if len(titles) > 1 else []
|
||||
|
||||
|
||||
def _extract_csrf(html: str) -> str | None:
|
||||
m = re.search(r'name="csrf_token"\s+value="([^"]+)"', html)
|
||||
if not m:
|
||||
m = re.search(r'value="([^"]+)"\s+name="csrf_token"', html)
|
||||
return m.group(1) if m else None
|
||||
|
||||
|
||||
def _sha256(path: Path) -> str:
|
||||
h = hashlib.sha256()
|
||||
with path.open("rb") as f:
|
||||
for chunk in iter(lambda: f.read(65536), b""):
|
||||
h.update(chunk)
|
||||
return h.hexdigest()
|
||||
Reference in New Issue
Block a user