diff --git a/config.py b/config.py index 315dd51..adfeca5 100644 --- a/config.py +++ b/config.py @@ -14,18 +14,11 @@ class SFTPConfig: remote_path: str = "" -@dataclass -class CalibreConfig: - url: str = "" - user: str = "" - password: str = "" - - @dataclass class AppConfig: sftp: SFTPConfig = field(default_factory=SFTPConfig) - calibre: CalibreConfig = field(default_factory=CalibreConfig) - local_work_dir: str = "/tmp/calibresync" + work_dir: str = "/tmp/calibresync" + import_dir: str = "" def load() -> AppConfig: @@ -40,12 +33,8 @@ def load() -> AppConfig: password=s.get("sftp_password", ""), remote_path=s.get("sftp_remote_path", ""), ), - calibre=CalibreConfig( - url=s.get("calibre_url", "").rstrip("/"), - user=s.get("calibre_user", ""), - password=s.get("calibre_pass", ""), - ), - local_work_dir=s.get("local_work_dir", "/tmp/calibresync"), + work_dir=s.get("work_dir", "/tmp/calibresync"), + import_dir=s.get("import_dir", ""), ) @@ -53,8 +42,8 @@ def save(form: dict) -> None: keys = [ "sftp_host", "sftp_port", "sftp_user", "sftp_auth_method", "sftp_password", "sftp_remote_path", - "calibre_url", "calibre_user", "calibre_pass", - "local_work_dir", "scheduler_interval_minutes", "sync_batch_size", + "work_dir", "import_dir", + "scheduler_interval_minutes", "sync_batch_size", ] for key in keys: if key in form and form[key] is not None: diff --git a/db.py b/db.py index e759dc7..fc85aa9 100644 --- a/db.py +++ b/db.py @@ -47,26 +47,16 @@ def init_db() -> None: error_msg TEXT ); - CREATE TABLE IF NOT EXISTS uploaded_books ( - id INTEGER PRIMARY KEY, - filename TEXT NOT NULL, - file_hash TEXT UNIQUE NOT NULL, - zip_source TEXT, - uploaded_at TEXT, - status TEXT - ); - CREATE TABLE IF NOT EXISTS sync_runs ( - id INTEGER PRIMARY KEY, - started_at TEXT NOT NULL, - finished_at TEXT, - zips_found INTEGER DEFAULT 0, - zips_new INTEGER DEFAULT 0, - books_uploaded INTEGER DEFAULT 0, - books_skipped INTEGER DEFAULT 0, + id INTEGER PRIMARY KEY, + started_at TEXT NOT NULL, + finished_at TEXT, + zips_found INTEGER DEFAULT 0, + zips_new INTEGER DEFAULT 0, + books_imported INTEGER DEFAULT 0, books_errored INTEGER DEFAULT 0, - status TEXT DEFAULT 'running', - error_msg TEXT + status TEXT DEFAULT 'running', + error_msg TEXT ); CREATE TABLE IF NOT EXISTS remote_zip_cache ( @@ -171,40 +161,6 @@ def get_recent_zips(limit: int = 50) -> list[sqlite3.Row]: ).fetchall() -# --- Uploaded books --- - -def is_book_uploaded(file_hash: str) -> bool: - with get_db() as conn: - row = conn.execute( - "SELECT id FROM uploaded_books WHERE file_hash = ? AND status IN ('uploaded', 'skipped_duplicate')", - (file_hash,), - ).fetchone() - return row is not None - - -def record_book(filename: str, file_hash: str, zip_source: str, status: str) -> None: - with get_db() as conn: - conn.execute( - """INSERT INTO uploaded_books (filename, file_hash, zip_source, uploaded_at, status) - VALUES (?, ?, ?, ?, ?) - ON CONFLICT(file_hash) DO UPDATE SET status = excluded.status""", - (filename, file_hash, zip_source, _now(), status), - ) - - -def get_books(limit: int = 200, offset: int = 0) -> list[sqlite3.Row]: - with get_db() as conn: - return conn.execute( - "SELECT * FROM uploaded_books ORDER BY uploaded_at DESC LIMIT ? OFFSET ?", - (limit, offset), - ).fetchall() - - -def get_books_count() -> int: - with get_db() as conn: - return conn.execute("SELECT COUNT(*) FROM uploaded_books").fetchone()[0] - - # --- Sync runs --- def start_sync_run() -> int: @@ -233,35 +189,28 @@ def get_recent_runs(limit: int = 10) -> list[sqlite3.Row]: def get_stats() -> dict: with get_db() as conn: - total_books = conn.execute("SELECT COUNT(*) FROM uploaded_books").fetchone()[0] - uploaded = conn.execute( - "SELECT COUNT(*) FROM uploaded_books WHERE status = 'uploaded'" - ).fetchone()[0] - skipped = conn.execute( - "SELECT COUNT(*) FROM uploaded_books WHERE status = 'skipped_duplicate'" - ).fetchone()[0] total_zips = conn.execute("SELECT COUNT(*) FROM processed_zips").fetchone()[0] + total_imported = conn.execute( + "SELECT COALESCE(SUM(books_imported), 0) FROM sync_runs" + ).fetchone()[0] last_run = conn.execute( "SELECT started_at, status FROM sync_runs ORDER BY started_at DESC LIMIT 1" ).fetchone() return { - "total_books": total_books, - "uploaded": uploaded, - "skipped": skipped, "total_zips": total_zips, + "total_imported": total_imported, "last_run": dict(last_run) if last_run else None, } def clear_sync_data() -> dict: - """Delete all processed_zips, uploaded_books, and sync_runs rows. Settings are kept. + """Delete all processed_zips and sync_runs rows. Settings are kept. Also resets the remote scan timestamp so the next sync does a full rescan.""" with get_db() as conn: zips = conn.execute("DELETE FROM processed_zips").rowcount - books = conn.execute("DELETE FROM uploaded_books").rowcount runs = conn.execute("DELETE FROM sync_runs").rowcount conn.execute("DELETE FROM settings WHERE key = 'remote_cache_last_scan'") - return {"zips": zips, "books": books, "runs": runs} + return {"zips": zips, "runs": runs} def _now() -> str: diff --git a/docker-compose.yml b/docker-compose.yml index 3b79528..e09f12a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,6 +6,8 @@ services: volumes: # Persists the SQLite database and settings across container restarts - ./data:/app/data + # CWA import folder — set the host path to match your CWA ingest directory + - /path/to/cwa-import:/cwa-import # Optional: mount your SSH private key read-only instead of pasting it in the UI # - ~/.ssh/id_rsa:/run/secrets/ssh_key:ro restart: unless-stopped diff --git a/main.py b/main.py index 9b70385..e2b15ae 100644 --- a/main.py +++ b/main.py @@ -12,8 +12,6 @@ import config import db import sftp as sftp_module import sync -import uploader -from uploader import CalibreClient, delete_book, fetch_all_books, find_duplicate_groups logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s — %(message)s") log = logging.getLogger(__name__) @@ -77,23 +75,6 @@ async def dashboard(request: Request): }) -# --- Books --- - -@app.get("/books", response_class=HTMLResponse) -async def books_page(request: Request, page: int = 1): - per_page = 50 - offset = (page - 1) * per_page - books = [dict(b) for b in db.get_books(limit=per_page, offset=offset)] - total = db.get_books_count() - pages = max(1, (total + per_page - 1) // per_page) - return templates.TemplateResponse(request, "books.html", { - "books": books, - "page": page, - "pages": pages, - "total": total, - }) - - # --- Settings --- @app.get("/settings", response_class=HTMLResponse) @@ -117,10 +98,8 @@ async def save_settings( sftp_key: str = Form(""), sftp_password: str = Form(""), sftp_remote_path: str = Form(""), - calibre_url: str = Form(""), - calibre_user: str = Form(""), - calibre_pass: str = Form(""), - local_work_dir: str = Form("/tmp/calibresync"), + work_dir: str = Form("/tmp/calibresync"), + import_dir: str = Form(""), scheduler_interval_minutes: str = Form("0"), sync_batch_size: str = Form("0"), ): @@ -132,10 +111,8 @@ async def save_settings( "sftp_key": sftp_key, "sftp_password": sftp_password, "sftp_remote_path": sftp_remote_path, - "calibre_url": calibre_url, - "calibre_user": calibre_user, - "calibre_pass": calibre_pass, - "local_work_dir": local_work_dir, + "work_dir": work_dir, + "import_dir": import_dir, "scheduler_interval_minutes": scheduler_interval_minutes, "sync_batch_size": sync_batch_size, }) @@ -179,111 +156,6 @@ async def test_ssh(): return {"ok": ok, "message": message} -@app.get("/api/test/calibre") -async def test_calibre(): - cfg = config.load() - ok, message = uploader.test_connection(cfg.calibre) - return {"ok": ok, "message": message} - - -# --- Duplicates --- - -@app.get("/duplicates", response_class=HTMLResponse) -async def duplicates_page(request: Request): - cfg = config.load() - error = None - groups: list = [] - total_books = 0 - try: - books = fetch_all_books(cfg.calibre) - total_books = len(books) - groups = find_duplicate_groups(books) - except Exception as e: - error = str(e) - return templates.TemplateResponse(request, "duplicates.html", { - "groups": groups, - "total_books": total_books, - "error": error, - }) - - -@app.post("/api/delete_book/{book_id}") -async def delete_book_api(book_id: int): - cfg = config.load() - ok, message = delete_book(cfg.calibre, book_id) - return {"ok": ok, "message": message} - - -_dedup_state: dict = {"running": False, "deleted": 0, "failed": 0, "total": 0, "done": False, "error": None} - - -def _run_dedup(): - global _dedup_state - try: - cfg = config.load() - log.info("Dedup: fetching all books ...") - client = CalibreClient(cfg.calibre) - client._ensure_auth() - books = fetch_all_books(cfg.calibre) - groups = find_duplicate_groups(books) - to_delete = [b for group in groups for b in sorted(group, key=lambda x: x.get("id", 0))[1:]] - _dedup_state.update({"total": len(to_delete), "deleted": 0, "failed": 0}) - log.info("Dedup: %d duplicate(s) to delete across %d group(s)", len(to_delete), len(groups)) - for book in to_delete: - ok, msg = delete_book(cfg.calibre, book["id"], client) - if ok: - _dedup_state["deleted"] += 1 - else: - _dedup_state["failed"] += 1 - log.warning("Dedup: failed to delete book %d: %s", book["id"], msg) - if _dedup_state["deleted"] % 10 == 0: - log.info("Dedup progress: %d / %d deleted", _dedup_state["deleted"], _dedup_state["total"]) - log.info("Dedup done: %d deleted, %d failed", _dedup_state["deleted"], _dedup_state["failed"]) - except Exception as e: - log.error("Dedup error: %s", e) - _dedup_state["error"] = str(e) - finally: - _dedup_state["running"] = False - _dedup_state["done"] = True - - -@app.post("/api/delete_duplicates") -async def delete_duplicates_api(background_tasks: BackgroundTasks): - if _dedup_state["running"]: - return {"ok": False, "message": "Already running"} - _dedup_state.update({"running": True, "deleted": 0, "failed": 0, "total": 0, "done": False, "error": None}) - background_tasks.add_task(_run_dedup) - return {"ok": True, "message": "Started"} - - -@app.get("/api/delete_duplicates/status") -async def delete_duplicates_status(): - return _dedup_state - - -@app.get("/api/debug/calibre_books") -async def debug_calibre_books(): - """Show raw Calibre-Web listbooks response shape so we can identify field names.""" - cfg = config.load() - from uploader import CalibreClient - client = CalibreClient(cfg.calibre) - client._ensure_auth() - resp = client._session.get( - f"{cfg.calibre.url}/ajax/listbooks", - params={"draw": 1, "start": 0, "length": 5, "sort": "title", "order": "asc"}, - timeout=30, - ) - data = resp.json() - non_list = {k: v for k, v in data.items() if not isinstance(v, list)} - list_keys = {k: len(v) for k, v in data.items() if isinstance(v, list)} - return { - "http_status": resp.status_code, - "top_level_keys": list(data.keys()), - "non_list_fields": non_list, - "list_fields_lengths": list_keys, - } - - # --- Data reset --- @app.post("/settings/reset-sync-data") diff --git a/requirements.txt b/requirements.txt index 305da9b..66e2534 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,4 @@ jinja2 python-multipart paramiko rarfile -requests apscheduler diff --git a/sync.py b/sync.py index 84564f3..dd54e24 100644 --- a/sync.py +++ b/sync.py @@ -1,4 +1,5 @@ import logging +import shutil import threading import time from pathlib import Path @@ -7,7 +8,6 @@ import config import db import extractor import sftp as sftp_module -from uploader import CalibreClient, CalibreUnavailableError log = logging.getLogger(__name__) @@ -33,22 +33,23 @@ def run_sync(limit: int | None = None) -> None: _running = True run_id = db.start_sync_run() - counters = dict(zips_found=0, zips_new=0, books_uploaded=0, books_skipped=0, books_errored=0) + counters = dict(zips_found=0, zips_new=0, books_imported=0, books_errored=0) try: log.info("Sync started (limit=%s)", limit) cfg = config.load() _validate_config(cfg) - log.info("Config OK — work dir: %s", cfg.local_work_dir) + log.info("Config OK — work dir: %s, import dir: %s", cfg.work_dir, cfg.import_dir) - work_dir = Path(cfg.local_work_dir) + work_dir = Path(cfg.work_dir) work_dir.mkdir(parents=True, exist_ok=True) - log.info("Work dir ready: %s", work_dir) + + import_dir = Path(cfg.import_dir) + import_dir.mkdir(parents=True, exist_ok=True) log.info("Connecting to SFTP %s@%s:%s ...", cfg.sftp.user, cfg.sftp.host, cfg.sftp.port) new_zips = sftp_module.list_new_zips(cfg.sftp, max_results=limit) counters["zips_found"] = len(new_zips) - counters["zips_new"] = len(new_zips) if not new_zips: @@ -56,21 +57,11 @@ def run_sync(limit: int | None = None) -> None: db.finish_sync_run(run_id, status="success", **counters) return - # Determine chunk size; 0 means process everything in one chunk batch_size = int(db.get_setting("sync_batch_size", "0") or "0") if batch_size <= 0: batch_size = len(new_zips) - total_batches = -(-len(new_zips) // batch_size) # ceiling division - client = CalibreClient(cfg.calibre) - - # Pre-load existing book titles so duplicate detection doesn't need per-book OPDS searches - try: - from uploader import fetch_all_books - existing = fetch_all_books(cfg.calibre) - client.preload_existing_titles(existing) - except Exception as exc: - log.warning("Could not pre-load existing books (%s) — will fall back to per-book OPDS search", exc) + total_batches = -(-len(new_zips) // batch_size) for batch_num, i in enumerate(range(0, len(new_zips), batch_size), start=1): chunk = new_zips[i : i + batch_size] @@ -89,34 +80,21 @@ def run_sync(limit: int | None = None) -> None: books = extractor.extract(local_zip, work_dir / "extracted") log.info("Extract done in %.1fs — %d book(s)", time.monotonic() - t1, len(books)) - books_errored_this_zip = 0 for book in books: - t2 = time.monotonic() - status = client.upload(book, zip_source=remote_zip.remote_path) - log.info("Upload '%s' → %s (%.1fs)", book.name, status, time.monotonic() - t2) - time.sleep(2) - if status == "uploaded": - counters["books_uploaded"] += 1 - elif status == "skipped_duplicate": - counters["books_skipped"] += 1 + dest = import_dir / book.name + if dest.exists(): + log.info("Skipping '%s' — already exists in import dir", book.name) else: - counters["books_errored"] += 1 - books_errored_this_zip += 1 - - if books_errored_this_zip: - zip_status = "error" - zip_error = f"{books_errored_this_zip} book upload(s) failed — will retry next sync" + shutil.move(str(book), str(dest)) + log.info("Moved '%s' → %s", book.name, import_dir) + counters["books_imported"] += 1 extractor.cleanup(work_dir / "extracted" / local_zip.stem) - except CalibreUnavailableError as e: - log.error("Calibre-Web unavailable — aborting sync run: %s", e) - db.mark_zip_processed(remote_zip.remote_path, remote_zip.file_size, "error", str(e)) - db.finish_sync_run(run_id, status="error", error_msg=str(e), **counters) - return except Exception as e: log.error("Error processing %s: %s", remote_zip.remote_path, e) zip_status = "error" zip_error = str(e) + counters["books_errored"] += 1 finally: if local_zip and local_zip.exists(): extractor.cleanup(local_zip) @@ -126,9 +104,8 @@ def run_sync(limit: int | None = None) -> None: db.finish_sync_run(run_id, status="success", **counters) log.info( - "Sync complete. Total zips: %d, Uploaded: %d, Skipped: %d, Errors: %d", - counters["zips_new"], counters["books_uploaded"], - counters["books_skipped"], counters["books_errored"], + "Sync complete. Total zips: %d, Imported: %d, Errors: %d", + counters["zips_new"], counters["books_imported"], counters["books_errored"], ) except Exception as e: log.exception("Sync run failed: %s", e) @@ -150,9 +127,7 @@ def _validate_config(cfg) -> None: missing.append("SSH private key") if cfg.sftp.auth_method == "password" and not cfg.sftp.password: missing.append("SSH password") - if not cfg.calibre.url: - missing.append("Calibre-Web URL") - if not cfg.calibre.user: - missing.append("Calibre-Web username") + if not cfg.import_dir: + missing.append("CWA import folder") if missing: raise ValueError(f"Missing configuration: {', '.join(missing)}") diff --git a/templates/books.html b/templates/books.html deleted file mode 100644 index 1546e49..0000000 --- a/templates/books.html +++ /dev/null @@ -1,46 +0,0 @@ -{% extends "base.html" %} -{% block title %}Books — CalibreSync{% endblock %} - -{% block content %} -
| Filename | -Status | -Source zip | -Uploaded | -
|---|---|---|---|
| {{ b.filename }} | -{{ b.status }} | -{{ b.zip_source or "—" }} | -{{ b.uploaded_at[:19].replace("T"," ") if b.uploaded_at else "—" }} | -
No books recorded yet.
-{% endif %} -{% endblock %} diff --git a/templates/duplicates.html b/templates/duplicates.html deleted file mode 100644 index 09afc6e..0000000 --- a/templates/duplicates.html +++ /dev/null @@ -1,124 +0,0 @@ -{% extends "base.html" %} -{% block title %}Duplicates — CalibreSync{% endblock %} - -{% block content %} -- Scanned {{ total_books }} book(s) — - {% if groups %} - found {{ groups|length }} duplicate group(s) (same title + author). - The oldest copy (lowest ID) is kept when deleting all. - {% else %} - no duplicates found. - {% endif %} -
- - {% for group in groups %} -| ID | -Title | -Authors | -Format | -- |
|---|---|---|---|---|
| {{ book.id }} | -{{ book.title }} | -{{ book.authors }} | -{{ book.format or "—" }} | -- - - | -
Folder watched by Calibre-Web-Automated. Extracted epub/pdf files are moved here flat.
+Temporary storage for downloaded zips and extracted files. Cleaned up after each run.