cwa import: replace Calibre-Web uploads with moving extracted books into the CWA import folder

2026-05-13 18:24:55 +02:00
parent bfa09976b7
commit c0e1cb0688
11 changed files with 60 additions and 886 deletions
@@ -14,18 +14,11 @@ class SFTPConfig:
    remote_path: str = ""


-@dataclass
-class CalibreConfig:
-    url: str = ""
-    user: str = ""
-    password: str = ""
-
-
@dataclass
 class AppConfig:
    sftp: SFTPConfig = field(default_factory=SFTPConfig)
-    calibre: CalibreConfig = field(default_factory=CalibreConfig)
-    local_work_dir: str = "/tmp/calibresync"
+    work_dir: str = "/tmp/calibresync"
+    import_dir: str = ""


 def load() -> AppConfig:
@@ -40,12 +33,8 @@ def load() -> AppConfig:
            password=s.get("sftp_password", ""),
            remote_path=s.get("sftp_remote_path", ""),
        ),
-        calibre=CalibreConfig(
-            url=s.get("calibre_url", "").rstrip("/"),
-            user=s.get("calibre_user", ""),
-            password=s.get("calibre_pass", ""),
-        ),
-        local_work_dir=s.get("local_work_dir", "/tmp/calibresync"),
+        work_dir=s.get("work_dir", "/tmp/calibresync"),
+        import_dir=s.get("import_dir", ""),
    )


@@ -53,8 +42,8 @@ def save(form: dict) -> None:
    keys = [
        "sftp_host", "sftp_port", "sftp_user", "sftp_auth_method",
        "sftp_password", "sftp_remote_path",
-        "calibre_url", "calibre_user", "calibre_pass",
-        "local_work_dir", "scheduler_interval_minutes", "sync_batch_size",
+        "work_dir", "import_dir",
+        "scheduler_interval_minutes", "sync_batch_size",
    ]
    for key in keys:
        if key in form and form[key] is not None:
@@ -47,26 +47,16 @@ def init_db() -> None:
                error_msg    TEXT
            );

-            CREATE TABLE IF NOT EXISTS uploaded_books (
-                id           INTEGER PRIMARY KEY,
-                filename     TEXT NOT NULL,
-                file_hash    TEXT UNIQUE NOT NULL,
-                zip_source   TEXT,
-                uploaded_at  TEXT,
-                status       TEXT
-            );
-
            CREATE TABLE IF NOT EXISTS sync_runs (
-                id           INTEGER PRIMARY KEY,
-                started_at   TEXT NOT NULL,
-                finished_at  TEXT,
-                zips_found   INTEGER DEFAULT 0,
-                zips_new     INTEGER DEFAULT 0,
-                books_uploaded INTEGER DEFAULT 0,
-                books_skipped  INTEGER DEFAULT 0,
+                id            INTEGER PRIMARY KEY,
+                started_at    TEXT NOT NULL,
+                finished_at   TEXT,
+                zips_found    INTEGER DEFAULT 0,
+                zips_new      INTEGER DEFAULT 0,
+                books_imported INTEGER DEFAULT 0,
                books_errored  INTEGER DEFAULT 0,
-                status       TEXT DEFAULT 'running',
-                error_msg    TEXT
+                status        TEXT DEFAULT 'running',
+                error_msg     TEXT
            );

            CREATE TABLE IF NOT EXISTS remote_zip_cache (
@@ -171,40 +161,6 @@ def get_recent_zips(limit: int = 50) -> list[sqlite3.Row]:
        ).fetchall()


-# --- Uploaded books ---
-
-def is_book_uploaded(file_hash: str) -> bool:
-    with get_db() as conn:
-        row = conn.execute(
-            "SELECT id FROM uploaded_books WHERE file_hash = ? AND status IN ('uploaded', 'skipped_duplicate')",
-            (file_hash,),
-        ).fetchone()
-    return row is not None
-
-
-def record_book(filename: str, file_hash: str, zip_source: str, status: str) -> None:
-    with get_db() as conn:
-        conn.execute(
-            """INSERT INTO uploaded_books (filename, file_hash, zip_source, uploaded_at, status)
-               VALUES (?, ?, ?, ?, ?)
-               ON CONFLICT(file_hash) DO UPDATE SET status = excluded.status""",
-            (filename, file_hash, zip_source, _now(), status),
-        )
-
-
-def get_books(limit: int = 200, offset: int = 0) -> list[sqlite3.Row]:
-    with get_db() as conn:
-        return conn.execute(
-            "SELECT * FROM uploaded_books ORDER BY uploaded_at DESC LIMIT ? OFFSET ?",
-            (limit, offset),
-        ).fetchall()
-
-
-def get_books_count() -> int:
-    with get_db() as conn:
-        return conn.execute("SELECT COUNT(*) FROM uploaded_books").fetchone()[0]
-
-
 # --- Sync runs ---

 def start_sync_run() -> int:
@@ -233,35 +189,28 @@ def get_recent_runs(limit: int = 10) -> list[sqlite3.Row]:

 def get_stats() -> dict:
    with get_db() as conn:
-        total_books = conn.execute("SELECT COUNT(*) FROM uploaded_books").fetchone()[0]
-        uploaded = conn.execute(
-            "SELECT COUNT(*) FROM uploaded_books WHERE status = 'uploaded'"
-        ).fetchone()[0]
-        skipped = conn.execute(
-            "SELECT COUNT(*) FROM uploaded_books WHERE status = 'skipped_duplicate'"
-        ).fetchone()[0]
        total_zips = conn.execute("SELECT COUNT(*) FROM processed_zips").fetchone()[0]
+        total_imported = conn.execute(
+            "SELECT COALESCE(SUM(books_imported), 0) FROM sync_runs"
+        ).fetchone()[0]
        last_run = conn.execute(
            "SELECT started_at, status FROM sync_runs ORDER BY started_at DESC LIMIT 1"
        ).fetchone()
    return {
-        "total_books": total_books,
-        "uploaded": uploaded,
-        "skipped": skipped,
        "total_zips": total_zips,
+        "total_imported": total_imported,
        "last_run": dict(last_run) if last_run else None,
    }


 def clear_sync_data() -> dict:
-    """Delete all processed_zips, uploaded_books, and sync_runs rows. Settings are kept.
+    """Delete all processed_zips and sync_runs rows. Settings are kept.
    Also resets the remote scan timestamp so the next sync does a full rescan."""
    with get_db() as conn:
        zips = conn.execute("DELETE FROM processed_zips").rowcount
-        books = conn.execute("DELETE FROM uploaded_books").rowcount
        runs = conn.execute("DELETE FROM sync_runs").rowcount
        conn.execute("DELETE FROM settings WHERE key = 'remote_cache_last_scan'")
-    return {"zips": zips, "books": books, "runs": runs}
+    return {"zips": zips, "runs": runs}


 def _now() -> str:
@@ -6,6 +6,8 @@ services:
    volumes:
      # Persists the SQLite database and settings across container restarts
      - ./data:/app/data
+      # CWA import folder — replace the host path (left of the colon) with your CWA ingest directory, and enter /cwa-import as the "CWA import folder" in Settings
+      - /path/to/cwa-import:/cwa-import
      # Optional: mount your SSH private key read-only instead of pasting it in the UI
      # - ~/.ssh/id_rsa:/run/secrets/ssh_key:ro
    restart: unless-stopped
@@ -12,8 +12,6 @@ import config
 import db
 import sftp as sftp_module
 import sync
-import uploader
-from uploader import CalibreClient, delete_book, fetch_all_books, find_duplicate_groups

 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s — %(message)s")
 log = logging.getLogger(__name__)
@@ -77,23 +75,6 @@ async def dashboard(request: Request):
    })


-# --- Books ---
-
-@app.get("/books", response_class=HTMLResponse)
-async def books_page(request: Request, page: int = 1):
-    per_page = 50
-    offset = (page - 1) * per_page
-    books = [dict(b) for b in db.get_books(limit=per_page, offset=offset)]
-    total = db.get_books_count()
-    pages = max(1, (total + per_page - 1) // per_page)
-    return templates.TemplateResponse(request, "books.html", {
-        "books": books,
-        "page": page,
-        "pages": pages,
-        "total": total,
-    })
-
-
 # --- Settings ---

@app.get("/settings", response_class=HTMLResponse)
@@ -117,10 +98,8 @@ async def save_settings(
    sftp_key: str = Form(""),
    sftp_password: str = Form(""),
    sftp_remote_path: str = Form(""),
-    calibre_url: str = Form(""),
-    calibre_user: str = Form(""),
-    calibre_pass: str = Form(""),
-    local_work_dir: str = Form("/tmp/calibresync"),
+    work_dir: str = Form("/tmp/calibresync"),
+    import_dir: str = Form(""),
    scheduler_interval_minutes: str = Form("0"),
    sync_batch_size: str = Form("0"),
 ):
@@ -132,10 +111,8 @@ async def save_settings(
        "sftp_key": sftp_key,
        "sftp_password": sftp_password,
        "sftp_remote_path": sftp_remote_path,
-        "calibre_url": calibre_url,
-        "calibre_user": calibre_user,
-        "calibre_pass": calibre_pass,
-        "local_work_dir": local_work_dir,
+        "work_dir": work_dir,
+        "import_dir": import_dir,
        "scheduler_interval_minutes": scheduler_interval_minutes,
        "sync_batch_size": sync_batch_size,
    })
@@ -179,111 +156,6 @@ async def test_ssh():
    return {"ok": ok, "message": message}


-@app.get("/api/test/calibre")
-async def test_calibre():
-    cfg = config.load()
-    ok, message = uploader.test_connection(cfg.calibre)
-    return {"ok": ok, "message": message}
-
-
-# --- Duplicates ---
-
-@app.get("/duplicates", response_class=HTMLResponse)
-async def duplicates_page(request: Request):
-    cfg = config.load()
-    error = None
-    groups: list = []
-    total_books = 0
-    try:
-        books = fetch_all_books(cfg.calibre)
-        total_books = len(books)
-        groups = find_duplicate_groups(books)
-    except Exception as e:
-        error = str(e)
-    return templates.TemplateResponse(request, "duplicates.html", {
-        "groups": groups,
-        "total_books": total_books,
-        "error": error,
-    })
-
-
-@app.post("/api/delete_book/{book_id}")
-async def delete_book_api(book_id: int):
-    cfg = config.load()
-    ok, message = delete_book(cfg.calibre, book_id)
-    return {"ok": ok, "message": message}
-
-
-_dedup_state: dict = {"running": False, "deleted": 0, "failed": 0, "total": 0, "done": False, "error": None}
-
-
-def _run_dedup():
-    global _dedup_state
-    try:
-        cfg = config.load()
-        log.info("Dedup: fetching all books ...")
-        client = CalibreClient(cfg.calibre)
-        client._ensure_auth()
-        books = fetch_all_books(cfg.calibre)
-        groups = find_duplicate_groups(books)
-        to_delete = [b for group in groups for b in sorted(group, key=lambda x: x.get("id", 0))[1:]]
-        _dedup_state.update({"total": len(to_delete), "deleted": 0, "failed": 0})
-        log.info("Dedup: %d duplicate(s) to delete across %d group(s)", len(to_delete), len(groups))
-        for book in to_delete:
-            ok, msg = delete_book(cfg.calibre, book["id"], client)
-            if ok:
-                _dedup_state["deleted"] += 1
-            else:
-                _dedup_state["failed"] += 1
-                log.warning("Dedup: failed to delete book %d: %s", book["id"], msg)
-            if _dedup_state["deleted"] % 10 == 0:
-                log.info("Dedup progress: %d / %d deleted", _dedup_state["deleted"], _dedup_state["total"])
-        log.info("Dedup done: %d deleted, %d failed", _dedup_state["deleted"], _dedup_state["failed"])
-    except Exception as e:
-        log.error("Dedup error: %s", e)
-        _dedup_state["error"] = str(e)
-    finally:
-        _dedup_state["running"] = False
-        _dedup_state["done"] = True
-
-
-@app.post("/api/delete_duplicates")
-async def delete_duplicates_api(background_tasks: BackgroundTasks):
-    if _dedup_state["running"]:
-        return {"ok": False, "message": "Already running"}
-    _dedup_state.update({"running": True, "deleted": 0, "failed": 0, "total": 0, "done": False, "error": None})
-    background_tasks.add_task(_run_dedup)
-    return {"ok": True, "message": "Started"}
-
-
-@app.get("/api/delete_duplicates/status")
-async def delete_duplicates_status():
-    return _dedup_state
-
-
-@app.get("/api/debug/calibre_books")
-async def debug_calibre_books():
-    """Show raw Calibre-Web listbooks response shape so we can identify field names."""
-    cfg = config.load()
-    from uploader import CalibreClient
-    client = CalibreClient(cfg.calibre)
-    client._ensure_auth()
-    resp = client._session.get(
-        f"{cfg.calibre.url}/ajax/listbooks",
-        params={"draw": 1, "start": 0, "length": 5, "sort": "title", "order": "asc"},
-        timeout=30,
-    )
-    data = resp.json()
-    non_list = {k: v for k, v in data.items() if not isinstance(v, list)}
-    list_keys = {k: len(v) for k, v in data.items() if isinstance(v, list)}
-    return {
-        "http_status": resp.status_code,
-        "top_level_keys": list(data.keys()),
-        "non_list_fields": non_list,
-        "list_fields_lengths": list_keys,
-    }
-
-
 # --- Data reset ---

@app.post("/settings/reset-sync-data")
@@ -4,5 +4,4 @@ jinja2
 python-multipart
 paramiko
 rarfile
-requests
 apscheduler
@@ -1,4 +1,5 @@
 import logging
+import shutil
 import threading
 import time
 from pathlib import Path
@@ -7,7 +8,6 @@ import config
 import db
 import extractor
 import sftp as sftp_module
-from uploader import CalibreClient, CalibreUnavailableError

 log = logging.getLogger(__name__)

@@ -33,22 +33,23 @@ def run_sync(limit: int | None = None) -> None:

    _running = True
    run_id = db.start_sync_run()
-    counters = dict(zips_found=0, zips_new=0, books_uploaded=0, books_skipped=0, books_errored=0)
+    counters = dict(zips_found=0, zips_new=0, books_imported=0, books_errored=0)

    try:
        log.info("Sync started (limit=%s)", limit)
        cfg = config.load()
        _validate_config(cfg)
-        log.info("Config OK — work dir: %s", cfg.local_work_dir)
+        log.info("Config OK — work dir: %s, import dir: %s", cfg.work_dir, cfg.import_dir)

-        work_dir = Path(cfg.local_work_dir)
+        work_dir = Path(cfg.work_dir)
        work_dir.mkdir(parents=True, exist_ok=True)
-        log.info("Work dir ready: %s", work_dir)
+
+        import_dir = Path(cfg.import_dir)
+        import_dir.mkdir(parents=True, exist_ok=True)

        log.info("Connecting to SFTP %s@%s:%s ...", cfg.sftp.user, cfg.sftp.host, cfg.sftp.port)
        new_zips = sftp_module.list_new_zips(cfg.sftp, max_results=limit)
        counters["zips_found"] = len(new_zips)
-
        counters["zips_new"] = len(new_zips)

        if not new_zips:
@@ -56,21 +57,11 @@ def run_sync(limit: int | None = None) -> None:
            db.finish_sync_run(run_id, status="success", **counters)
            return

-        # Determine chunk size; 0 means process everything in one chunk
        batch_size = int(db.get_setting("sync_batch_size", "0") or "0")
        if batch_size <= 0:
            batch_size = len(new_zips)

-        total_batches = -(-len(new_zips) // batch_size)  # ceiling division
-        client = CalibreClient(cfg.calibre)
-
-        # Pre-load existing book titles so duplicate detection doesn't need per-book OPDS searches
-        try:
-            from uploader import fetch_all_books
-            existing = fetch_all_books(cfg.calibre)
-            client.preload_existing_titles(existing)
-        except Exception as exc:
-            log.warning("Could not pre-load existing books (%s) — will fall back to per-book OPDS search", exc)
+        total_batches = -(-len(new_zips) // batch_size)

        for batch_num, i in enumerate(range(0, len(new_zips), batch_size), start=1):
            chunk = new_zips[i : i + batch_size]
@@ -89,34 +80,21 @@ def run_sync(limit: int | None = None) -> None:
                    books = extractor.extract(local_zip, work_dir / "extracted")
                    log.info("Extract done in %.1fs — %d book(s)", time.monotonic() - t1, len(books))

-                    books_errored_this_zip = 0
                    for book in books:
-                        t2 = time.monotonic()
-                        status = client.upload(book, zip_source=remote_zip.remote_path)
-                        log.info("Upload '%s' → %s (%.1fs)", book.name, status, time.monotonic() - t2)
-                        time.sleep(2)
-                        if status == "uploaded":
-                            counters["books_uploaded"] += 1
-                        elif status == "skipped_duplicate":
-                            counters["books_skipped"] += 1
+                        dest = import_dir / book.name
+                        if dest.exists():
+                            log.info("Skipping '%s' — already exists in import dir", book.name)
                        else:
-                            counters["books_errored"] += 1
-                            books_errored_this_zip += 1
-
-                    if books_errored_this_zip:
-                        zip_status = "error"
-                        zip_error = f"{books_errored_this_zip} book upload(s) failed — will retry next sync"
+                            shutil.move(str(book), str(dest))
+                            log.info("Moved '%s' → %s", book.name, import_dir)
+                            counters["books_imported"] += 1

                    extractor.cleanup(work_dir / "extracted" / local_zip.stem)
-                except CalibreUnavailableError as e:
-                    log.error("Calibre-Web unavailable — aborting sync run: %s", e)
-                    db.mark_zip_processed(remote_zip.remote_path, remote_zip.file_size, "error", str(e))
-                    db.finish_sync_run(run_id, status="error", error_msg=str(e), **counters)
-                    return
                except Exception as e:
                    log.error("Error processing %s: %s", remote_zip.remote_path, e)
                    zip_status = "error"
                    zip_error = str(e)
+                    counters["books_errored"] += 1
                finally:
                    if local_zip and local_zip.exists():
                        extractor.cleanup(local_zip)
@@ -126,9 +104,8 @@ def run_sync(limit: int | None = None) -> None:

        db.finish_sync_run(run_id, status="success", **counters)
        log.info(
-            "Sync complete. Total zips: %d, Uploaded: %d, Skipped: %d, Errors: %d",
-            counters["zips_new"], counters["books_uploaded"],
-            counters["books_skipped"], counters["books_errored"],
+            "Sync complete. Total zips: %d, Imported: %d, Errors: %d",
+            counters["zips_new"], counters["books_imported"], counters["books_errored"],
        )
    except Exception as e:
        log.exception("Sync run failed: %s", e)
@@ -150,9 +127,7 @@ def _validate_config(cfg) -> None:
        missing.append("SSH private key")
    if cfg.sftp.auth_method == "password" and not cfg.sftp.password:
        missing.append("SSH password")
-    if not cfg.calibre.url:
-        missing.append("Calibre-Web URL")
-    if not cfg.calibre.user:
-        missing.append("Calibre-Web username")
+    if not cfg.import_dir:
+        missing.append("CWA import folder")
    if missing:
        raise ValueError(f"Missing configuration: {', '.join(missing)}")
@@ -1,46 +0,0 @@
-{% extends "base.html" %}
-{% block title %}Books — CalibreSync{% endblock %}
-
-{% block content %}
-<div class="page-header">
-  <h1>Books <span class="muted">({{ total }})</span></h1>
-</div>
-
-{% if books %}
-<table>
-  <thead>
-    <tr>
-      <th>Filename</th>
-      <th>Status</th>
-      <th>Source zip</th>
-      <th>Uploaded</th>
-    </tr>
-  </thead>
-  <tbody>
-    {% for b in books %}
-    <tr>
-      <td>{{ b.filename }}</td>
-      <td><span class="badge badge-{{ b.status }}">{{ b.status }}</span></td>
-      <td class="mono small muted">{{ b.zip_source or "—" }}</td>
-      <td>{{ b.uploaded_at[:19].replace("T"," ") if b.uploaded_at else "—" }}</td>
-    </tr>
-    {% endfor %}
-  </tbody>
-</table>
-
-{% if pages > 1 %}
-<div class="pagination">
-  {% if page > 1 %}
-    <a href="/books?page={{ page - 1 }}">&laquo; Prev</a>
-  {% endif %}
-  <span>Page {{ page }} of {{ pages }}</span>
-  {% if page < pages %}
-    <a href="/books?page={{ page + 1 }}">Next &raquo;</a>
-  {% endif %}
-</div>
-{% endif %}
-
-{% else %}
-  <p class="muted">No books recorded yet.</p>
-{% endif %}
-{% endblock %}
@@ -1,124 +0,0 @@
-{% extends "base.html" %}
-{% block title %}Duplicates — CalibreSync{% endblock %}
-
-{% block content %}
-<div class="page-header">
-  <h1>Duplicate books in Calibre-Web</h1>
-  {% if groups %}
-  <div class="header-actions">
-    <button class="btn btn-danger" onclick="deleteAll(this)">Delete all duplicates (keep oldest)</button>
-  </div>
-  {% endif %}
-</div>
-
-<div id="dedup-progress" style="display:none" class="alert alert-success"></div>
-
-{% if error %}
-  <div class="alert alert-warning">Could not fetch books from Calibre-Web: {{ error }}</div>
-{% else %}
-  <p class="muted small" style="margin-bottom:1.5rem">
-    Scanned <strong>{{ total_books }}</strong> book(s) —
-    {% if groups %}
-      found <strong>{{ groups|length }}</strong> duplicate group(s) (same title + author).
-      The oldest copy (lowest ID) is kept when deleting all.
-    {% else %}
-      no duplicates found.
-    {% endif %}
-  </p>
-
-  {% for group in groups %}
-  <div class="form-section" style="margin-bottom:1rem">
-    <h3 style="margin-top:0">{{ group[0].title }}</h3>
-    <table>
-      <thead>
-        <tr>
-          <th>ID</th>
-          <th>Title</th>
-          <th>Authors</th>
-          <th>Format</th>
-          <th></th>
-        </tr>
-      </thead>
-      <tbody>
-        {% for book in group %}
-        <tr id="row-{{ book.id }}">
-          <td class="mono muted">{{ book.id }}</td>
-          <td>{{ book.title }}</td>
-          <td>{{ book.authors }}</td>
-          <td>{{ book.format or "—" }}</td>
-          <td>
-            <button class="btn btn-danger" style="padding:0.2rem 0.7rem;font-size:0.85rem"
-                    onclick="deleteBook({{ book.id }}, this)">Delete</button>
-            <span id="status-{{ book.id }}" class="muted small" style="margin-left:0.5rem"></span>
-          </td>
-        </tr>
-        {% endfor %}
-      </tbody>
-    </table>
-  </div>
-  {% endfor %}
-{% endif %}
-
-<script>
-async function deleteBook(id, btn) {
-  if (!confirm('Delete book ID ' + id + ' from Calibre-Web?')) return;
-  btn.disabled = true;
-  btn.textContent = 'Deleting…';
-  const status = document.getElementById('status-' + id);
-  try {
-    const r = await fetch('/api/delete_book/' + id, {method: 'POST'});
-    const data = await r.json();
-    if (data.ok) {
-      document.getElementById('row-' + id).style.opacity = '0.35';
-      btn.textContent = 'Deleted';
-      status.textContent = '✓';
-    } else {
-      btn.disabled = false;
-      btn.textContent = 'Delete';
-      status.textContent = 'Failed: ' + data.message;
-      status.style.color = 'var(--error, #f87171)';
-    }
-  } catch (e) {
-    btn.disabled = false;
-    btn.textContent = 'Delete';
-    status.textContent = 'Error: ' + e;
-  }
-}
-
-async function deleteAll(btn) {
-  if (!confirm('Delete all duplicates from Calibre-Web, keeping the oldest copy of each title+author? This cannot be undone.')) return;
-  btn.disabled = true;
-  btn.textContent = 'Starting…';
-  const progress = document.getElementById('dedup-progress');
-  progress.style.display = '';
-  progress.textContent = 'Fetching book list from Calibre-Web…';
-
-  await fetch('/api/delete_duplicates', {method: 'POST'});
-
-  const poll = setInterval(async () => {
-    const r = await fetch('/api/delete_duplicates/status');
-    const s = await r.json();
-    if (s.error) {
-      clearInterval(poll);
-      progress.textContent = 'Error: ' + s.error;
-      progress.className = 'alert alert-warning';
-      btn.disabled = false;
-      btn.textContent = 'Delete all duplicates (keep oldest)';
-      return;
-    }
-    if (s.total > 0) {
-      progress.textContent = `Deleting… ${s.deleted} / ${s.total} deleted, ${s.failed} failed`;
-    } else {
-      progress.textContent = 'Scanning for duplicates…';
-    }
-    if (s.done) {
-      clearInterval(poll);
-      progress.textContent = `Done — ${s.deleted} book(s) deleted, ${s.failed} failed. Reload to refresh the list.`;
-      btn.textContent = 'Reload';
-      btn.disabled = false;
-      btn.onclick = () => location.reload();
-    }
-  }, 2000);
-}
-</script>
-{% endblock %}
@@ -59,16 +59,8 @@
    <div class="stat-label">Zip archives processed</div>
  </div>
  <div class="stat-card">
-    <div class="stat-value">{{ stats.uploaded }}</div>
-    <div class="stat-label">Books uploaded</div>
-  </div>
-  <div class="stat-card">
-    <div class="stat-value">{{ stats.skipped }}</div>
-    <div class="stat-label">Duplicates skipped</div>
-  </div>
-  <div class="stat-card">
-    <div class="stat-value">{{ stats.total_books }}</div>
-    <div class="stat-label">Total book records</div>
+    <div class="stat-value">{{ stats.total_imported }}</div>
+    <div class="stat-label">Books imported</div>
  </div>
 </div>

@@ -81,8 +73,7 @@
      <th>Finished</th>
      <th>Status</th>
      <th>New zips</th>
-      <th>Uploaded</th>
-      <th>Skipped</th>
+      <th>Imported</th>
      <th>Errors</th>
    </tr>
  </thead>
@@ -93,8 +84,7 @@
      <td>{{ r.finished_at[:19].replace("T"," ") if r.finished_at else "—" }}</td>
      <td><span class="badge badge-{{ r.status }}">{{ r.status }}</span></td>
      <td>{{ r.zips_new }}</td>
-      <td>{{ r.books_uploaded }}</td>
-      <td>{{ r.books_skipped }}</td>
+      <td>{{ r.books_imported }}</td>
      <td>{{ r.books_errored }}</td>
    </tr>
    {% endfor %}
@@ -84,39 +84,20 @@
    </div>
  </section>

-  <section class="form-section">
-    <h2>Calibre-Web</h2>
-
-    <div class="form-row">
-      <label for="calibre_url">URL</label>
-      <input id="calibre_url" name="calibre_url" type="url" placeholder="http://localhost:8083"
-             value="{{ s.get('calibre_url','') }}">
-    </div>
-
-    <div class="form-row">
-      <label for="calibre_user">Username</label>
-      <input id="calibre_user" name="calibre_user" type="text" value="{{ s.get('calibre_user','') }}">
-    </div>
-
-    <div class="form-row">
-      <label for="calibre_pass">Password</label>
-      <input id="calibre_pass" name="calibre_pass" type="password"
-             value="{{ s.get('calibre_pass','') }}">
-    </div>
-
-    <div class="form-row">
-      <button type="button" class="btn btn-secondary" onclick="testConn('calibre', this)">Test Calibre-Web connection</button>
-      <p id="test-calibre-result" class="test-result"></p>
-    </div>
-  </section>
-
  <section class="form-section">
    <h2>Local</h2>

    <div class="form-row">
-      <label for="local_work_dir">Work directory</label>
-      <input id="local_work_dir" name="local_work_dir" type="text" placeholder="/tmp/calibresync"
-             value="{{ s.get('local_work_dir','/tmp/calibresync') }}">
+      <label for="import_dir">CWA import folder</label>
+      <input id="import_dir" name="import_dir" type="text" placeholder="/mnt/cwa-import"
+             value="{{ s.get('import_dir','') }}">
+      <p class="muted small">Folder watched by Calibre-Web-Automated. Extracted epub/pdf files are moved here flat.</p>
+    </div>
+
+    <div class="form-row">
+      <label for="work_dir">Temp work directory</label>
+      <input id="work_dir" name="work_dir" type="text" placeholder="/tmp/calibresync"
+             value="{{ s.get('work_dir','/tmp/calibresync') }}">
      <p class="muted small">Temporary storage for downloaded zips and extracted files. Cleaned up after each run.</p>
    </div>
  </section>
@@ -182,7 +163,7 @@ async function testConn(type, btn) {
    result.className = "test-result test-fail";
  } finally {
    btn.disabled = false;
-    btn.textContent = type === "ssh" ? "Test SSH connection" : "Test Calibre-Web connection";
+    btn.textContent = "Test SSH connection";
  }
 }
 </script>
@@ -1,413 +0,0 @@
-import hashlib
-import logging
-import re
-import time
-import unicodedata
-from pathlib import Path
-from urllib.parse import quote
-
-import requests
-
-import db
-from config import CalibreConfig
-
-log = logging.getLogger(__name__)
-
-MIME_TYPES = {
-    ".epub": "application/epub+zip",
-    ".pdf": "application/pdf",
-}
-
-# Words stripped before comparing titles — release-group tags, language codes, format names, etc.
-_JUNK_WORDS = {
-    "retail", "epub", "ebook", "pdf", "mobi", "azw3", "decipher",
-    "swedish", "english", "danish", "norwegian", "finnish", "german", "french",
-    "the", "a", "an", "och", "und", "les", "der", "die", "das",
-}
-
-
-class CalibreUnavailableError(RuntimeError):
-    """Raised when Calibre-Web returns repeated 502/503/504 — sync run should abort."""
-
-
-class CalibreClient:
-    def __init__(self, cfg: CalibreConfig):
-        self._cfg = cfg
-        self._session = requests.Session()
-        self._authenticated = False
-        self._upload_csrf: str | None = None
-        self._consecutive_failures = 0
-        # Pre-loaded title word-sets for fast duplicate detection (set by preload_existing_titles)
-        self._existing_title_sets: list[frozenset[str]] | None = None
-
-    def preload_existing_titles(self, books: list[dict]) -> None:
-        """Build an in-memory index of normalised title keywords from a pre-fetched book list."""
-        self._existing_title_sets = [
-            frozenset(_normalize_words(b.get("title", "")))
-            for b in books
-            if b.get("title")
-        ]
-        log.info("Pre-loaded %d existing book titles for duplicate detection", len(self._existing_title_sets))
-
-    def _ensure_auth(self) -> None:
-        if self._authenticated:
-            return
-        login_url = f"{self._cfg.url}/login"
-        page = self._session.get(login_url, timeout=30)
-        page.raise_for_status()
-        csrf = _extract_csrf(page.text)
-
-        data = {"username": self._cfg.user, "password": self._cfg.password}
-        if csrf:
-            data["csrf_token"] = csrf
-
-        resp = self._session.post(login_url, data=data, allow_redirects=True, timeout=30)
-        resp.raise_for_status()
-        if resp.url.rstrip("/").endswith("/login"):
-            raise RuntimeError("Calibre-Web authentication failed — check credentials")
-        self._authenticated = True
-        self._upload_csrf = _extract_csrf(resp.text) or csrf
-        log.info("Authenticated to Calibre-Web at %s", self._cfg.url)
-
-    def _exists_in_calibre(self, filename: str) -> bool:
-        """Check whether a book already exists in Calibre-Web. Returns True if likely duplicate."""
-        keywords = _keywords_from_filename(filename)
-        if len(keywords) < 2:
-            return False
-        our_words = set(keywords)
-
-        # Fast path: check pre-loaded title index (available when sync pre-fetches all books)
-        if self._existing_title_sets is not None:
-            for their_words in self._existing_title_sets:
-                if not their_words:
-                    continue
-                overlap = len(our_words & their_words)
-                # Match if: 3+ words in common, OR 60%+ of filename keywords match the title,
-                # OR 60%+ of the stored title's words appear in the filename keywords.
-                # The third condition catches short titles drowned out by filename noise.
-                if (overlap >= 3
-                        or (overlap / len(our_words) >= 0.6)
-                        or (len(their_words) >= 2 and overlap / len(their_words) >= 0.6)):
-                    log.info("Duplicate (preloaded index): '%s'", filename)
-                    return True
-            return False
-
-        # Slow path fallback: OPDS search (used when no index is available)
-        query = " ".join(keywords[:6])
-        try:
-            resp = self._session.get(
-                f"{self._cfg.url}/opds/search/{quote(query, safe='')}",
-                auth=(self._cfg.user, self._cfg.password),
-                timeout=15,
-            )
-            if resp.status_code == 404:
-                return False
-            calibre_titles = _parse_opds_titles(resp.text)
-            if not calibre_titles:
-                return False
-
-            for title in calibre_titles:
-                their_words = set(_normalize_words(title))
-                if not their_words:
-                    continue
-                overlap = len(our_words & their_words)
-                if (overlap >= 3
-                        or (overlap / len(our_words) >= 0.6)
-                        or (len(their_words) >= 2 and overlap / len(their_words) >= 0.6)):
-                    log.info("Duplicate (OPDS search): '%s'", filename)
-                    return True
-        except Exception as e:
-            log.warning("OPDS search failed for '%s': %s — proceeding with upload", filename, e)
-        return False
-
-    def upload(self, book_path: Path, zip_source: str) -> str:
-        """Upload a book file. Returns status: 'uploaded' | 'skipped_duplicate' | 'error'."""
-        file_hash = _sha256(book_path)
-
-        # Primary guard: hash already in our DB
-        if db.is_book_uploaded(file_hash):
-            log.info("Skipping (already uploaded): %s", book_path.name)
-            db.record_book(book_path.name, file_hash, zip_source, "skipped_duplicate")
-            return "skipped_duplicate"
-
-        try:
-            self._ensure_auth()
-
-            # Secondary guard: title search in Calibre-Web (catches pre-existing books)
-            if self._exists_in_calibre(book_path.name):
-                log.info("Skipping (exists in Calibre-Web): %s", book_path.name)
-                db.record_book(book_path.name, file_hash, zip_source, "skipped_duplicate")
-                return "skipped_duplicate"
-
-            mime = MIME_TYPES.get(book_path.suffix.lower(), "application/octet-stream")
-            for attempt in range(1, 4):
-                try:
-                    with book_path.open("rb") as fh:
-                        resp = self._session.post(
-                            f"{self._cfg.url}/upload",
-                            files={"btn-upload": (book_path.name, fh, mime)},
-                            data={"csrf_token": self._upload_csrf} if self._upload_csrf else {},
-                            timeout=120,
-                        )
-                    if not resp.ok:
-                        log.error("Upload HTTP %s (attempt %d/3) — body: %s", resp.status_code, attempt, resp.text[:300])
-                    resp.raise_for_status()
-                    log.info("Uploaded: %s", book_path.name)
-                    self._consecutive_failures = 0
-                    db.record_book(book_path.name, file_hash, zip_source, "uploaded")
-                    # Add to in-session index so a later zip with the same title is skipped
-                    if self._existing_title_sets is not None:
-                        kw = frozenset(_keywords_from_filename(book_path.name))
-                        if kw:
-                            self._existing_title_sets.append(kw)
-                    return "uploaded"
-                except requests.HTTPError:
-                    if resp.status_code in (502, 503, 504):
-                        if attempt < 3:
-                            log.warning("HTTP %s on attempt %d/3 — retrying in 180s ...", resp.status_code, attempt)
-                            time.sleep(180)
-                            continue
-                        # All retries exhausted
-                        self._consecutive_failures += 1
-                        if self._consecutive_failures >= 3:
-                            raise CalibreUnavailableError(
-                                f"Calibre-Web returned {resp.status_code} on {self._consecutive_failures} "
-                                "consecutive books — aborting sync run"
-                            )
-                        break
-                    if resp.status_code == 400 and attempt == 1:
-                        log.warning("HTTP 400 — CSRF token likely expired, re-authenticating ...")
-                        self._authenticated = False
-                        self._upload_csrf = None
-                        self._ensure_auth()
-                        continue
-                    break
-
-            db.record_book(book_path.name, file_hash, zip_source, "error")
-            return "error"
-        except CalibreUnavailableError:
-            db.record_book(book_path.name, file_hash, zip_source, "error")
-            raise
-        except Exception as e:
-            log.error("Upload failed for %s: %s", book_path.name, e)
-            db.record_book(book_path.name, file_hash, zip_source, "error")
-            return "error"
-
-
-def fetch_all_books(cfg: CalibreConfig) -> list[dict]:
-    """Fetch every book from Calibre-Web. Tries /ajax/listbooks first; falls back to OPDS if pagination is broken."""
-    client = CalibreClient(cfg)
-    client._ensure_auth()
-    all_books: list[dict] = []
-    seen_ids: set = set()
-    page_size = 1000
-    start = 0
-    reported_total = 0
-    while True:
-        resp = client._session.get(
-            f"{cfg.url}/ajax/listbooks",
-            params={
-                "draw": 1,
-                "start": start, "length": page_size,
-                "iDisplayStart": start, "iDisplayLength": page_size,
-            },
-            timeout=60,
-        )
-        resp.raise_for_status()
-        data = resp.json()
-        if start == 0:
-            non_list = {k: v for k, v in data.items() if not isinstance(v, list)}
-            log.info("listbooks page-0 meta fields: %s", non_list)
-        rows = data.get("rows") or data.get("data") or []
-        reported_total = (
-            data.get("recordsTotal") or data.get("total_count") or
-            data.get("total") or data.get("totalNotFiltered") or 0
-        )
-        new_in_page = 0
-        for b in rows:
-            bid = b.get("id")
-            if bid not in seen_ids:
-                seen_ids.add(bid)
-                all_books.append(b)
-                new_in_page += 1
-        log.info("Books fetched: %d / %d (page gave %d new)", len(all_books), reported_total, new_in_page)
-        if not rows or new_in_page == 0 or len(all_books) >= reported_total:
-            break
-        start += len(rows)
-
-    # If we got far fewer books than reported, listbooks pagination is broken — use OPDS instead
-    if reported_total > 0 and len(all_books) < reported_total // 2:
-        log.warning(
-            "listbooks pagination broken (%d/%d books retrieved). Falling back to OPDS.",
-            len(all_books), reported_total,
-        )
-        return _fetch_all_books_opds(cfg)
-    return all_books
-
-
-def _fetch_all_books_opds(cfg: CalibreConfig) -> list[dict]:
-    """Fetch all books via OPDS catalog, following next-page links."""
-    import xml.etree.ElementTree as ET
-    books: list[dict] = []
-    seen_ids: set = set()
-    url: str | None = f"{cfg.url}/opds/new"
-    auth = (cfg.user, cfg.password)
-    session = requests.Session()
-
-    while url:
-        resp = session.get(url, auth=auth, timeout=30)
-        if not resp.ok:
-            log.warning("OPDS fetch failed HTTP %s — %s", resp.status_code, url)
-            break
-        try:
-            root = ET.fromstring(resp.content)
-        except ET.ParseError as exc:
-            log.warning("OPDS XML parse error: %s", exc)
-            break
-
-        next_url: str | None = None
-        entries_this_page = 0
-        for elem in root:
-            local = elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
-            if local == "link" and elem.get("rel") == "next":
-                href = elem.get("href", "")
-                next_url = href if href.startswith("http") else f"{cfg.url}{href}"
-            elif local == "entry":
-                entries_this_page += 1
-                title = ""
-                author_parts: list[str] = []
-                book_id: int | None = None
-                for child in elem:
-                    ctag = child.tag.split("}")[-1] if "}" in child.tag else child.tag
-                    if ctag == "title":
-                        title = child.text or ""
-                    elif ctag == "author":
-                        for gc in child:
-                            if (gc.tag.split("}")[-1] if "}" in gc.tag else gc.tag) == "name":
-                                author_parts.append(gc.text or "")
-                    elif ctag == "link":
-                        m = re.search(r"/download/(\d+)/", child.get("href", ""))
-                        if m and book_id is None:
-                            book_id = int(m.group(1))
-                if book_id and book_id not in seen_ids:
-                    seen_ids.add(book_id)
-                    books.append({"id": book_id, "title": title, "authors": " & ".join(author_parts)})
-
-        log.info("OPDS fetched: %d books total (page had %d entries)", len(books), entries_this_page)
-        if not entries_this_page:
-            break
-        url = next_url
-
-    return books
-
-
-def delete_book(cfg: CalibreConfig, book_id: int, client: "CalibreClient | None" = None) -> tuple[bool, str]:
-    """Delete a book from Calibre-Web by ID. Pass a pre-authenticated client to avoid re-auth overhead."""
-    if client is None:
-        client = CalibreClient(cfg)
-        client._ensure_auth()
-    csrf = client._upload_csrf
-    if not csrf:
-        # Try to fetch a CSRF token from the book detail page
-        try:
-            page = client._session.get(f"{cfg.url}/book/{book_id}", timeout=15)
-            csrf = _extract_csrf(page.text)
-            client._upload_csrf = csrf
-        except Exception:
-            pass
-    for attempt in range(2):
-        resp = client._session.post(
-            f"{cfg.url}/delete/{book_id}",
-            data={"csrf_token": csrf} if csrf else {},
-            timeout=30,
-        )
-        if resp.ok:
-            return True, "Deleted"
-        if resp.status_code == 400 and attempt == 0:
-            # CSRF token likely expired; re-authenticate and retry once
-            log.info("delete_book: 400 on book %d — refreshing CSRF and retrying", book_id)
-            client._authenticated = False
-            client._upload_csrf = None
-            client._ensure_auth()
-            csrf = client._upload_csrf
-            continue
-        return False, f"HTTP {resp.status_code}"
-    return False, "HTTP 400 after re-auth retry"
-
-
-def find_duplicate_groups(books: list[dict]) -> list[list[dict]]:
-    """Group books by normalised title+author; return only groups with 2+ entries."""
-    from collections import defaultdict
-    groups: dict[str, list[dict]] = defaultdict(list)
-    for book in books:
-        title = re.sub(r"[^\w\s]", " ", book.get("title", "").lower())
-        title = re.sub(r"\s+", " ", title).strip()
-        authors = re.sub(r"[^\w\s]", " ", book.get("authors", "").lower())
-        authors = re.sub(r"\s+", " ", authors).strip()
-        key = f"{title}||{authors}"
-        if title:
-            groups[key].append(book)
-    return sorted(
-        [g for g in groups.values() if len(g) > 1],
-        key=lambda g: g[0].get("title", "").lower(),
-    )
-
-
-def test_connection(cfg: CalibreConfig) -> tuple[bool, str]:
-    try:
-        client = CalibreClient(cfg)
-        client._ensure_auth()
-        return True, f"Authenticated to {cfg.url} as '{cfg.user}'."
-    except Exception as e:
-        return False, str(e)
-
-
-# --- Helpers ---
-
-def _ascii_fold(s: str) -> str:
-    """Strip accents: 'världens' → 'varldens', 'väg' → 'vag'."""
-    return "".join(c for c in unicodedata.normalize("NFKD", s) if unicodedata.category(c) != "Mn")
-
-
-def _keywords_from_filename(filename: str) -> list[str]:
-    """Extract meaningful words from a release-style filename for OPDS search."""
-    stem = _ascii_fold(Path(filename).stem.lower())
-    stem = re.sub(r"[._\-]", " ", stem)
-    stem = re.sub(r"[^\w\s]", "", stem)
-    words = stem.split()
-    return [
-        w for w in words
-        if w not in _JUNK_WORDS
-        and not re.match(r"^\d{4}$", w)
-        and not re.match(r"^\d+$", w)
-        and len(w) > 1
-    ]
-
-
-def _normalize_words(title: str) -> list[str]:
-    """Normalize a Calibre-Web title for comparison."""
-    title = _ascii_fold(title.lower())
-    title = re.sub(r"[^\w\s]", "", title)
-    return [w for w in title.split() if w not in _JUNK_WORDS and len(w) > 1]
-
-
-def _parse_opds_titles(xml: str) -> list[str]:
-    """Extract book titles from an OPDS Atom feed, skipping the feed title itself."""
-    # Grab all <title> elements; the first is the feed title ("Search results"), rest are books
-    titles = re.findall(r"<title>([^<]+)</title>", xml)
-    return titles[1:] if len(titles) > 1 else []
-
-
-def _extract_csrf(html: str) -> str | None:
-    m = re.search(r'name="csrf_token"\s+value="([^"]+)"', html)
-    if not m:
-        m = re.search(r'value="([^"]+)"\s+name="csrf_token"', html)
-    return m.group(1) if m else None
-
-
-def _sha256(path: Path) -> str:
-    h = hashlib.sha256()
-    with path.open("rb") as f:
-        for chunk in iter(lambda: f.read(65536), b""):
-            h.update(chunk)
-    return h.hexdigest()