diff --git a/config.py b/config.py index adfeca5..d48a621 100644 --- a/config.py +++ b/config.py @@ -14,11 +14,19 @@ class SFTPConfig: remote_path: str = "" +@dataclass +class GrimmoryConfig: + url: str = "" + user: str = "" + password: str = "" + bookdrop_path: str = "" + + @dataclass class AppConfig: sftp: SFTPConfig = field(default_factory=SFTPConfig) + grimmory: GrimmoryConfig = field(default_factory=GrimmoryConfig) work_dir: str = "/tmp/calibresync" - import_dir: str = "" def load() -> AppConfig: @@ -33,8 +41,13 @@ def load() -> AppConfig: password=s.get("sftp_password", ""), remote_path=s.get("sftp_remote_path", ""), ), + grimmory=GrimmoryConfig( + url=s.get("grimmory_url", ""), + user=s.get("grimmory_user", ""), + password=s.get("grimmory_password", ""), + bookdrop_path=s.get("grimmory_bookdrop_path", ""), + ), work_dir=s.get("work_dir", "/tmp/calibresync"), - import_dir=s.get("import_dir", ""), ) @@ -42,7 +55,8 @@ def save(form: dict) -> None: keys = [ "sftp_host", "sftp_port", "sftp_user", "sftp_auth_method", "sftp_password", "sftp_remote_path", - "work_dir", "import_dir", + "grimmory_url", "grimmory_user", "grimmory_password", "grimmory_bookdrop_path", + "work_dir", "scheduler_interval_minutes", "sync_batch_size", ] for key in keys: diff --git a/db.py b/db.py index a335808..5edc239 100644 --- a/db.py +++ b/db.py @@ -64,6 +64,16 @@ def init_db() -> None: file_size INTEGER NOT NULL, cached_at TEXT NOT NULL ); + + CREATE TABLE IF NOT EXISTS processed_books ( + id INTEGER PRIMARY KEY, + zip_id INTEGER REFERENCES processed_zips(id), + filename TEXT NOT NULL, + sha256 TEXT, + placed_at TEXT, + status TEXT DEFAULT 'success', + error_msg TEXT + ); """) _migrate(conn) @@ -72,7 +82,9 @@ def _migrate(conn: sqlite3.Connection) -> None: existing = {row[1] for row in conn.execute("PRAGMA table_info(sync_runs)")} if "books_imported" not in existing: conn.execute("ALTER TABLE sync_runs ADD COLUMN books_imported INTEGER DEFAULT 0") - for old_col in ("books_uploaded", "books_skipped"): + if "books_skipped" not in existing: + conn.execute("ALTER TABLE sync_runs ADD COLUMN books_skipped INTEGER DEFAULT 0") + for old_col in ("books_uploaded",): if old_col in existing: try: conn.execute(f"ALTER TABLE sync_runs DROP COLUMN {old_col}") @@ -167,6 +179,14 @@ def mark_zip_processed(remote_path: str, file_size: int, status: str, error_msg: ) +def get_zip_id_by_path(remote_path: str) -> int | None: + with get_db() as conn: + row = conn.execute( + "SELECT id FROM processed_zips WHERE remote_path = ?", (remote_path,) + ).fetchone() + return row["id"] if row else None + + def get_recent_zips(limit: int = 50) -> list[sqlite3.Row]: with get_db() as conn: return conn.execute( @@ -200,30 +220,66 @@ def get_recent_runs(limit: int = 10) -> list[sqlite3.Row]: ).fetchall() +def is_book_processed(sha256: str) -> bool: + with get_db() as conn: + row = conn.execute( + "SELECT id FROM processed_books WHERE sha256 = ? AND status = 'success'", (sha256,) + ).fetchone() + return row is not None + + +def record_book(zip_id: int | None, filename: str, sha256: str, + status: str = "success", error_msg: str | None = None) -> None: + with get_db() as conn: + conn.execute( + "INSERT INTO processed_books (zip_id, filename, sha256, placed_at, status, error_msg) VALUES (?,?,?,?,?,?)", + (zip_id, filename, sha256, _now(), status, error_msg), + ) + + +def get_recent_books(limit: int = 50) -> list[sqlite3.Row]: + with get_db() as conn: + return conn.execute( + """SELECT pb.*, pz.remote_path as zip_remote_path + FROM processed_books pb + LEFT JOIN processed_zips pz ON pz.id = pb.zip_id + ORDER BY pb.placed_at DESC LIMIT ?""", + (limit,), + ).fetchall() + + def get_stats() -> dict: with get_db() as conn: total_zips = conn.execute("SELECT COUNT(*) FROM processed_zips").fetchone()[0] - total_imported = conn.execute( - "SELECT COALESCE(SUM(books_imported), 0) FROM sync_runs" + total_books = conn.execute( + "SELECT COUNT(*) FROM processed_books WHERE status = 'success'" + ).fetchone()[0] + total_skipped = conn.execute( + "SELECT COUNT(*) FROM processed_books WHERE status = 'skipped'" + ).fetchone()[0] + total_errored = conn.execute( + "SELECT COUNT(*) FROM processed_books WHERE status = 'error'" ).fetchone()[0] last_run = conn.execute( "SELECT started_at, status FROM sync_runs ORDER BY started_at DESC LIMIT 1" ).fetchone() return { "total_zips": total_zips, - "total_imported": total_imported, + "total_books": total_books, + "total_skipped": total_skipped, + "total_errored": total_errored, "last_run": dict(last_run) if last_run else None, } def clear_sync_data() -> dict: - """Delete all processed_zips and sync_runs rows. Settings are kept. - Also resets the remote scan timestamp so the next sync does a full rescan.""" + """Delete all processed_books, processed_zips, and sync_runs rows. Settings are kept.""" with get_db() as conn: + books = conn.execute("DELETE FROM processed_books").rowcount zips = conn.execute("DELETE FROM processed_zips").rowcount runs = conn.execute("DELETE FROM sync_runs").rowcount conn.execute("DELETE FROM settings WHERE key = 'remote_cache_last_scan'") - return {"zips": zips, "runs": runs} + return {"zips": zips, "runs": runs, "books": books} def _now() -> str: diff --git a/grimmory.py b/grimmory.py new file mode 100644 index 0000000..d9fca48 --- /dev/null +++ b/grimmory.py @@ -0,0 +1,90 @@ +import hashlib +import logging +import shutil +from pathlib import Path + +import requests + +log = logging.getLogger(__name__) + + +def compute_sha256(path: Path) -> str: + h = hashlib.sha256() + with path.open("rb") as f: + for chunk in iter(lambda: f.read(65536), b""): + h.update(chunk) + return h.hexdigest() + + +def _is_in_grimmory(filename: str, url: str, user: str, password: str) -> bool: + """Search Grimmory Komga-compatible API by filename stem. Non-fatal on error.""" + stem = Path(filename).stem + try: + r = requests.get( + url.rstrip("/") + "/api/v1/books", + params={"search": stem}, + auth=(user, password), + timeout=10, + ) + if r.status_code == 200: + return r.json().get("totalElements", 0) > 0 + log.warning("Grimmory search returned HTTP %s for '%s'", r.status_code, stem) + except Exception as e: + log.warning("Grimmory duplicate check failed for '%s': %s", filename, e) + return False + + +class PlacementResult: + __slots__ = ("status", "sha256", "error_msg") + + def __init__(self, status: str, sha256: str = "", error_msg: str = ""): + self.status = status + self.sha256 = sha256 + self.error_msg = error_msg + + +def place_book( + book_path: Path, + bookdrop_path: str, + url: str, + user: str, + password: str, + sha256: str | None = None, +) -> PlacementResult: + if sha256 is None: + sha256 = compute_sha256(book_path) + + if _is_in_grimmory(book_path.name, url, user, password): + log.info("Skipping '%s' — already in Grimmory", book_path.name) + return PlacementResult("skipped", sha256) + + dest_dir = Path(bookdrop_path) + dest_dir.mkdir(parents=True, exist_ok=True) + dest = dest_dir / book_path.name + counter = 1 + while dest.exists(): + dest = dest_dir / f"{book_path.stem}_{counter}{book_path.suffix}" + counter += 1 + + shutil.copy2(book_path, dest) + log.info("Placed '%s' → %s", book_path.name, dest) + return PlacementResult("success", sha256) + + +def test_connection(url: str, user: str, password: str) -> tuple[bool, str]: + try: + r = requests.get( + url.rstrip("/") + "/api/v1/books", + params={"size": 1}, + auth=(user, password), + timeout=10, + ) + if r.status_code == 200: + return True, "Connected to Grimmory successfully" + if r.status_code == 401: + return False, "Authentication failed — check username and password" + return False, f"HTTP {r.status_code}" + except requests.exceptions.ConnectionError: + return False, "Could not connect — check the URL" + except Exception as e: + return False, str(e) diff --git a/main.py b/main.py index e2b15ae..7b83f64 100644 --- a/main.py +++ b/main.py @@ -3,13 +3,14 @@ from contextlib import asynccontextmanager from pathlib import Path from apscheduler.schedulers.background import BackgroundScheduler -from fastapi import BackgroundTasks, FastAPI, Form, Request -from fastapi.responses import HTMLResponse, RedirectResponse +from fastapi import BackgroundTasks, FastAPI +from fastapi.responses import FileResponse from fastapi.staticfiles import StaticFiles -from fastapi.templating import Jinja2Templates +from pydantic import BaseModel import config import db +import grimmory as grimmory_module import sftp as sftp_module import sync @@ -50,101 +51,94 @@ async def lifespan(app: FastAPI): app = FastAPI(title="CalibreSync", lifespan=lifespan) app.mount("/static", StaticFiles(directory=Path(__file__).parent / "static"), name="static") -templates = Jinja2Templates(directory=Path(__file__).parent / "templates") -# --- Dashboard --- +# --- SPA root --- -@app.get("/", response_class=HTMLResponse) -async def dashboard(request: Request): - stats = db.get_stats() - runs = [dict(r) for r in db.get_recent_runs(10)] - zips = [dict(z) for z in db.get_recent_zips(20)] +@app.get("/", include_in_schema=False) +async def spa_root(): + return FileResponse(Path(__file__).parent / "static" / "index.html") + + +# --- Dashboard data --- + +@app.get("/api/dashboard") +async def api_dashboard(): interval = int(db.get_setting("scheduler_interval_minutes", "0") or "0") - batch_size = int(db.get_setting("sync_batch_size", "0") or "0") - cache_info = db.get_remote_cache_info() - return templates.TemplateResponse(request, "index.html", { - "stats": stats, - "runs": runs, - "zips": zips, + return { + "stats": db.get_stats(), + "runs": [dict(r) for r in db.get_recent_runs(10)], + "zips": [dict(z) for z in db.get_recent_zips(20)], + "books": [dict(b) for b in db.get_recent_books(20)], "sync_running": sync.is_running(), "next_run": next_run_time(), + "cache_info": db.get_remote_cache_info(), "interval": interval, - "batch_size": batch_size, - "cache_info": cache_info, - }) + } # --- Settings --- -@app.get("/settings", response_class=HTMLResponse) -async def settings_page(request: Request): +@app.get("/api/settings") +async def api_get_settings(): s = db.get_all_settings() key_pem = s.get("sftp_key", "") - return templates.TemplateResponse(request, "settings.html", { - "s": s, + return { + "settings": s, "has_key": bool(key_pem.strip()), "key_fingerprint": sftp_module.get_key_fingerprint(key_pem), - }) + } -@app.post("/settings") -async def save_settings( - request: Request, - sftp_host: str = Form(""), - sftp_port: str = Form("22"), - sftp_user: str = Form(""), - sftp_auth_method: str = Form("key"), - sftp_key: str = Form(""), - sftp_password: str = Form(""), - sftp_remote_path: str = Form(""), - work_dir: str = Form("/tmp/calibresync"), - import_dir: str = Form(""), - scheduler_interval_minutes: str = Form("0"), - sync_batch_size: str = Form("0"), -): - config.save({ - "sftp_host": sftp_host, - "sftp_port": sftp_port, - "sftp_user": sftp_user, - "sftp_auth_method": sftp_auth_method, - "sftp_key": sftp_key, - "sftp_password": sftp_password, - "sftp_remote_path": sftp_remote_path, - "work_dir": work_dir, - "import_dir": import_dir, - "scheduler_interval_minutes": scheduler_interval_minutes, - "sync_batch_size": sync_batch_size, - }) +class SettingsPayload(BaseModel): + sftp_host: str = "" + sftp_port: str = "22" + sftp_user: str = "" + sftp_auth_method: str = "key" + sftp_key: str = "" + sftp_password: str = "" + sftp_remote_path: str = "" + grimmory_url: str = "" + grimmory_user: str = "" + grimmory_password: str = "" + grimmory_bookdrop_path: str = "" + work_dir: str = "/tmp/calibresync" + scheduler_interval_minutes: str = "0" + sync_batch_size: str = "0" + + +@app.post("/api/settings") +async def api_save_settings(payload: SettingsPayload): + config.save(payload.model_dump()) _reschedule_auto_sync() - return RedirectResponse("/settings?saved=1", status_code=303) + return {"ok": True} # --- Sync triggers --- -@app.post("/sync") -async def trigger_sync(background_tasks: BackgroundTasks): +@app.post("/api/sync") +async def api_trigger_sync(background_tasks: BackgroundTasks): if sync.is_running(): - return RedirectResponse("/?already_running=1", status_code=303) + return {"ok": False, "reason": "already_running"} background_tasks.add_task(sync.run_sync) - return RedirectResponse("/?started=1", status_code=303) + return {"ok": True} -@app.post("/sync/test") -async def trigger_test_sync(background_tasks: BackgroundTasks): +@app.post("/api/sync/test") +async def api_trigger_test_sync(background_tasks: BackgroundTasks): if sync.is_running(): - return RedirectResponse("/?already_running=1", status_code=303) + return {"ok": False, "reason": "already_running"} background_tasks.add_task(sync.run_sync, 1) - return RedirectResponse("/?test_started=1", status_code=303) + return {"ok": True} -@app.post("/sync/rescan") -async def trigger_rescan(background_tasks: BackgroundTasks): +@app.post("/api/sync/rescan") +async def api_trigger_rescan(background_tasks: BackgroundTasks): if sync.is_running(): - return RedirectResponse("/?already_running=1", status_code=303) + return {"ok": False, "reason": "already_running"} cfg = config.load() background_tasks.add_task(sftp_module.refresh_remote_zip_cache, cfg.sftp) - return RedirectResponse("/?rescan_started=1", status_code=303) + return {"ok": True} # --- Connection tests --- @@ -156,13 +150,20 @@ async def test_ssh(): return {"ok": ok, "message": message} +@app.get("/api/test/grimmory") +async def test_grimmory(): + cfg = config.load() + ok, message = grimmory_module.test_connection(cfg.grimmory.url, cfg.grimmory.user, cfg.grimmory.password) + return {"ok": ok, "message": message} + + # --- Data reset --- -@app.post("/settings/reset-sync-data") -async def reset_sync_data(): +@app.post("/api/settings/reset-sync-data") +async def api_reset_sync_data(): counts = db.clear_sync_data() log.info("Sync data cleared: %s", counts) - return RedirectResponse("/settings?reset=1", status_code=303) + return {"ok": True, "counts": counts} # --- JSON status API --- diff --git a/requirements.txt b/requirements.txt index 66e2534..a3151fe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ fastapi uvicorn[standard] -jinja2 -python-multipart paramiko rarfile apscheduler +requests diff --git a/static/index.html b/static/index.html new file mode 100644 index 0000000..5c63354 --- /dev/null +++ b/static/index.html @@ -0,0 +1,483 @@ + + + + + + CalibreSync + + + + + + + + +
+
+ +
+
+ + + + +
+ + +
+ + +
+ + +
+ + +
+ +
+ + + + diff --git a/sync.py b/sync.py index dd54e24..c1dd682 100644 --- a/sync.py +++ b/sync.py @@ -1,5 +1,4 @@ import logging -import shutil import threading import time from pathlib import Path @@ -7,6 +6,7 @@ from pathlib import Path import config import db import extractor +import grimmory as grimmory_module import sftp as sftp_module log = logging.getLogger(__name__) @@ -33,20 +33,17 @@ def run_sync(limit: int | None = None) -> None: _running = True run_id = db.start_sync_run() - counters = dict(zips_found=0, zips_new=0, books_imported=0, books_errored=0) + counters = dict(zips_found=0, zips_new=0, books_imported=0, books_skipped=0, books_errored=0) try: log.info("Sync started (limit=%s)", limit) cfg = config.load() _validate_config(cfg) - log.info("Config OK — work dir: %s, import dir: %s", cfg.work_dir, cfg.import_dir) + log.info("Config OK — work dir: %s, bookdrop: %s", cfg.work_dir, cfg.grimmory.bookdrop_path) work_dir = Path(cfg.work_dir) work_dir.mkdir(parents=True, exist_ok=True) - import_dir = Path(cfg.import_dir) - import_dir.mkdir(parents=True, exist_ok=True) - log.info("Connecting to SFTP %s@%s:%s ...", cfg.sftp.user, cfg.sftp.host, cfg.sftp.port) new_zips = sftp_module.list_new_zips(cfg.sftp, max_results=limit) counters["zips_found"] = len(new_zips) @@ -71,6 +68,11 @@ def run_sync(limit: int | None = None) -> None: zip_status = "success" zip_error = None local_zip = None + + # Insert zip row early so we have a zip_id for per-book records + db.mark_zip_processed(remote_zip.remote_path, remote_zip.file_size, "running") + zip_id = db.get_zip_id_by_path(remote_zip.remote_path) + try: t0 = time.monotonic() local_zip = sftp_module.download(cfg.sftp, remote_zip, str(work_dir / "downloads")) @@ -81,13 +83,26 @@ def run_sync(limit: int | None = None) -> None: log.info("Extract done in %.1fs — %d book(s)", time.monotonic() - t1, len(books)) for book in books: - dest = import_dir / book.name - if dest.exists(): - log.info("Skipping '%s' — already exists in import dir", book.name) - else: - shutil.move(str(book), str(dest)) - log.info("Moved '%s' → %s", book.name, import_dir) + sha256 = grimmory_module.compute_sha256(book) + if db.is_book_processed(sha256): + log.info("Skipping '%s' — sha256 already processed", book.name) + counters["books_skipped"] += 1 + db.record_book(zip_id, book.name, sha256, status="skipped") + continue + result = grimmory_module.place_book( + book, + cfg.grimmory.bookdrop_path, + cfg.grimmory.url, + cfg.grimmory.user, + cfg.grimmory.password, + sha256=sha256, + ) + if result.status == "success": counters["books_imported"] += 1 + elif result.status == "skipped": + counters["books_skipped"] += 1 + db.record_book(zip_id, book.name, result.sha256, + status=result.status, error_msg=result.error_msg) extractor.cleanup(work_dir / "extracted" / local_zip.stem) except Exception as e: @@ -127,7 +142,13 @@ def _validate_config(cfg) -> None: missing.append("SSH private key") if cfg.sftp.auth_method == "password" and not cfg.sftp.password: missing.append("SSH password") - if not cfg.import_dir: - missing.append("CWA import folder") + if not cfg.grimmory.url: + missing.append("Grimmory URL") + if not cfg.grimmory.user: + missing.append("Grimmory username") + if not cfg.grimmory.password: + missing.append("Grimmory password") + if not cfg.grimmory.bookdrop_path: + missing.append("Grimmory bookdrop path") if missing: raise ValueError(f"Missing configuration: {', '.join(missing)}") diff --git a/templates/settings.html b/templates/settings.html index 12f654b..36611b5 100644 --- a/templates/settings.html +++ b/templates/settings.html @@ -85,15 +85,35 @@
-

Local

+

Grimmory

- - -

Folder watched by Calibre-Web-Automated. Extracted epub/pdf files are moved here flat.

+ +
+
+ + +
+ +
+ + +
+ +
+ +

+
+
+ +
+

Local

+