cwa import
This commit is contained in:
@@ -14,18 +14,11 @@ class SFTPConfig:
|
|||||||
remote_path: str = ""
|
remote_path: str = ""
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class CalibreConfig:
|
|
||||||
url: str = ""
|
|
||||||
user: str = ""
|
|
||||||
password: str = ""
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class AppConfig:
|
class AppConfig:
|
||||||
sftp: SFTPConfig = field(default_factory=SFTPConfig)
|
sftp: SFTPConfig = field(default_factory=SFTPConfig)
|
||||||
calibre: CalibreConfig = field(default_factory=CalibreConfig)
|
work_dir: str = "/tmp/calibresync"
|
||||||
local_work_dir: str = "/tmp/calibresync"
|
import_dir: str = ""
|
||||||
|
|
||||||
|
|
||||||
def load() -> AppConfig:
|
def load() -> AppConfig:
|
||||||
@@ -40,12 +33,8 @@ def load() -> AppConfig:
|
|||||||
password=s.get("sftp_password", ""),
|
password=s.get("sftp_password", ""),
|
||||||
remote_path=s.get("sftp_remote_path", ""),
|
remote_path=s.get("sftp_remote_path", ""),
|
||||||
),
|
),
|
||||||
calibre=CalibreConfig(
|
work_dir=s.get("work_dir", "/tmp/calibresync"),
|
||||||
url=s.get("calibre_url", "").rstrip("/"),
|
import_dir=s.get("import_dir", ""),
|
||||||
user=s.get("calibre_user", ""),
|
|
||||||
password=s.get("calibre_pass", ""),
|
|
||||||
),
|
|
||||||
local_work_dir=s.get("local_work_dir", "/tmp/calibresync"),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -53,8 +42,8 @@ def save(form: dict) -> None:
|
|||||||
keys = [
|
keys = [
|
||||||
"sftp_host", "sftp_port", "sftp_user", "sftp_auth_method",
|
"sftp_host", "sftp_port", "sftp_user", "sftp_auth_method",
|
||||||
"sftp_password", "sftp_remote_path",
|
"sftp_password", "sftp_remote_path",
|
||||||
"calibre_url", "calibre_user", "calibre_pass",
|
"work_dir", "import_dir",
|
||||||
"local_work_dir", "scheduler_interval_minutes", "sync_batch_size",
|
"scheduler_interval_minutes", "sync_batch_size",
|
||||||
]
|
]
|
||||||
for key in keys:
|
for key in keys:
|
||||||
if key in form and form[key] is not None:
|
if key in form and form[key] is not None:
|
||||||
|
|||||||
@@ -47,23 +47,13 @@ def init_db() -> None:
|
|||||||
error_msg TEXT
|
error_msg TEXT
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS uploaded_books (
|
|
||||||
id INTEGER PRIMARY KEY,
|
|
||||||
filename TEXT NOT NULL,
|
|
||||||
file_hash TEXT UNIQUE NOT NULL,
|
|
||||||
zip_source TEXT,
|
|
||||||
uploaded_at TEXT,
|
|
||||||
status TEXT
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS sync_runs (
|
CREATE TABLE IF NOT EXISTS sync_runs (
|
||||||
id INTEGER PRIMARY KEY,
|
id INTEGER PRIMARY KEY,
|
||||||
started_at TEXT NOT NULL,
|
started_at TEXT NOT NULL,
|
||||||
finished_at TEXT,
|
finished_at TEXT,
|
||||||
zips_found INTEGER DEFAULT 0,
|
zips_found INTEGER DEFAULT 0,
|
||||||
zips_new INTEGER DEFAULT 0,
|
zips_new INTEGER DEFAULT 0,
|
||||||
books_uploaded INTEGER DEFAULT 0,
|
books_imported INTEGER DEFAULT 0,
|
||||||
books_skipped INTEGER DEFAULT 0,
|
|
||||||
books_errored INTEGER DEFAULT 0,
|
books_errored INTEGER DEFAULT 0,
|
||||||
status TEXT DEFAULT 'running',
|
status TEXT DEFAULT 'running',
|
||||||
error_msg TEXT
|
error_msg TEXT
|
||||||
@@ -171,40 +161,6 @@ def get_recent_zips(limit: int = 50) -> list[sqlite3.Row]:
|
|||||||
).fetchall()
|
).fetchall()
|
||||||
|
|
||||||
|
|
||||||
# --- Uploaded books ---
|
|
||||||
|
|
||||||
def is_book_uploaded(file_hash: str) -> bool:
|
|
||||||
with get_db() as conn:
|
|
||||||
row = conn.execute(
|
|
||||||
"SELECT id FROM uploaded_books WHERE file_hash = ? AND status IN ('uploaded', 'skipped_duplicate')",
|
|
||||||
(file_hash,),
|
|
||||||
).fetchone()
|
|
||||||
return row is not None
|
|
||||||
|
|
||||||
|
|
||||||
def record_book(filename: str, file_hash: str, zip_source: str, status: str) -> None:
|
|
||||||
with get_db() as conn:
|
|
||||||
conn.execute(
|
|
||||||
"""INSERT INTO uploaded_books (filename, file_hash, zip_source, uploaded_at, status)
|
|
||||||
VALUES (?, ?, ?, ?, ?)
|
|
||||||
ON CONFLICT(file_hash) DO UPDATE SET status = excluded.status""",
|
|
||||||
(filename, file_hash, zip_source, _now(), status),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def get_books(limit: int = 200, offset: int = 0) -> list[sqlite3.Row]:
|
|
||||||
with get_db() as conn:
|
|
||||||
return conn.execute(
|
|
||||||
"SELECT * FROM uploaded_books ORDER BY uploaded_at DESC LIMIT ? OFFSET ?",
|
|
||||||
(limit, offset),
|
|
||||||
).fetchall()
|
|
||||||
|
|
||||||
|
|
||||||
def get_books_count() -> int:
|
|
||||||
with get_db() as conn:
|
|
||||||
return conn.execute("SELECT COUNT(*) FROM uploaded_books").fetchone()[0]
|
|
||||||
|
|
||||||
|
|
||||||
# --- Sync runs ---
|
# --- Sync runs ---
|
||||||
|
|
||||||
def start_sync_run() -> int:
|
def start_sync_run() -> int:
|
||||||
@@ -233,35 +189,28 @@ def get_recent_runs(limit: int = 10) -> list[sqlite3.Row]:
|
|||||||
|
|
||||||
def get_stats() -> dict:
|
def get_stats() -> dict:
|
||||||
with get_db() as conn:
|
with get_db() as conn:
|
||||||
total_books = conn.execute("SELECT COUNT(*) FROM uploaded_books").fetchone()[0]
|
|
||||||
uploaded = conn.execute(
|
|
||||||
"SELECT COUNT(*) FROM uploaded_books WHERE status = 'uploaded'"
|
|
||||||
).fetchone()[0]
|
|
||||||
skipped = conn.execute(
|
|
||||||
"SELECT COUNT(*) FROM uploaded_books WHERE status = 'skipped_duplicate'"
|
|
||||||
).fetchone()[0]
|
|
||||||
total_zips = conn.execute("SELECT COUNT(*) FROM processed_zips").fetchone()[0]
|
total_zips = conn.execute("SELECT COUNT(*) FROM processed_zips").fetchone()[0]
|
||||||
|
total_imported = conn.execute(
|
||||||
|
"SELECT COALESCE(SUM(books_imported), 0) FROM sync_runs"
|
||||||
|
).fetchone()[0]
|
||||||
last_run = conn.execute(
|
last_run = conn.execute(
|
||||||
"SELECT started_at, status FROM sync_runs ORDER BY started_at DESC LIMIT 1"
|
"SELECT started_at, status FROM sync_runs ORDER BY started_at DESC LIMIT 1"
|
||||||
).fetchone()
|
).fetchone()
|
||||||
return {
|
return {
|
||||||
"total_books": total_books,
|
|
||||||
"uploaded": uploaded,
|
|
||||||
"skipped": skipped,
|
|
||||||
"total_zips": total_zips,
|
"total_zips": total_zips,
|
||||||
|
"total_imported": total_imported,
|
||||||
"last_run": dict(last_run) if last_run else None,
|
"last_run": dict(last_run) if last_run else None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def clear_sync_data() -> dict:
|
def clear_sync_data() -> dict:
|
||||||
"""Delete all processed_zips, uploaded_books, and sync_runs rows. Settings are kept.
|
"""Delete all processed_zips and sync_runs rows. Settings are kept.
|
||||||
Also resets the remote scan timestamp so the next sync does a full rescan."""
|
Also resets the remote scan timestamp so the next sync does a full rescan."""
|
||||||
with get_db() as conn:
|
with get_db() as conn:
|
||||||
zips = conn.execute("DELETE FROM processed_zips").rowcount
|
zips = conn.execute("DELETE FROM processed_zips").rowcount
|
||||||
books = conn.execute("DELETE FROM uploaded_books").rowcount
|
|
||||||
runs = conn.execute("DELETE FROM sync_runs").rowcount
|
runs = conn.execute("DELETE FROM sync_runs").rowcount
|
||||||
conn.execute("DELETE FROM settings WHERE key = 'remote_cache_last_scan'")
|
conn.execute("DELETE FROM settings WHERE key = 'remote_cache_last_scan'")
|
||||||
return {"zips": zips, "books": books, "runs": runs}
|
return {"zips": zips, "runs": runs}
|
||||||
|
|
||||||
|
|
||||||
def _now() -> str:
|
def _now() -> str:
|
||||||
|
|||||||
@@ -6,6 +6,8 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
# Persists the SQLite database and settings across container restarts
|
# Persists the SQLite database and settings across container restarts
|
||||||
- ./data:/app/data
|
- ./data:/app/data
|
||||||
|
# CWA import folder — set the host path to match your CWA ingest directory
|
||||||
|
- /path/to/cwa-import:/cwa-import
|
||||||
# Optional: mount your SSH private key read-only instead of pasting it in the UI
|
# Optional: mount your SSH private key read-only instead of pasting it in the UI
|
||||||
# - ~/.ssh/id_rsa:/run/secrets/ssh_key:ro
|
# - ~/.ssh/id_rsa:/run/secrets/ssh_key:ro
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|||||||
@@ -12,8 +12,6 @@ import config
|
|||||||
import db
|
import db
|
||||||
import sftp as sftp_module
|
import sftp as sftp_module
|
||||||
import sync
|
import sync
|
||||||
import uploader
|
|
||||||
from uploader import CalibreClient, delete_book, fetch_all_books, find_duplicate_groups
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s — %(message)s")
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s — %(message)s")
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
@@ -77,23 +75,6 @@ async def dashboard(request: Request):
|
|||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
# --- Books ---
|
|
||||||
|
|
||||||
@app.get("/books", response_class=HTMLResponse)
|
|
||||||
async def books_page(request: Request, page: int = 1):
|
|
||||||
per_page = 50
|
|
||||||
offset = (page - 1) * per_page
|
|
||||||
books = [dict(b) for b in db.get_books(limit=per_page, offset=offset)]
|
|
||||||
total = db.get_books_count()
|
|
||||||
pages = max(1, (total + per_page - 1) // per_page)
|
|
||||||
return templates.TemplateResponse(request, "books.html", {
|
|
||||||
"books": books,
|
|
||||||
"page": page,
|
|
||||||
"pages": pages,
|
|
||||||
"total": total,
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
# --- Settings ---
|
# --- Settings ---
|
||||||
|
|
||||||
@app.get("/settings", response_class=HTMLResponse)
|
@app.get("/settings", response_class=HTMLResponse)
|
||||||
@@ -117,10 +98,8 @@ async def save_settings(
|
|||||||
sftp_key: str = Form(""),
|
sftp_key: str = Form(""),
|
||||||
sftp_password: str = Form(""),
|
sftp_password: str = Form(""),
|
||||||
sftp_remote_path: str = Form(""),
|
sftp_remote_path: str = Form(""),
|
||||||
calibre_url: str = Form(""),
|
work_dir: str = Form("/tmp/calibresync"),
|
||||||
calibre_user: str = Form(""),
|
import_dir: str = Form(""),
|
||||||
calibre_pass: str = Form(""),
|
|
||||||
local_work_dir: str = Form("/tmp/calibresync"),
|
|
||||||
scheduler_interval_minutes: str = Form("0"),
|
scheduler_interval_minutes: str = Form("0"),
|
||||||
sync_batch_size: str = Form("0"),
|
sync_batch_size: str = Form("0"),
|
||||||
):
|
):
|
||||||
@@ -132,10 +111,8 @@ async def save_settings(
|
|||||||
"sftp_key": sftp_key,
|
"sftp_key": sftp_key,
|
||||||
"sftp_password": sftp_password,
|
"sftp_password": sftp_password,
|
||||||
"sftp_remote_path": sftp_remote_path,
|
"sftp_remote_path": sftp_remote_path,
|
||||||
"calibre_url": calibre_url,
|
"work_dir": work_dir,
|
||||||
"calibre_user": calibre_user,
|
"import_dir": import_dir,
|
||||||
"calibre_pass": calibre_pass,
|
|
||||||
"local_work_dir": local_work_dir,
|
|
||||||
"scheduler_interval_minutes": scheduler_interval_minutes,
|
"scheduler_interval_minutes": scheduler_interval_minutes,
|
||||||
"sync_batch_size": sync_batch_size,
|
"sync_batch_size": sync_batch_size,
|
||||||
})
|
})
|
||||||
@@ -179,111 +156,6 @@ async def test_ssh():
|
|||||||
return {"ok": ok, "message": message}
|
return {"ok": ok, "message": message}
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/test/calibre")
|
|
||||||
async def test_calibre():
|
|
||||||
cfg = config.load()
|
|
||||||
ok, message = uploader.test_connection(cfg.calibre)
|
|
||||||
return {"ok": ok, "message": message}
|
|
||||||
|
|
||||||
|
|
||||||
# --- Duplicates ---
|
|
||||||
|
|
||||||
@app.get("/duplicates", response_class=HTMLResponse)
|
|
||||||
async def duplicates_page(request: Request):
|
|
||||||
cfg = config.load()
|
|
||||||
error = None
|
|
||||||
groups: list = []
|
|
||||||
total_books = 0
|
|
||||||
try:
|
|
||||||
books = fetch_all_books(cfg.calibre)
|
|
||||||
total_books = len(books)
|
|
||||||
groups = find_duplicate_groups(books)
|
|
||||||
except Exception as e:
|
|
||||||
error = str(e)
|
|
||||||
return templates.TemplateResponse(request, "duplicates.html", {
|
|
||||||
"groups": groups,
|
|
||||||
"total_books": total_books,
|
|
||||||
"error": error,
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/delete_book/{book_id}")
|
|
||||||
async def delete_book_api(book_id: int):
|
|
||||||
cfg = config.load()
|
|
||||||
ok, message = delete_book(cfg.calibre, book_id)
|
|
||||||
return {"ok": ok, "message": message}
|
|
||||||
|
|
||||||
|
|
||||||
_dedup_state: dict = {"running": False, "deleted": 0, "failed": 0, "total": 0, "done": False, "error": None}
|
|
||||||
|
|
||||||
|
|
||||||
def _run_dedup():
|
|
||||||
global _dedup_state
|
|
||||||
try:
|
|
||||||
cfg = config.load()
|
|
||||||
log.info("Dedup: fetching all books ...")
|
|
||||||
client = CalibreClient(cfg.calibre)
|
|
||||||
client._ensure_auth()
|
|
||||||
books = fetch_all_books(cfg.calibre)
|
|
||||||
groups = find_duplicate_groups(books)
|
|
||||||
to_delete = [b for group in groups for b in sorted(group, key=lambda x: x.get("id", 0))[1:]]
|
|
||||||
_dedup_state.update({"total": len(to_delete), "deleted": 0, "failed": 0})
|
|
||||||
log.info("Dedup: %d duplicate(s) to delete across %d group(s)", len(to_delete), len(groups))
|
|
||||||
for book in to_delete:
|
|
||||||
ok, msg = delete_book(cfg.calibre, book["id"], client)
|
|
||||||
if ok:
|
|
||||||
_dedup_state["deleted"] += 1
|
|
||||||
else:
|
|
||||||
_dedup_state["failed"] += 1
|
|
||||||
log.warning("Dedup: failed to delete book %d: %s", book["id"], msg)
|
|
||||||
if _dedup_state["deleted"] % 10 == 0:
|
|
||||||
log.info("Dedup progress: %d / %d deleted", _dedup_state["deleted"], _dedup_state["total"])
|
|
||||||
log.info("Dedup done: %d deleted, %d failed", _dedup_state["deleted"], _dedup_state["failed"])
|
|
||||||
except Exception as e:
|
|
||||||
log.error("Dedup error: %s", e)
|
|
||||||
_dedup_state["error"] = str(e)
|
|
||||||
finally:
|
|
||||||
_dedup_state["running"] = False
|
|
||||||
_dedup_state["done"] = True
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/delete_duplicates")
|
|
||||||
async def delete_duplicates_api(background_tasks: BackgroundTasks):
|
|
||||||
if _dedup_state["running"]:
|
|
||||||
return {"ok": False, "message": "Already running"}
|
|
||||||
_dedup_state.update({"running": True, "deleted": 0, "failed": 0, "total": 0, "done": False, "error": None})
|
|
||||||
background_tasks.add_task(_run_dedup)
|
|
||||||
return {"ok": True, "message": "Started"}
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/delete_duplicates/status")
|
|
||||||
async def delete_duplicates_status():
|
|
||||||
return _dedup_state
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/debug/calibre_books")
|
|
||||||
async def debug_calibre_books():
|
|
||||||
"""Show raw Calibre-Web listbooks response shape so we can identify field names."""
|
|
||||||
cfg = config.load()
|
|
||||||
from uploader import CalibreClient
|
|
||||||
client = CalibreClient(cfg.calibre)
|
|
||||||
client._ensure_auth()
|
|
||||||
resp = client._session.get(
|
|
||||||
f"{cfg.calibre.url}/ajax/listbooks",
|
|
||||||
params={"draw": 1, "start": 0, "length": 5, "sort": "title", "order": "asc"},
|
|
||||||
timeout=30,
|
|
||||||
)
|
|
||||||
data = resp.json()
|
|
||||||
non_list = {k: v for k, v in data.items() if not isinstance(v, list)}
|
|
||||||
list_keys = {k: len(v) for k, v in data.items() if isinstance(v, list)}
|
|
||||||
return {
|
|
||||||
"http_status": resp.status_code,
|
|
||||||
"top_level_keys": list(data.keys()),
|
|
||||||
"non_list_fields": non_list,
|
|
||||||
"list_fields_lengths": list_keys,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# --- Data reset ---
|
# --- Data reset ---
|
||||||
|
|
||||||
@app.post("/settings/reset-sync-data")
|
@app.post("/settings/reset-sync-data")
|
||||||
|
|||||||
@@ -4,5 +4,4 @@ jinja2
|
|||||||
python-multipart
|
python-multipart
|
||||||
paramiko
|
paramiko
|
||||||
rarfile
|
rarfile
|
||||||
requests
|
|
||||||
apscheduler
|
apscheduler
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import logging
|
import logging
|
||||||
|
import shutil
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -7,7 +8,6 @@ import config
|
|||||||
import db
|
import db
|
||||||
import extractor
|
import extractor
|
||||||
import sftp as sftp_module
|
import sftp as sftp_module
|
||||||
from uploader import CalibreClient, CalibreUnavailableError
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -33,22 +33,23 @@ def run_sync(limit: int | None = None) -> None:
|
|||||||
|
|
||||||
_running = True
|
_running = True
|
||||||
run_id = db.start_sync_run()
|
run_id = db.start_sync_run()
|
||||||
counters = dict(zips_found=0, zips_new=0, books_uploaded=0, books_skipped=0, books_errored=0)
|
counters = dict(zips_found=0, zips_new=0, books_imported=0, books_errored=0)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
log.info("Sync started (limit=%s)", limit)
|
log.info("Sync started (limit=%s)", limit)
|
||||||
cfg = config.load()
|
cfg = config.load()
|
||||||
_validate_config(cfg)
|
_validate_config(cfg)
|
||||||
log.info("Config OK — work dir: %s", cfg.local_work_dir)
|
log.info("Config OK — work dir: %s, import dir: %s", cfg.work_dir, cfg.import_dir)
|
||||||
|
|
||||||
work_dir = Path(cfg.local_work_dir)
|
work_dir = Path(cfg.work_dir)
|
||||||
work_dir.mkdir(parents=True, exist_ok=True)
|
work_dir.mkdir(parents=True, exist_ok=True)
|
||||||
log.info("Work dir ready: %s", work_dir)
|
|
||||||
|
import_dir = Path(cfg.import_dir)
|
||||||
|
import_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
log.info("Connecting to SFTP %s@%s:%s ...", cfg.sftp.user, cfg.sftp.host, cfg.sftp.port)
|
log.info("Connecting to SFTP %s@%s:%s ...", cfg.sftp.user, cfg.sftp.host, cfg.sftp.port)
|
||||||
new_zips = sftp_module.list_new_zips(cfg.sftp, max_results=limit)
|
new_zips = sftp_module.list_new_zips(cfg.sftp, max_results=limit)
|
||||||
counters["zips_found"] = len(new_zips)
|
counters["zips_found"] = len(new_zips)
|
||||||
|
|
||||||
counters["zips_new"] = len(new_zips)
|
counters["zips_new"] = len(new_zips)
|
||||||
|
|
||||||
if not new_zips:
|
if not new_zips:
|
||||||
@@ -56,21 +57,11 @@ def run_sync(limit: int | None = None) -> None:
|
|||||||
db.finish_sync_run(run_id, status="success", **counters)
|
db.finish_sync_run(run_id, status="success", **counters)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Determine chunk size; 0 means process everything in one chunk
|
|
||||||
batch_size = int(db.get_setting("sync_batch_size", "0") or "0")
|
batch_size = int(db.get_setting("sync_batch_size", "0") or "0")
|
||||||
if batch_size <= 0:
|
if batch_size <= 0:
|
||||||
batch_size = len(new_zips)
|
batch_size = len(new_zips)
|
||||||
|
|
||||||
total_batches = -(-len(new_zips) // batch_size) # ceiling division
|
total_batches = -(-len(new_zips) // batch_size)
|
||||||
client = CalibreClient(cfg.calibre)
|
|
||||||
|
|
||||||
# Pre-load existing book titles so duplicate detection doesn't need per-book OPDS searches
|
|
||||||
try:
|
|
||||||
from uploader import fetch_all_books
|
|
||||||
existing = fetch_all_books(cfg.calibre)
|
|
||||||
client.preload_existing_titles(existing)
|
|
||||||
except Exception as exc:
|
|
||||||
log.warning("Could not pre-load existing books (%s) — will fall back to per-book OPDS search", exc)
|
|
||||||
|
|
||||||
for batch_num, i in enumerate(range(0, len(new_zips), batch_size), start=1):
|
for batch_num, i in enumerate(range(0, len(new_zips), batch_size), start=1):
|
||||||
chunk = new_zips[i : i + batch_size]
|
chunk = new_zips[i : i + batch_size]
|
||||||
@@ -89,34 +80,21 @@ def run_sync(limit: int | None = None) -> None:
|
|||||||
books = extractor.extract(local_zip, work_dir / "extracted")
|
books = extractor.extract(local_zip, work_dir / "extracted")
|
||||||
log.info("Extract done in %.1fs — %d book(s)", time.monotonic() - t1, len(books))
|
log.info("Extract done in %.1fs — %d book(s)", time.monotonic() - t1, len(books))
|
||||||
|
|
||||||
books_errored_this_zip = 0
|
|
||||||
for book in books:
|
for book in books:
|
||||||
t2 = time.monotonic()
|
dest = import_dir / book.name
|
||||||
status = client.upload(book, zip_source=remote_zip.remote_path)
|
if dest.exists():
|
||||||
log.info("Upload '%s' → %s (%.1fs)", book.name, status, time.monotonic() - t2)
|
log.info("Skipping '%s' — already exists in import dir", book.name)
|
||||||
time.sleep(2)
|
|
||||||
if status == "uploaded":
|
|
||||||
counters["books_uploaded"] += 1
|
|
||||||
elif status == "skipped_duplicate":
|
|
||||||
counters["books_skipped"] += 1
|
|
||||||
else:
|
else:
|
||||||
counters["books_errored"] += 1
|
shutil.move(str(book), str(dest))
|
||||||
books_errored_this_zip += 1
|
log.info("Moved '%s' → %s", book.name, import_dir)
|
||||||
|
counters["books_imported"] += 1
|
||||||
if books_errored_this_zip:
|
|
||||||
zip_status = "error"
|
|
||||||
zip_error = f"{books_errored_this_zip} book upload(s) failed — will retry next sync"
|
|
||||||
|
|
||||||
extractor.cleanup(work_dir / "extracted" / local_zip.stem)
|
extractor.cleanup(work_dir / "extracted" / local_zip.stem)
|
||||||
except CalibreUnavailableError as e:
|
|
||||||
log.error("Calibre-Web unavailable — aborting sync run: %s", e)
|
|
||||||
db.mark_zip_processed(remote_zip.remote_path, remote_zip.file_size, "error", str(e))
|
|
||||||
db.finish_sync_run(run_id, status="error", error_msg=str(e), **counters)
|
|
||||||
return
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error("Error processing %s: %s", remote_zip.remote_path, e)
|
log.error("Error processing %s: %s", remote_zip.remote_path, e)
|
||||||
zip_status = "error"
|
zip_status = "error"
|
||||||
zip_error = str(e)
|
zip_error = str(e)
|
||||||
|
counters["books_errored"] += 1
|
||||||
finally:
|
finally:
|
||||||
if local_zip and local_zip.exists():
|
if local_zip and local_zip.exists():
|
||||||
extractor.cleanup(local_zip)
|
extractor.cleanup(local_zip)
|
||||||
@@ -126,9 +104,8 @@ def run_sync(limit: int | None = None) -> None:
|
|||||||
|
|
||||||
db.finish_sync_run(run_id, status="success", **counters)
|
db.finish_sync_run(run_id, status="success", **counters)
|
||||||
log.info(
|
log.info(
|
||||||
"Sync complete. Total zips: %d, Uploaded: %d, Skipped: %d, Errors: %d",
|
"Sync complete. Total zips: %d, Imported: %d, Errors: %d",
|
||||||
counters["zips_new"], counters["books_uploaded"],
|
counters["zips_new"], counters["books_imported"], counters["books_errored"],
|
||||||
counters["books_skipped"], counters["books_errored"],
|
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception("Sync run failed: %s", e)
|
log.exception("Sync run failed: %s", e)
|
||||||
@@ -150,9 +127,7 @@ def _validate_config(cfg) -> None:
|
|||||||
missing.append("SSH private key")
|
missing.append("SSH private key")
|
||||||
if cfg.sftp.auth_method == "password" and not cfg.sftp.password:
|
if cfg.sftp.auth_method == "password" and not cfg.sftp.password:
|
||||||
missing.append("SSH password")
|
missing.append("SSH password")
|
||||||
if not cfg.calibre.url:
|
if not cfg.import_dir:
|
||||||
missing.append("Calibre-Web URL")
|
missing.append("CWA import folder")
|
||||||
if not cfg.calibre.user:
|
|
||||||
missing.append("Calibre-Web username")
|
|
||||||
if missing:
|
if missing:
|
||||||
raise ValueError(f"Missing configuration: {', '.join(missing)}")
|
raise ValueError(f"Missing configuration: {', '.join(missing)}")
|
||||||
|
|||||||
@@ -1,46 +0,0 @@
|
|||||||
{% extends "base.html" %}
|
|
||||||
{% block title %}Books — CalibreSync{% endblock %}
|
|
||||||
|
|
||||||
{% block content %}
|
|
||||||
<div class="page-header">
|
|
||||||
<h1>Books <span class="muted">({{ total }})</span></h1>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{% if books %}
|
|
||||||
<table>
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Filename</th>
|
|
||||||
<th>Status</th>
|
|
||||||
<th>Source zip</th>
|
|
||||||
<th>Uploaded</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
{% for b in books %}
|
|
||||||
<tr>
|
|
||||||
<td>{{ b.filename }}</td>
|
|
||||||
<td><span class="badge badge-{{ b.status }}">{{ b.status }}</span></td>
|
|
||||||
<td class="mono small muted">{{ b.zip_source or "—" }}</td>
|
|
||||||
<td>{{ b.uploaded_at[:19].replace("T"," ") if b.uploaded_at else "—" }}</td>
|
|
||||||
</tr>
|
|
||||||
{% endfor %}
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
{% if pages > 1 %}
|
|
||||||
<div class="pagination">
|
|
||||||
{% if page > 1 %}
|
|
||||||
<a href="/books?page={{ page - 1 }}">« Prev</a>
|
|
||||||
{% endif %}
|
|
||||||
<span>Page {{ page }} of {{ pages }}</span>
|
|
||||||
{% if page < pages %}
|
|
||||||
<a href="/books?page={{ page + 1 }}">Next »</a>
|
|
||||||
{% endif %}
|
|
||||||
</div>
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
{% else %}
|
|
||||||
<p class="muted">No books recorded yet.</p>
|
|
||||||
{% endif %}
|
|
||||||
{% endblock %}
|
|
||||||
@@ -1,124 +0,0 @@
|
|||||||
{% extends "base.html" %}
|
|
||||||
{% block title %}Duplicates — CalibreSync{% endblock %}
|
|
||||||
|
|
||||||
{% block content %}
|
|
||||||
<div class="page-header">
|
|
||||||
<h1>Duplicate books in Calibre-Web</h1>
|
|
||||||
{% if groups %}
|
|
||||||
<div class="header-actions">
|
|
||||||
<button class="btn btn-danger" onclick="deleteAll(this)">Delete all duplicates (keep oldest)</button>
|
|
||||||
</div>
|
|
||||||
{% endif %}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div id="dedup-progress" style="display:none" class="alert alert-success"></div>
|
|
||||||
|
|
||||||
{% if error %}
|
|
||||||
<div class="alert alert-warning">Could not fetch books from Calibre-Web: {{ error }}</div>
|
|
||||||
{% else %}
|
|
||||||
<p class="muted small" style="margin-bottom:1.5rem">
|
|
||||||
Scanned <strong>{{ total_books }}</strong> book(s) —
|
|
||||||
{% if groups %}
|
|
||||||
found <strong>{{ groups|length }}</strong> duplicate group(s) (same title + author).
|
|
||||||
The oldest copy (lowest ID) is kept when deleting all.
|
|
||||||
{% else %}
|
|
||||||
no duplicates found.
|
|
||||||
{% endif %}
|
|
||||||
</p>
|
|
||||||
|
|
||||||
{% for group in groups %}
|
|
||||||
<div class="form-section" style="margin-bottom:1rem">
|
|
||||||
<h3 style="margin-top:0">{{ group[0].title }}</h3>
|
|
||||||
<table>
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>ID</th>
|
|
||||||
<th>Title</th>
|
|
||||||
<th>Authors</th>
|
|
||||||
<th>Format</th>
|
|
||||||
<th></th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
{% for book in group %}
|
|
||||||
<tr id="row-{{ book.id }}">
|
|
||||||
<td class="mono muted">{{ book.id }}</td>
|
|
||||||
<td>{{ book.title }}</td>
|
|
||||||
<td>{{ book.authors }}</td>
|
|
||||||
<td>{{ book.format or "—" }}</td>
|
|
||||||
<td>
|
|
||||||
<button class="btn btn-danger" style="padding:0.2rem 0.7rem;font-size:0.85rem"
|
|
||||||
onclick="deleteBook({{ book.id }}, this)">Delete</button>
|
|
||||||
<span id="status-{{ book.id }}" class="muted small" style="margin-left:0.5rem"></span>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
{% endfor %}
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
</div>
|
|
||||||
{% endfor %}
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
<script>
|
|
||||||
async function deleteBook(id, btn) {
|
|
||||||
if (!confirm('Delete book ID ' + id + ' from Calibre-Web?')) return;
|
|
||||||
btn.disabled = true;
|
|
||||||
btn.textContent = 'Deleting…';
|
|
||||||
const status = document.getElementById('status-' + id);
|
|
||||||
try {
|
|
||||||
const r = await fetch('/api/delete_book/' + id, {method: 'POST'});
|
|
||||||
const data = await r.json();
|
|
||||||
if (data.ok) {
|
|
||||||
document.getElementById('row-' + id).style.opacity = '0.35';
|
|
||||||
btn.textContent = 'Deleted';
|
|
||||||
status.textContent = '✓';
|
|
||||||
} else {
|
|
||||||
btn.disabled = false;
|
|
||||||
btn.textContent = 'Delete';
|
|
||||||
status.textContent = 'Failed: ' + data.message;
|
|
||||||
status.style.color = 'var(--error, #f87171)';
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
btn.disabled = false;
|
|
||||||
btn.textContent = 'Delete';
|
|
||||||
status.textContent = 'Error: ' + e;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async function deleteAll(btn) {
|
|
||||||
if (!confirm('Delete all duplicates from Calibre-Web, keeping the oldest copy of each title+author? This cannot be undone.')) return;
|
|
||||||
btn.disabled = true;
|
|
||||||
btn.textContent = 'Starting…';
|
|
||||||
const progress = document.getElementById('dedup-progress');
|
|
||||||
progress.style.display = '';
|
|
||||||
progress.textContent = 'Fetching book list from Calibre-Web…';
|
|
||||||
|
|
||||||
await fetch('/api/delete_duplicates', {method: 'POST'});
|
|
||||||
|
|
||||||
const poll = setInterval(async () => {
|
|
||||||
const r = await fetch('/api/delete_duplicates/status');
|
|
||||||
const s = await r.json();
|
|
||||||
if (s.error) {
|
|
||||||
clearInterval(poll);
|
|
||||||
progress.textContent = 'Error: ' + s.error;
|
|
||||||
progress.className = 'alert alert-warning';
|
|
||||||
btn.disabled = false;
|
|
||||||
btn.textContent = 'Delete all duplicates (keep oldest)';
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (s.total > 0) {
|
|
||||||
progress.textContent = `Deleting… ${s.deleted} / ${s.total} deleted, ${s.failed} failed`;
|
|
||||||
} else {
|
|
||||||
progress.textContent = 'Scanning for duplicates…';
|
|
||||||
}
|
|
||||||
if (s.done) {
|
|
||||||
clearInterval(poll);
|
|
||||||
progress.textContent = `Done — ${s.deleted} book(s) deleted, ${s.failed} failed. Reload to refresh the list.`;
|
|
||||||
btn.textContent = 'Reload';
|
|
||||||
btn.disabled = false;
|
|
||||||
btn.onclick = () => location.reload();
|
|
||||||
}
|
|
||||||
}, 2000);
|
|
||||||
}
|
|
||||||
</script>
|
|
||||||
{% endblock %}
|
|
||||||
+4
-14
@@ -59,16 +59,8 @@
|
|||||||
<div class="stat-label">Zip archives processed</div>
|
<div class="stat-label">Zip archives processed</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="stat-card">
|
<div class="stat-card">
|
||||||
<div class="stat-value">{{ stats.uploaded }}</div>
|
<div class="stat-value">{{ stats.total_imported }}</div>
|
||||||
<div class="stat-label">Books uploaded</div>
|
<div class="stat-label">Books imported</div>
|
||||||
</div>
|
|
||||||
<div class="stat-card">
|
|
||||||
<div class="stat-value">{{ stats.skipped }}</div>
|
|
||||||
<div class="stat-label">Duplicates skipped</div>
|
|
||||||
</div>
|
|
||||||
<div class="stat-card">
|
|
||||||
<div class="stat-value">{{ stats.total_books }}</div>
|
|
||||||
<div class="stat-label">Total book records</div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -81,8 +73,7 @@
|
|||||||
<th>Finished</th>
|
<th>Finished</th>
|
||||||
<th>Status</th>
|
<th>Status</th>
|
||||||
<th>New zips</th>
|
<th>New zips</th>
|
||||||
<th>Uploaded</th>
|
<th>Imported</th>
|
||||||
<th>Skipped</th>
|
|
||||||
<th>Errors</th>
|
<th>Errors</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
@@ -93,8 +84,7 @@
|
|||||||
<td>{{ r.finished_at[:19].replace("T"," ") if r.finished_at else "—" }}</td>
|
<td>{{ r.finished_at[:19].replace("T"," ") if r.finished_at else "—" }}</td>
|
||||||
<td><span class="badge badge-{{ r.status }}">{{ r.status }}</span></td>
|
<td><span class="badge badge-{{ r.status }}">{{ r.status }}</span></td>
|
||||||
<td>{{ r.zips_new }}</td>
|
<td>{{ r.zips_new }}</td>
|
||||||
<td>{{ r.books_uploaded }}</td>
|
<td>{{ r.books_imported }}</td>
|
||||||
<td>{{ r.books_skipped }}</td>
|
|
||||||
<td>{{ r.books_errored }}</td>
|
<td>{{ r.books_errored }}</td>
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|||||||
+11
-30
@@ -84,39 +84,20 @@
|
|||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<section class="form-section">
|
|
||||||
<h2>Calibre-Web</h2>
|
|
||||||
|
|
||||||
<div class="form-row">
|
|
||||||
<label for="calibre_url">URL</label>
|
|
||||||
<input id="calibre_url" name="calibre_url" type="url" placeholder="http://localhost:8083"
|
|
||||||
value="{{ s.get('calibre_url','') }}">
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="form-row">
|
|
||||||
<label for="calibre_user">Username</label>
|
|
||||||
<input id="calibre_user" name="calibre_user" type="text" value="{{ s.get('calibre_user','') }}">
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="form-row">
|
|
||||||
<label for="calibre_pass">Password</label>
|
|
||||||
<input id="calibre_pass" name="calibre_pass" type="password"
|
|
||||||
value="{{ s.get('calibre_pass','') }}">
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="form-row">
|
|
||||||
<button type="button" class="btn btn-secondary" onclick="testConn('calibre', this)">Test Calibre-Web connection</button>
|
|
||||||
<p id="test-calibre-result" class="test-result"></p>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section class="form-section">
|
<section class="form-section">
|
||||||
<h2>Local</h2>
|
<h2>Local</h2>
|
||||||
|
|
||||||
<div class="form-row">
|
<div class="form-row">
|
||||||
<label for="local_work_dir">Work directory</label>
|
<label for="import_dir">CWA import folder</label>
|
||||||
<input id="local_work_dir" name="local_work_dir" type="text" placeholder="/tmp/calibresync"
|
<input id="import_dir" name="import_dir" type="text" placeholder="/mnt/cwa-import"
|
||||||
value="{{ s.get('local_work_dir','/tmp/calibresync') }}">
|
value="{{ s.get('import_dir','') }}">
|
||||||
|
<p class="muted small">Folder watched by Calibre-Web-Automated. Extracted epub/pdf files are moved here flat.</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="form-row">
|
||||||
|
<label for="work_dir">Temp work directory</label>
|
||||||
|
<input id="work_dir" name="work_dir" type="text" placeholder="/tmp/calibresync"
|
||||||
|
value="{{ s.get('work_dir','/tmp/calibresync') }}">
|
||||||
<p class="muted small">Temporary storage for downloaded zips and extracted files. Cleaned up after each run.</p>
|
<p class="muted small">Temporary storage for downloaded zips and extracted files. Cleaned up after each run.</p>
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
@@ -182,7 +163,7 @@ async function testConn(type, btn) {
|
|||||||
result.className = "test-result test-fail";
|
result.className = "test-result test-fail";
|
||||||
} finally {
|
} finally {
|
||||||
btn.disabled = false;
|
btn.disabled = false;
|
||||||
btn.textContent = type === "ssh" ? "Test SSH connection" : "Test Calibre-Web connection";
|
btn.textContent = "Test SSH connection";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|||||||
-413
@@ -1,413 +0,0 @@
|
|||||||
import hashlib
|
|
||||||
import logging
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
import unicodedata
|
|
||||||
from pathlib import Path
|
|
||||||
from urllib.parse import quote
|
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
import db
|
|
||||||
from config import CalibreConfig
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Content type sent for each supported book format, keyed by lower-cased file suffix.
# Suffixes missing from this table are uploaded as "application/octet-stream".
MIME_TYPES = {
    ".epub": "application/epub+zip",
    ".pdf": "application/pdf",
}

# Words stripped before comparing titles — release-group tags, language codes, format names, etc.
# Also contains common articles/conjunctions so they do not count towards word overlap.
_JUNK_WORDS = {
    "retail", "epub", "ebook", "pdf", "mobi", "azw3", "decipher",
    "swedish", "english", "danish", "norwegian", "finnish", "german", "french",
    "the", "a", "an", "och", "und", "les", "der", "die", "das",
}
|
|
||||||
|
|
||||||
|
|
||||||
class CalibreUnavailableError(RuntimeError):
    """Raised when Calibre-Web returns repeated 502/503/504 — sync run should abort.

    ``CalibreClient.upload`` raises this once three consecutive books have each
    exhausted their retries on a 502/503/504 response.
    """
|
|
||||||
|
|
||||||
|
|
||||||
class CalibreClient:
    """Session-based client that logs in to Calibre-Web and uploads book files.

    The client keeps one ``requests.Session`` for its lifetime, authenticates
    lazily on first use, and remembers the CSRF token needed by the upload form.
    Duplicate detection works on normalised title keywords, either against an
    in-memory index (see ``preload_existing_titles``) or via an OPDS search.
    """

    def __init__(self, cfg: CalibreConfig):
        """Store the connection settings; no network traffic happens here."""
        self._cfg = cfg
        self._session = requests.Session()
        self._authenticated = False
        self._upload_csrf: str | None = None
        # Number of books in a row whose upload exhausted all 502/503/504 retries.
        self._consecutive_failures = 0
        # Pre-loaded title word-sets for fast duplicate detection (set by preload_existing_titles)
        self._existing_title_sets: list[frozenset[str]] | None = None

    def preload_existing_titles(self, books: list[dict]) -> None:
        """Build an in-memory index of normalised title keywords from a pre-fetched book list.

        Entries without a truthy ``"title"`` value are ignored.
        """
        self._existing_title_sets = [
            frozenset(_normalize_words(b.get("title", "")))
            for b in books
            if b.get("title")
        ]
        log.info("Pre-loaded %d existing book titles for duplicate detection", len(self._existing_title_sets))

    def _ensure_auth(self) -> None:
        """Log in through the web form unless this client is already authenticated.

        Raises:
            requests.HTTPError: if the login page or the login POST returns an error status.
            RuntimeError: if the POST ends up back on ``/login`` (credentials rejected).
        """
        if self._authenticated:
            return
        login_url = f"{self._cfg.url}/login"
        page = self._session.get(login_url, timeout=30)
        page.raise_for_status()
        csrf = _extract_csrf(page.text)

        data = {"username": self._cfg.user, "password": self._cfg.password}
        if csrf:
            data["csrf_token"] = csrf

        resp = self._session.post(login_url, data=data, allow_redirects=True, timeout=30)
        resp.raise_for_status()
        # Still sitting on the login page after following redirects means the login was refused.
        if resp.url.rstrip("/").endswith("/login"):
            raise RuntimeError("Calibre-Web authentication failed — check credentials")
        self._authenticated = True
        # Prefer a token from the post-login page; fall back to the one from the login form.
        self._upload_csrf = _extract_csrf(resp.text) or csrf
        log.info("Authenticated to Calibre-Web at %s", self._cfg.url)

    @staticmethod
    def _titles_match(our_words, their_words) -> bool:
        """Return True if two keyword sets are similar enough to be the same book.

        Match if: 3+ words in common, OR 60%+ of the filename keywords match the
        title, OR 60%+ of the stored title's words appear in the filename keywords
        (only for titles with at least two words). The third condition catches
        short titles drowned out by filename noise.

        ``our_words`` must be non-empty; an empty ``their_words`` never matches.
        """
        if not their_words:
            return False
        overlap = len(our_words & their_words)
        return (
            overlap >= 3
            or overlap / len(our_words) >= 0.6
            or (len(their_words) >= 2 and overlap / len(their_words) >= 0.6)
        )

    def _exists_in_calibre(self, filename: str) -> bool:
        """Check whether a book already exists in Calibre-Web. Returns True if likely duplicate.

        Filenames yielding fewer than two keywords are never treated as
        duplicates. Any failure of the OPDS fallback is logged and treated as
        "not a duplicate" so the upload proceeds.
        """
        keywords = _keywords_from_filename(filename)
        if len(keywords) < 2:
            return False
        our_words = set(keywords)

        # Fast path: check pre-loaded title index (available when sync pre-fetches all books)
        if self._existing_title_sets is not None:
            for their_words in self._existing_title_sets:
                if self._titles_match(our_words, their_words):
                    log.info("Duplicate (preloaded index): '%s'", filename)
                    return True
            return False

        # Slow path fallback: OPDS search (used when no index is available)
        query = " ".join(keywords[:6])
        try:
            resp = self._session.get(
                f"{self._cfg.url}/opds/search/{quote(query, safe='')}",
                auth=(self._cfg.user, self._cfg.password),
                timeout=15,
            )
            if resp.status_code == 404:
                return False
            # Do not try to parse an error page (401/500/...) as an OPDS feed.
            resp.raise_for_status()
            for title in _parse_opds_titles(resp.text):
                if self._titles_match(our_words, set(_normalize_words(title))):
                    log.info("Duplicate (OPDS search): '%s'", filename)
                    return True
        except Exception as e:
            log.warning("OPDS search failed for '%s': %s — proceeding with upload", filename, e)
        return False

    def upload(self, book_path: Path, zip_source: str) -> str:
        """Upload a book file. Returns status: 'uploaded' | 'skipped_duplicate' | 'error'.

        Every outcome is recorded through ``db.record_book``. A 502/503/504
        response is retried up to three times with a 180 s pause; a 400 on the
        first attempt triggers one re-authentication and retry.

        Raises:
            CalibreUnavailableError: when three consecutive books have each
                exhausted their 502/503/504 retries.
        """
        file_hash = _sha256(book_path)

        # Primary guard: hash already in our DB
        if db.is_book_uploaded(file_hash):
            log.info("Skipping (already uploaded): %s", book_path.name)
            db.record_book(book_path.name, file_hash, zip_source, "skipped_duplicate")
            return "skipped_duplicate"

        try:
            self._ensure_auth()

            # Secondary guard: title search in Calibre-Web (catches pre-existing books)
            if self._exists_in_calibre(book_path.name):
                log.info("Skipping (exists in Calibre-Web): %s", book_path.name)
                db.record_book(book_path.name, file_hash, zip_source, "skipped_duplicate")
                return "skipped_duplicate"

            mime = MIME_TYPES.get(book_path.suffix.lower(), "application/octet-stream")
            for attempt in range(1, 4):
                try:
                    # Re-open the file on every attempt so each retry sends it from the start.
                    with book_path.open("rb") as fh:
                        resp = self._session.post(
                            f"{self._cfg.url}/upload",
                            files={"btn-upload": (book_path.name, fh, mime)},
                            data={"csrf_token": self._upload_csrf} if self._upload_csrf else {},
                            timeout=120,
                        )
                    if not resp.ok:
                        log.error("Upload HTTP %s (attempt %d/3) — body: %s", resp.status_code, attempt, resp.text[:300])
                    resp.raise_for_status()
                    log.info("Uploaded: %s", book_path.name)
                    self._consecutive_failures = 0
                    db.record_book(book_path.name, file_hash, zip_source, "uploaded")
                    # Add to in-session index so a later zip with the same title is skipped
                    if self._existing_title_sets is not None:
                        kw = frozenset(_keywords_from_filename(book_path.name))
                        if kw:
                            self._existing_title_sets.append(kw)
                    return "uploaded"
                except requests.HTTPError:
                    if resp.status_code in (502, 503, 504):
                        if attempt < 3:
                            log.warning("HTTP %s on attempt %d/3 — retrying in 180s ...", resp.status_code, attempt)
                            time.sleep(180)
                            continue
                        # All retries exhausted
                        self._consecutive_failures += 1
                        if self._consecutive_failures >= 3:
                            raise CalibreUnavailableError(
                                f"Calibre-Web returned {resp.status_code} on {self._consecutive_failures} "
                                "consecutive books — aborting sync run"
                            )
                        break
                    if resp.status_code == 400 and attempt == 1:
                        log.warning("HTTP 400 — CSRF token likely expired, re-authenticating ...")
                        self._authenticated = False
                        self._upload_csrf = None
                        self._ensure_auth()
                        continue
                    break

            db.record_book(book_path.name, file_hash, zip_source, "error")
            return "error"

        except CalibreUnavailableError:
            # Record this book as failed, then let the caller abort the run.
            db.record_book(book_path.name, file_hash, zip_source, "error")
            raise
        except Exception as e:
            log.error("Upload failed for %s: %s", book_path.name, e)
            db.record_book(book_path.name, file_hash, zip_source, "error")
            return "error"
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_all_books(cfg: CalibreConfig) -> list[dict]:
    """Fetch every book from Calibre-Web. Tries /ajax/listbooks first; falls back to OPDS if pagination is broken.

    Pages are requested 1000 rows at a time and de-duplicated on each row's
    ``"id"``. Paging stops when a page is empty, when a page adds nothing new,
    or when the server-reported total has been reached.

    Returns:
        The raw row dicts from ``/ajax/listbooks``, or the dicts built by
        ``_fetch_all_books_opds`` when fewer than half of the reported total
        could be retrieved.

    Raises:
        requests.HTTPError: if a ``/ajax/listbooks`` request returns an error status.
    """
    client = CalibreClient(cfg)
    client._ensure_auth()
    all_books: list[dict] = []
    seen_ids: set = set()
    page_size = 1000
    start = 0
    reported_total = 0
    while True:
        resp = client._session.get(
            f"{cfg.url}/ajax/listbooks",
            params={
                "draw": 1,
                # Both parameter spellings are sent for the offset and page length.
                "start": start, "length": page_size,
                "iDisplayStart": start, "iDisplayLength": page_size,
            },
            timeout=60,
        )
        resp.raise_for_status()
        data = resp.json()
        if start == 0:
            # Log the scalar fields once to show which "total" key this server uses.
            non_list = {k: v for k, v in data.items() if not isinstance(v, list)}
            log.info("listbooks page-0 meta fields: %s", non_list)
        rows = data.get("rows") or data.get("data") or []
        reported_total = (
            data.get("recordsTotal") or data.get("total_count") or
            data.get("total") or data.get("totalNotFiltered") or 0
        )
        new_in_page = 0
        for b in rows:
            bid = b.get("id")
            if bid not in seen_ids:
                seen_ids.add(bid)
                all_books.append(b)
                new_in_page += 1
        log.info("Books fetched: %d / %d (page gave %d new)", len(all_books), reported_total, new_in_page)
        if not rows or new_in_page == 0:
            break
        # Only trust the total as a stop condition when the server actually reported
        # one; with an unknown total (0) keep paging until a page yields nothing new.
        if reported_total > 0 and len(all_books) >= reported_total:
            break
        start += len(rows)

    # If we got far fewer books than reported, listbooks pagination is broken — use OPDS instead
    if reported_total > 0 and len(all_books) < reported_total // 2:
        log.warning(
            "listbooks pagination broken (%d/%d books retrieved). Falling back to OPDS.",
            len(all_books), reported_total,
        )
        return _fetch_all_books_opds(cfg)
    return all_books
|
|
||||||
|
|
||||||
|
|
||||||
def _fetch_all_books_opds(cfg: CalibreConfig) -> list[dict]:
    """Fetch all books via OPDS catalog, following next-page links.

    Starts at ``{cfg.url}/opds/new`` using HTTP basic auth. Stops on the first
    non-OK response, XML parse error, page without entries, missing "next"
    link, or a "next" link pointing at a page that was already fetched.

    Returns:
        One ``{"id", "title", "authors"}`` dict per distinct book id; the id is
        taken from the first ``/download/<id>/`` link of the entry, and entries
        without such a link are skipped.
    """
    import xml.etree.ElementTree as ET
    from urllib.parse import urljoin

    def _local(tag: str) -> str:
        # Strip the "{namespace}" prefix ElementTree puts in front of tag names.
        return tag.split("}")[-1] if "}" in tag else tag

    books: list[dict] = []
    seen_ids: set = set()
    visited: set[str] = set()
    url: str | None = f"{cfg.url}/opds/new"
    auth = (cfg.user, cfg.password)

    # Context manager so the session's pooled connections are released on exit.
    with requests.Session() as session:
        while url:
            if url in visited:
                # A feed whose "next" link points back at itself would loop forever.
                log.warning("OPDS next link loops back to %s — stopping", url)
                break
            visited.add(url)

            resp = session.get(url, auth=auth, timeout=30)
            if not resp.ok:
                log.warning("OPDS fetch failed HTTP %s — %s", resp.status_code, url)
                break
            try:
                root = ET.fromstring(resp.content)
            except ET.ParseError as exc:
                log.warning("OPDS XML parse error: %s", exc)
                break

            next_url: str | None = None
            entries_this_page = 0
            for elem in root:
                local = _local(elem.tag)
                if local == "link" and elem.get("rel") == "next":
                    href = elem.get("href", "")
                    if href:
                        # Resolve relative links against the current page so a path
                        # prefix in the base URL is not duplicated.
                        next_url = urljoin(url, href)
                elif local == "entry":
                    entries_this_page += 1
                    title = ""
                    author_parts: list[str] = []
                    book_id: int | None = None
                    for child in elem:
                        ctag = _local(child.tag)
                        if ctag == "title":
                            title = child.text or ""
                        elif ctag == "author":
                            for gc in child:
                                if _local(gc.tag) == "name":
                                    author_parts.append(gc.text or "")
                        elif ctag == "link":
                            m = re.search(r"/download/(\d+)/", child.get("href", ""))
                            if m and book_id is None:
                                book_id = int(m.group(1))
                    if book_id and book_id not in seen_ids:
                        seen_ids.add(book_id)
                        books.append({"id": book_id, "title": title, "authors": " & ".join(author_parts)})

            log.info("OPDS fetched: %d books total (page had %d entries)", len(books), entries_this_page)
            if not entries_this_page:
                break
            url = next_url

    return books
|
|
||||||
|
|
||||||
|
|
||||||
def delete_book(cfg: CalibreConfig, book_id: int, client: "CalibreClient | None" = None) -> tuple[bool, str]:
    """Delete a book from Calibre-Web by ID. Pass a pre-authenticated client to avoid re-auth overhead.

    Args:
        cfg: Connection settings; ``cfg.url`` is the base of every request.
        book_id: Calibre-Web id of the book to delete.
        client: Optional client to reuse; a new one is created when omitted.

    Returns:
        ``(True, "Deleted")`` on success, otherwise ``(False, <reason>)`` where
        the reason names the HTTP status. A 400 on the first attempt triggers
        one re-authentication and retry.
    """
    if client is None:
        client = CalibreClient(cfg)
    client._ensure_auth()
    csrf = client._upload_csrf
    if not csrf:
        # Try to fetch a CSRF token from the book detail page
        try:
            page = client._session.get(f"{cfg.url}/book/{book_id}", timeout=15)
            csrf = _extract_csrf(page.text)
            client._upload_csrf = csrf
        except Exception as exc:
            # Best-effort only: the delete is still attempted without a token,
            # but leave a trace so a failing pre-fetch can be diagnosed.
            log.debug("delete_book: could not pre-fetch CSRF token for book %d: %s", book_id, exc)
    for attempt in range(2):
        resp = client._session.post(
            f"{cfg.url}/delete/{book_id}",
            data={"csrf_token": csrf} if csrf else {},
            timeout=30,
        )
        if resp.ok:
            return True, "Deleted"
        if resp.status_code == 400 and attempt == 0:
            # CSRF token likely expired; re-authenticate and retry once
            log.info("delete_book: 400 on book %d — refreshing CSRF and retrying", book_id)
            client._authenticated = False
            client._upload_csrf = None
            client._ensure_auth()
            csrf = client._upload_csrf
            continue
        return False, f"HTTP {resp.status_code}"
    return False, "HTTP 400 after re-auth retry"
|
|
||||||
|
|
||||||
|
|
||||||
def find_duplicate_groups(books: list[dict]) -> list[list[dict]]:
    """Group books by normalised title+author; return only groups with 2+ entries.

    Normalisation lower-cases the text, replaces punctuation with spaces and
    collapses whitespace. Books whose title is missing, ``None`` or empty after
    normalisation are ignored; a missing or ``None`` author is treated as empty.

    Returns:
        The duplicate groups, sorted by the lower-cased title of each group's
        first book. Books keep their input order within a group.
    """
    from collections import defaultdict

    def _norm(value) -> str:
        # `or ""` guards against keys that are present but hold None.
        text = re.sub(r"[^\w\s]", " ", (value or "").lower())
        return re.sub(r"\s+", " ", text).strip()

    groups: dict[str, list[dict]] = defaultdict(list)
    for book in books:
        title = _norm(book.get("title"))
        if not title:
            continue
        groups[f"{title}||{_norm(book.get('authors'))}"].append(book)

    return sorted(
        (g for g in groups.values() if len(g) > 1),
        key=lambda g: (g[0].get("title") or "").lower(),
    )
|
|
||||||
|
|
||||||
|
|
||||||
def test_connection(cfg: CalibreConfig) -> tuple[bool, str]:
    """Try to log in to Calibre-Web with *cfg* and report the outcome.

    Returns:
        ``(True, <confirmation message>)`` when authentication succeeds,
        otherwise ``(False, <text of the exception that was raised>)``.
    """
    try:
        CalibreClient(cfg)._ensure_auth()
        message = f"Authenticated to {cfg.url} as '{cfg.user}'."
        return True, message
    except Exception as err:
        return False, str(err)
|
|
||||||
|
|
||||||
|
|
||||||
# --- Helpers ---
|
|
||||||
|
|
||||||
def _ascii_fold(s: str) -> str:
|
|
||||||
"""Strip accents: 'världens' → 'varldens', 'väg' → 'vag'."""
|
|
||||||
return "".join(c for c in unicodedata.normalize("NFKD", s) if unicodedata.category(c) != "Mn")
|
|
||||||
|
|
||||||
|
|
||||||
def _keywords_from_filename(filename: str) -> list[str]:
    """Extract meaningful words from a release-style filename for OPDS search.

    The extension is dropped, the stem is lower-cased and accent-folded, dots,
    underscores and hyphens become spaces, and remaining punctuation is
    removed. Junk words, purely numeric tokens and single-character tokens are
    discarded; the surviving words keep their original order.
    """
    stem = Path(filename).stem.lower()
    text = re.sub(r"[._\-]", " ", _ascii_fold(stem))
    text = re.sub(r"[^\w\s]", "", text)

    keywords: list[str] = []
    for token in text.split():
        if len(token) <= 1 or token in _JUNK_WORDS:
            continue
        # str.isdecimal() accepts exactly the characters regex \d does (category Nd).
        if token.isdecimal():
            continue
        keywords.append(token)
    return keywords
|
|
||||||
|
|
||||||
|
|
||||||
def _normalize_words(title: str) -> list[str]:
    """Normalize a Calibre-Web title for comparison.

    Lower-cases and accent-folds the title, removes punctuation, then returns
    the remaining words in order, minus junk words and single-character tokens.
    """
    folded = _ascii_fold(title.lower())
    cleaned = re.sub(r"[^\w\s]", "", folded)
    kept: list[str] = []
    for word in cleaned.split():
        if len(word) > 1 and word not in _JUNK_WORDS:
            kept.append(word)
    return kept
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_opds_titles(xml: str) -> list[str]:
|
|
||||||
"""Extract book titles from an OPDS Atom feed, skipping the feed title itself."""
|
|
||||||
# Grab all <title> elements; the first is the feed title ("Search results"), rest are books
|
|
||||||
titles = re.findall(r"<title>([^<]+)</title>", xml)
|
|
||||||
return titles[1:] if len(titles) > 1 else []
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_csrf(html: str) -> str | None:
|
|
||||||
m = re.search(r'name="csrf_token"\s+value="([^"]+)"', html)
|
|
||||||
if not m:
|
|
||||||
m = re.search(r'value="([^"]+)"\s+name="csrf_token"', html)
|
|
||||||
return m.group(1) if m else None
|
|
||||||
|
|
||||||
|
|
||||||
def _sha256(path: Path) -> str:
|
|
||||||
h = hashlib.sha256()
|
|
||||||
with path.open("rb") as f:
|
|
||||||
for chunk in iter(lambda: f.read(65536), b""):
|
|
||||||
h.update(chunk)
|
|
||||||
return h.hexdigest()
|
|
||||||
Reference in New Issue
Block a user