cwa import

This commit is contained in:
2026-05-13 18:24:55 +02:00
parent bfa09976b7
commit c0e1cb0688
11 changed files with 60 additions and 886 deletions
+19 -44
View File
@@ -1,4 +1,5 @@
import logging
import shutil
import threading
import time
from pathlib import Path
@@ -7,7 +8,6 @@ import config
import db
import extractor
import sftp as sftp_module
from uploader import CalibreClient, CalibreUnavailableError
log = logging.getLogger(__name__)
@@ -33,22 +33,23 @@ def run_sync(limit: int | None = None) -> None:
_running = True
run_id = db.start_sync_run()
counters = dict(zips_found=0, zips_new=0, books_uploaded=0, books_skipped=0, books_errored=0)
counters = dict(zips_found=0, zips_new=0, books_imported=0, books_errored=0)
try:
log.info("Sync started (limit=%s)", limit)
cfg = config.load()
_validate_config(cfg)
log.info("Config OK — work dir: %s", cfg.local_work_dir)
log.info("Config OK — work dir: %s, import dir: %s", cfg.work_dir, cfg.import_dir)
work_dir = Path(cfg.local_work_dir)
work_dir = Path(cfg.work_dir)
work_dir.mkdir(parents=True, exist_ok=True)
log.info("Work dir ready: %s", work_dir)
import_dir = Path(cfg.import_dir)
import_dir.mkdir(parents=True, exist_ok=True)
log.info("Connecting to SFTP %s@%s:%s ...", cfg.sftp.user, cfg.sftp.host, cfg.sftp.port)
new_zips = sftp_module.list_new_zips(cfg.sftp, max_results=limit)
counters["zips_found"] = len(new_zips)
counters["zips_new"] = len(new_zips)
if not new_zips:
@@ -56,21 +57,11 @@ def run_sync(limit: int | None = None) -> None:
db.finish_sync_run(run_id, status="success", **counters)
return
# Determine chunk size; 0 means process everything in one chunk
batch_size = int(db.get_setting("sync_batch_size", "0") or "0")
if batch_size <= 0:
batch_size = len(new_zips)
total_batches = -(-len(new_zips) // batch_size) # ceiling division
client = CalibreClient(cfg.calibre)
# Pre-load existing book titles so duplicate detection doesn't need per-book OPDS searches
try:
from uploader import fetch_all_books
existing = fetch_all_books(cfg.calibre)
client.preload_existing_titles(existing)
except Exception as exc:
log.warning("Could not pre-load existing books (%s) — will fall back to per-book OPDS search", exc)
total_batches = -(-len(new_zips) // batch_size)
for batch_num, i in enumerate(range(0, len(new_zips), batch_size), start=1):
chunk = new_zips[i : i + batch_size]
@@ -89,34 +80,21 @@ def run_sync(limit: int | None = None) -> None:
books = extractor.extract(local_zip, work_dir / "extracted")
log.info("Extract done in %.1fs — %d book(s)", time.monotonic() - t1, len(books))
books_errored_this_zip = 0
for book in books:
t2 = time.monotonic()
status = client.upload(book, zip_source=remote_zip.remote_path)
log.info("Upload '%s' %s (%.1fs)", book.name, status, time.monotonic() - t2)
time.sleep(2)
if status == "uploaded":
counters["books_uploaded"] += 1
elif status == "skipped_duplicate":
counters["books_skipped"] += 1
dest = import_dir / book.name
if dest.exists():
log.info("Skipping '%s' — already exists in import dir", book.name)
else:
counters["books_errored"] += 1
books_errored_this_zip += 1
if books_errored_this_zip:
zip_status = "error"
zip_error = f"{books_errored_this_zip} book upload(s) failed — will retry next sync"
shutil.move(str(book), str(dest))
log.info("Moved '%s'%s", book.name, import_dir)
counters["books_imported"] += 1
extractor.cleanup(work_dir / "extracted" / local_zip.stem)
except CalibreUnavailableError as e:
log.error("Calibre-Web unavailable — aborting sync run: %s", e)
db.mark_zip_processed(remote_zip.remote_path, remote_zip.file_size, "error", str(e))
db.finish_sync_run(run_id, status="error", error_msg=str(e), **counters)
return
except Exception as e:
log.error("Error processing %s: %s", remote_zip.remote_path, e)
zip_status = "error"
zip_error = str(e)
counters["books_errored"] += 1
finally:
if local_zip and local_zip.exists():
extractor.cleanup(local_zip)
@@ -126,9 +104,8 @@ def run_sync(limit: int | None = None) -> None:
db.finish_sync_run(run_id, status="success", **counters)
log.info(
"Sync complete. Total zips: %d, Uploaded: %d, Skipped: %d, Errors: %d",
counters["zips_new"], counters["books_uploaded"],
counters["books_skipped"], counters["books_errored"],
"Sync complete. Total zips: %d, Imported: %d, Errors: %d",
counters["zips_new"], counters["books_imported"], counters["books_errored"],
)
except Exception as e:
log.exception("Sync run failed: %s", e)
@@ -150,9 +127,7 @@ def _validate_config(cfg) -> None:
missing.append("SSH private key")
if cfg.sftp.auth_method == "password" and not cfg.sftp.password:
missing.append("SSH password")
if not cfg.calibre.url:
missing.append("Calibre-Web URL")
if not cfg.calibre.user:
missing.append("Calibre-Web username")
if not cfg.import_dir:
missing.append("CWA import folder")
if missing:
raise ValueError(f"Missing configuration: {', '.join(missing)}")