diff --git a/db.py b/db.py index d4a8f74..e759dc7 100644 --- a/db.py +++ b/db.py @@ -110,9 +110,11 @@ def is_zip_processed(remote_path: str) -> bool: def get_all_processed_paths() -> set[str]: - """Return all processed remote paths as a set for fast bulk membership checks.""" + """Return successfully-processed remote paths. Errored zips are excluded so they get retried.""" with get_db() as conn: - rows = conn.execute("SELECT remote_path FROM processed_zips").fetchall() + rows = conn.execute( + "SELECT remote_path FROM processed_zips WHERE status = 'success'" + ).fetchall() return {row["remote_path"] for row in rows} @@ -174,7 +176,8 @@ def get_recent_zips(limit: int = 50) -> list[sqlite3.Row]: def is_book_uploaded(file_hash: str) -> bool: with get_db() as conn: row = conn.execute( - "SELECT id FROM uploaded_books WHERE file_hash = ?", (file_hash,) + "SELECT id FROM uploaded_books WHERE file_hash = ? AND status IN ('uploaded', 'skipped_duplicate')", + (file_hash,), ).fetchone() return row is not None diff --git a/sync.py b/sync.py index e7b39d6..0939934 100644 --- a/sync.py +++ b/sync.py @@ -81,6 +81,7 @@ def run_sync(limit: int | None = None) -> None: books = extractor.extract(local_zip, work_dir / "extracted") log.info("Extract done in %.1fs — %d book(s)", time.monotonic() - t1, len(books)) + books_errored_this_zip = 0 for book in books: t2 = time.monotonic() status = client.upload(book, zip_source=remote_zip.remote_path) @@ -91,6 +92,11 @@ def run_sync(limit: int | None = None) -> None: counters["books_skipped"] += 1 else: counters["books_errored"] += 1 + books_errored_this_zip += 1 + + if books_errored_this_zip: + zip_status = "error" + zip_error = f"{books_errored_this_zip} book upload(s) failed — will retry next sync" extractor.cleanup(work_dir / "extracted" / local_zip.stem) except Exception as e: diff --git a/uploader.py b/uploader.py index 4bc7314..ae4c150 100644 --- a/uploader.py +++ b/uploader.py @@ -1,6 +1,7 @@ import hashlib import logging import re +import time from pathlib import Path from urllib.parse import quote @@ -101,20 +102,37 @@ class CalibreClient: return "skipped_duplicate" mime = MIME_TYPES.get(book_path.suffix.lower(), "application/octet-stream") - with book_path.open("rb") as fh: - resp = self._session.post( - f"{self._cfg.url}/upload", - files={"btn-upload": (book_path.name, fh, mime)}, - data={"csrf_token": self._upload_csrf} if self._upload_csrf else {}, - timeout=120, - ) - if not resp.ok: - log.error("Upload HTTP %s — body: %s", resp.status_code, resp.text[:300]) - resp.raise_for_status() - log.info("Uploaded: %s", book_path.name) - db.record_book(book_path.name, file_hash, zip_source, "uploaded") - return "uploaded" - except requests.HTTPError: + last_err: Exception | None = None + for attempt in range(1, 4): + try: + with book_path.open("rb") as fh: + resp = self._session.post( + f"{self._cfg.url}/upload", + files={"btn-upload": (book_path.name, fh, mime)}, + data={"csrf_token": self._upload_csrf} if self._upload_csrf else {}, + timeout=120, + ) + if not resp.ok: + log.error("Upload HTTP %s (attempt %d/3) — body: %s", resp.status_code, attempt, resp.text[:300]) + resp.raise_for_status() + log.info("Uploaded: %s", book_path.name) + db.record_book(book_path.name, file_hash, zip_source, "uploaded") + return "uploaded" + except requests.HTTPError as e: + last_err = e + if resp.status_code in (502, 503, 504) and attempt < 3: + delay = 1 if attempt == 1 else 3 + log.warning("HTTP %s on attempt %d/3 — retrying in %ds ...", resp.status_code, attempt, delay) + time.sleep(delay) + continue + if resp.status_code == 400 and attempt == 1: + log.warning("HTTP 400 — CSRF token likely expired, re-authenticating ...") + self._authenticated = False + self._upload_csrf = None + self._ensure_auth() + continue + break + db.record_book(book_path.name, file_hash, zip_source, "error") return "error" except Exception as e: