sync errors

This commit is contained in:
2026-05-10 17:49:42 +02:00
parent 4fef7fbf00
commit e754b14085
3 changed files with 23 additions and 2 deletions
+7
View File
@@ -103,6 +103,13 @@ def is_zip_processed(remote_path: str) -> bool:
return row is not None
def get_all_processed_paths() -> set[str]:
"""Return all processed remote paths as a set for fast bulk membership checks."""
with get_db() as conn:
rows = conn.execute("SELECT remote_path FROM processed_zips").fetchall()
return {row["remote_path"] for row in rows}
def mark_zip_processed(remote_path: str, file_size: int, status: str, error_msg: str | None = None) -> None:
with get_db() as conn:
conn.execute(
+8 -2
View File
@@ -2,6 +2,7 @@ import io
import logging
import shlex
import socket
import time
from dataclasses import dataclass
from pathlib import Path
@@ -81,12 +82,17 @@ def test_connection(cfg: SFTPConfig) -> tuple[bool, str]:
def list_new_zips(cfg: SFTPConfig, max_results: int | None = None) -> list[RemoteZip]:
transport = _make_transport(cfg)
try:
t0 = time.monotonic()
all_zips = _find_remote_zips(transport, cfg.remote_path)
log.info("Remote scan done: %d zip(s) found", len(all_zips))
log.info("Remote find done in %.1fs — %d zip(s) found", time.monotonic() - t0, len(all_zips))
t1 = time.monotonic()
processed = db.get_all_processed_paths()
log.info("DB lookup done in %.1fs — %d path(s) already processed", time.monotonic() - t1, len(processed))
new_zips: list[RemoteZip] = []
for zip_info in all_zips:
if not db.is_zip_processed(zip_info.remote_path):
if zip_info.remote_path not in processed:
new_zips.append(zip_info)
if max_results and len(new_zips) >= max_results:
log.info("Reached limit of %d", max_results)
+8
View File
@@ -1,5 +1,6 @@
import logging
import threading
import time
from pathlib import Path
import config
@@ -72,11 +73,18 @@ def run_sync(limit: int | None = None) -> None:
zip_error = None
local_zip = None
try:
t0 = time.monotonic()
local_zip = sftp_module.download(cfg.sftp, remote_zip, str(work_dir / "downloads"))
log.info("Download done in %.1fs (%.1f MB)", time.monotonic() - t0, local_zip.stat().st_size / 1_048_576)
t1 = time.monotonic()
books = extractor.extract(local_zip, work_dir / "extracted")
log.info("Extract done in %.1fs — %d book(s)", time.monotonic() - t1, len(books))
for book in books:
t2 = time.monotonic()
status = client.upload(book, zip_source=remote_zip.remote_path)
log.info("Upload '%s'%s (%.1fs)", book.name, status, time.monotonic() - t2)
if status == "uploaded":
counters["books_uploaded"] += 1
elif status == "skipped_duplicate":