sync errors
This commit is contained in:
@@ -103,6 +103,13 @@ def is_zip_processed(remote_path: str) -> bool:
|
||||
return row is not None
|
||||
|
||||
|
||||
def get_all_processed_paths() -> set[str]:
|
||||
"""Return all processed remote paths as a set for fast bulk membership checks."""
|
||||
with get_db() as conn:
|
||||
rows = conn.execute("SELECT remote_path FROM processed_zips").fetchall()
|
||||
return {row["remote_path"] for row in rows}
|
||||
|
||||
|
||||
def mark_zip_processed(remote_path: str, file_size: int, status: str, error_msg: str | None = None) -> None:
|
||||
with get_db() as conn:
|
||||
conn.execute(
|
||||
|
||||
@@ -2,6 +2,7 @@ import io
|
||||
import logging
|
||||
import shlex
|
||||
import socket
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
@@ -81,12 +82,17 @@ def test_connection(cfg: SFTPConfig) -> tuple[bool, str]:
|
||||
def list_new_zips(cfg: SFTPConfig, max_results: int | None = None) -> list[RemoteZip]:
|
||||
transport = _make_transport(cfg)
|
||||
try:
|
||||
t0 = time.monotonic()
|
||||
all_zips = _find_remote_zips(transport, cfg.remote_path)
|
||||
log.info("Remote scan done: %d zip(s) found", len(all_zips))
|
||||
log.info("Remote find done in %.1fs — %d zip(s) found", time.monotonic() - t0, len(all_zips))
|
||||
|
||||
t1 = time.monotonic()
|
||||
processed = db.get_all_processed_paths()
|
||||
log.info("DB lookup done in %.1fs — %d path(s) already processed", time.monotonic() - t1, len(processed))
|
||||
|
||||
new_zips: list[RemoteZip] = []
|
||||
for zip_info in all_zips:
|
||||
if not db.is_zip_processed(zip_info.remote_path):
|
||||
if zip_info.remote_path not in processed:
|
||||
new_zips.append(zip_info)
|
||||
if max_results and len(new_zips) >= max_results:
|
||||
log.info("Reached limit of %d", max_results)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import config
|
||||
@@ -72,11 +73,18 @@ def run_sync(limit: int | None = None) -> None:
|
||||
zip_error = None
|
||||
local_zip = None
|
||||
try:
|
||||
t0 = time.monotonic()
|
||||
local_zip = sftp_module.download(cfg.sftp, remote_zip, str(work_dir / "downloads"))
|
||||
log.info("Download done in %.1fs (%.1f MB)", time.monotonic() - t0, local_zip.stat().st_size / 1_048_576)
|
||||
|
||||
t1 = time.monotonic()
|
||||
books = extractor.extract(local_zip, work_dir / "extracted")
|
||||
log.info("Extract done in %.1fs — %d book(s)", time.monotonic() - t1, len(books))
|
||||
|
||||
for book in books:
|
||||
t2 = time.monotonic()
|
||||
status = client.upload(book, zip_source=remote_zip.remote_path)
|
||||
log.info("Upload '%s' → %s (%.1fs)", book.name, status, time.monotonic() - t2)
|
||||
if status == "uploaded":
|
||||
counters["books_uploaded"] += 1
|
||||
elif status == "skipped_duplicate":
|
||||
|
||||
Reference in New Issue
Block a user