sync errors
This commit is contained in:
@@ -103,6 +103,13 @@ def is_zip_processed(remote_path: str) -> bool:
|
|||||||
return row is not None
|
return row is not None
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_processed_paths() -> set[str]:
|
||||||
|
"""Return all processed remote paths as a set for fast bulk membership checks."""
|
||||||
|
with get_db() as conn:
|
||||||
|
rows = conn.execute("SELECT remote_path FROM processed_zips").fetchall()
|
||||||
|
return {row["remote_path"] for row in rows}
|
||||||
|
|
||||||
|
|
||||||
def mark_zip_processed(remote_path: str, file_size: int, status: str, error_msg: str | None = None) -> None:
|
def mark_zip_processed(remote_path: str, file_size: int, status: str, error_msg: str | None = None) -> None:
|
||||||
with get_db() as conn:
|
with get_db() as conn:
|
||||||
conn.execute(
|
conn.execute(
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import io
|
|||||||
import logging
|
import logging
|
||||||
import shlex
|
import shlex
|
||||||
import socket
|
import socket
|
||||||
|
import time
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -81,12 +82,17 @@ def test_connection(cfg: SFTPConfig) -> tuple[bool, str]:
|
|||||||
def list_new_zips(cfg: SFTPConfig, max_results: int | None = None) -> list[RemoteZip]:
|
def list_new_zips(cfg: SFTPConfig, max_results: int | None = None) -> list[RemoteZip]:
|
||||||
transport = _make_transport(cfg)
|
transport = _make_transport(cfg)
|
||||||
try:
|
try:
|
||||||
|
t0 = time.monotonic()
|
||||||
all_zips = _find_remote_zips(transport, cfg.remote_path)
|
all_zips = _find_remote_zips(transport, cfg.remote_path)
|
||||||
log.info("Remote scan done: %d zip(s) found", len(all_zips))
|
log.info("Remote find done in %.1fs — %d zip(s) found", time.monotonic() - t0, len(all_zips))
|
||||||
|
|
||||||
|
t1 = time.monotonic()
|
||||||
|
processed = db.get_all_processed_paths()
|
||||||
|
log.info("DB lookup done in %.1fs — %d path(s) already processed", time.monotonic() - t1, len(processed))
|
||||||
|
|
||||||
new_zips: list[RemoteZip] = []
|
new_zips: list[RemoteZip] = []
|
||||||
for zip_info in all_zips:
|
for zip_info in all_zips:
|
||||||
if not db.is_zip_processed(zip_info.remote_path):
|
if zip_info.remote_path not in processed:
|
||||||
new_zips.append(zip_info)
|
new_zips.append(zip_info)
|
||||||
if max_results and len(new_zips) >= max_results:
|
if max_results and len(new_zips) >= max_results:
|
||||||
log.info("Reached limit of %d", max_results)
|
log.info("Reached limit of %d", max_results)
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import config
|
import config
|
||||||
@@ -72,11 +73,18 @@ def run_sync(limit: int | None = None) -> None:
|
|||||||
zip_error = None
|
zip_error = None
|
||||||
local_zip = None
|
local_zip = None
|
||||||
try:
|
try:
|
||||||
|
t0 = time.monotonic()
|
||||||
local_zip = sftp_module.download(cfg.sftp, remote_zip, str(work_dir / "downloads"))
|
local_zip = sftp_module.download(cfg.sftp, remote_zip, str(work_dir / "downloads"))
|
||||||
|
log.info("Download done in %.1fs (%.1f MB)", time.monotonic() - t0, local_zip.stat().st_size / 1_048_576)
|
||||||
|
|
||||||
|
t1 = time.monotonic()
|
||||||
books = extractor.extract(local_zip, work_dir / "extracted")
|
books = extractor.extract(local_zip, work_dir / "extracted")
|
||||||
|
log.info("Extract done in %.1fs — %d book(s)", time.monotonic() - t1, len(books))
|
||||||
|
|
||||||
for book in books:
|
for book in books:
|
||||||
|
t2 = time.monotonic()
|
||||||
status = client.upload(book, zip_source=remote_zip.remote_path)
|
status = client.upload(book, zip_source=remote_zip.remote_path)
|
||||||
|
log.info("Upload '%s' → %s (%.1fs)", book.name, status, time.monotonic() - t2)
|
||||||
if status == "uploaded":
|
if status == "uploaded":
|
||||||
counters["books_uploaded"] += 1
|
counters["books_uploaded"] += 1
|
||||||
elif status == "skipped_duplicate":
|
elif status == "skipped_duplicate":
|
||||||
|
|||||||
Reference in New Issue
Block a user