sync errors

This commit is contained in:
2026-05-10 17:42:24 +02:00
parent 881604b4a5
commit 4fef7fbf00
+30 -29
View File
@@ -1,8 +1,7 @@
import io
import logging
import posixpath
import shlex
import socket
import stat
from dataclasses import dataclass
from pathlib import Path
@@ -81,22 +80,21 @@ def test_connection(cfg: SFTPConfig) -> tuple[bool, str]:
def list_new_zips(cfg: SFTPConfig, max_results: int | None = None) -> list[RemoteZip]:
    """Return the remote zip files that have not been processed yet.

    Opens a transport with ``_make_transport(cfg)``, lists every zip under
    ``cfg.remote_path`` with ``_find_remote_zips`` and keeps the entries for
    which ``db.is_zip_processed`` returns a falsy value.

    Args:
        cfg: Connection settings; ``cfg.remote_path`` is the root of the scan.
        max_results: Stop collecting once this many new zips have been found.
            ``None`` (or ``0``) means no limit.

    Returns:
        The new zips, in the order the remote scan reported them.
    """
    transport = _make_transport(cfg)
    try:
        # The scan runs over an exec channel on the transport itself, so no
        # SFTP client is opened here: it would be an unused extra channel and
        # one more thing that could fail before the transport is closed.
        all_zips = _find_remote_zips(transport, cfg.remote_path)
        log.info("Remote scan done: %d zip(s) found", len(all_zips))

        new_zips: list[RemoteZip] = []
        for zip_info in all_zips:
            if db.is_zip_processed(zip_info.remote_path):
                continue
            new_zips.append(zip_info)
            if max_results and len(new_zips) >= max_results:
                log.info("Reached limit of %d", max_results)
                break

        log.info("%d new zip(s) to process", len(new_zips))
        return new_zips
    finally:
        # Always release the connection, including when the scan or the
        # database lookup raises.
        transport.close()
@@ -115,24 +113,27 @@ def download(cfg: SFTPConfig, remote_zip: RemoteZip, dest_dir: str) -> Path:
return local_path
def _find_remote_zips(transport: paramiko.Transport, remote_path: str) -> list[RemoteZip]:
    """List every ``.zip`` file under *remote_path* with one remote ``find``.

    A single exec request replaces one SFTP directory listing per directory.
    The command relies on ``find -printf``, which is a GNU findutils
    extension, so the remote host must provide GNU ``find`` and allow
    command execution.

    Args:
        transport: Connected transport on which the session channel is opened.
        remote_path: Directory to search; shell-quoted before use.

    Returns:
        One ``RemoteZip`` per output line of the form ``<size>\\t<path>``.
        Lines without a tab or with a non-integer size are skipped.
    """
    # shlex.quote stops spaces or shell metacharacters in the path from being
    # interpreted by the remote shell.
    cmd = f"find {shlex.quote(remote_path)} -type f -iname '*.zip' -printf '%s\\t%p\\n'"
    log.info("Running remote find under %s ...", remote_path)

    zips: list[RemoteZip] = []
    channel = transport.open_session()
    try:
        channel.exec_command(cmd)

        for raw_line in channel.makefile("r", -1):
            # The size comes first, so a tab inside the path stays in `path`.
            size_str, sep, path = raw_line.rstrip("\n").partition("\t")
            if not sep:
                continue
            try:
                size = int(size_str)
            except ValueError:
                continue
            zips.append(RemoteZip(remote_path=path, file_size=size))

        # NOTE(review): stderr is only drained after stdout reaches EOF; a
        # very large volume of find errors could presumably stall the channel
        # window before that point - confirm against Paramiko's flow control.
        stderr_out = channel.makefile_stderr("r", -1).read().strip()
        if stderr_out:
            log.warning("find stderr: %s", stderr_out[:500])

        # A non-zero status (missing path, unreadable subtree, no `find`)
        # means the listing may be incomplete; surface it instead of
        # discarding it.
        exit_status = channel.recv_exit_status()
        if exit_status != 0:
            log.warning("Remote find exited with status %d", exit_status)
    finally:
        # Close the channel even when reading or parsing fails.
        channel.close()
    return zips