From 246606161bf94c8d9bff2b76c950471d3d25064c Mon Sep 17 00:00:00 2001 From: grymphen Date: Sun, 10 May 2026 16:33:24 +0200 Subject: [PATCH] sync errors --- sftp.py | 25 +++++++++++++++---------- sync.py | 6 +----- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/sftp.py b/sftp.py index fb15577..117ef9d 100644 --- a/sftp.py +++ b/sftp.py @@ -79,14 +79,21 @@ def test_connection(cfg: SFTPConfig) -> tuple[bool, str]: return False, str(e) -def list_new_zips(cfg: SFTPConfig) -> list[RemoteZip]: +def list_new_zips(cfg: SFTPConfig, max_results: int | None = None) -> list[RemoteZip]: transport = _make_transport(cfg) sftp = paramiko.SFTPClient.from_transport(transport) try: log.info("Walking remote directory tree from %s ...", cfg.remote_path) - all_zips = _walk_zips(sftp, cfg.remote_path) - new_zips = [z for z in all_zips if not db.is_zip_processed(z.remote_path)] - log.info("Remote: %d zip(s) total, %d new", len(all_zips), len(new_zips)) + new_zips: list[RemoteZip] = [] + total_seen = 0 + for zip_info in _walk_zips_iter(sftp, cfg.remote_path): + total_seen += 1 + if not db.is_zip_processed(zip_info.remote_path): + new_zips.append(zip_info) + if max_results and len(new_zips) >= max_results: + log.info("Reached limit of %d — stopping walk early", max_results) + break + log.info("Remote walk done: %d zip(s) seen, %d new", total_seen, len(new_zips)) return new_zips finally: sftp.close() @@ -108,14 +115,13 @@ def download(cfg: SFTPConfig, remote_zip: RemoteZip, dest_dir: str) -> Path: return local_path -def _walk_zips(sftp: paramiko.SFTPClient, remote_dir: str) -> list[RemoteZip]: - results: list[RemoteZip] = [] +def _walk_zips_iter(sftp: paramiko.SFTPClient, remote_dir: str): log.info("Listing %s ...", remote_dir) try: entries = sftp.listdir_attr(remote_dir) except IOError as e: log.warning("Cannot list %s: %s", remote_dir, e) - return results + return subdirs = [] zips_here = 0 @@ -124,10 +130,9 @@ def _walk_zips(sftp: paramiko.SFTPClient, remote_dir: str) -> list[RemoteZip]: if stat.S_ISDIR(entry.st_mode): subdirs.append(full_path) elif entry.filename.lower().endswith(".zip"): - results.append(RemoteZip(remote_path=full_path, file_size=entry.st_size or 0)) + yield RemoteZip(remote_path=full_path, file_size=entry.st_size or 0) zips_here += 1 log.info(" %s: %d entries, %d zip(s), %d subdir(s)", remote_dir, len(entries), zips_here, len(subdirs)) for subdir in subdirs: - results.extend(_walk_zips(sftp, subdir)) - return results + yield from _walk_zips_iter(sftp, subdir) diff --git a/sync.py b/sync.py index 99f2ccd..0f0c137 100644 --- a/sync.py +++ b/sync.py @@ -45,13 +45,9 @@ def run_sync(limit: int | None = None) -> None: log.info("Work dir ready: %s", work_dir) log.info("Connecting to SFTP %s@%s:%s ...", cfg.sftp.user, cfg.sftp.host, cfg.sftp.port) - new_zips = sftp_module.list_new_zips(cfg.sftp) + new_zips = sftp_module.list_new_zips(cfg.sftp, max_results=limit) counters["zips_found"] = len(new_zips) - # Test mode: cap at the explicit limit - if limit is not None: - new_zips = new_zips[:limit] - counters["zips_new"] = len(new_zips) if not new_zips: