From 4fef7fbf0042379989e62f70bfc1d82a20124113 Mon Sep 17 00:00:00 2001
From: grymphen
Date: Sun, 10 May 2026 17:42:24 +0200
Subject: [PATCH] sync errors

---
 sftp.py | 88 +++++++++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 57 insertions(+), 31 deletions(-)

diff --git a/sftp.py b/sftp.py
index 117ef9d..0033307 100644
--- a/sftp.py
+++ b/sftp.py
@@ -1,8 +1,8 @@
 import io
 import logging
-import posixpath
+import shlex
 import socket
-import stat
+import threading
 from dataclasses import dataclass
 from pathlib import Path
 
@@ -81,22 +81,21 @@ def test_connection(cfg: SFTPConfig) -> tuple[bool, str]:
 
 def list_new_zips(cfg: SFTPConfig, max_results: int | None = None) -> list[RemoteZip]:
     transport = _make_transport(cfg)
-    sftp = paramiko.SFTPClient.from_transport(transport)
     try:
-        log.info("Walking remote directory tree from %s ...", cfg.remote_path)
+        all_zips = _find_remote_zips(transport, cfg.remote_path)
+        log.info("Remote scan done: %d zip(s) found", len(all_zips))
+
         new_zips: list[RemoteZip] = []
-        total_seen = 0
-        for zip_info in _walk_zips_iter(sftp, cfg.remote_path):
-            total_seen += 1
+        for zip_info in all_zips:
             if not db.is_zip_processed(zip_info.remote_path):
                 new_zips.append(zip_info)
                 if max_results and len(new_zips) >= max_results:
-                    log.info("Reached limit of %d — stopping walk early", max_results)
+                    log.info("Reached limit of %d", max_results)
                     break
-        log.info("Remote walk done: %d zip(s) seen, %d new", total_seen, len(new_zips))
+
+        log.info("%d new zip(s) to process", len(new_zips))
         return new_zips
     finally:
-        sftp.close()
         transport.close()
 
 
@@ -115,24 +114,51 @@ def download(cfg: SFTPConfig, remote_zip: RemoteZip, dest_dir: str) -> Path:
     return local_path
 
 
-def _walk_zips_iter(sftp: paramiko.SFTPClient, remote_dir: str):
-    log.info("Listing %s ...", remote_dir)
-    try:
-        entries = sftp.listdir_attr(remote_dir)
-    except IOError as e:
-        log.warning("Cannot list %s: %s", remote_dir, e)
-        return
-
-    subdirs = []
-    zips_here = 0
-    for entry in entries:
-        full_path = posixpath.join(remote_dir, entry.filename)
-        if stat.S_ISDIR(entry.st_mode):
-            subdirs.append(full_path)
-        elif entry.filename.lower().endswith(".zip"):
-            yield RemoteZip(remote_path=full_path, file_size=entry.st_size or 0)
-            zips_here += 1
-
-    log.info(" %s: %d entries, %d zip(s), %d subdir(s)", remote_dir, len(entries), zips_here, len(subdirs))
-    for subdir in subdirs:
-        yield from _walk_zips_iter(sftp, subdir)
+def _find_remote_zips(transport: paramiko.Transport, remote_path: str) -> list[RemoteZip]:
+    """List every ``.zip`` file under *remote_path* using one remote ``find``.
+
+    A single SSH exec request replaces one SFTP listing per directory. This
+    needs exec access on the server and a ``find`` that supports ``-printf``
+    (a GNU findutils extension).
+
+    The scan is best-effort: unparsable output lines are skipped, and ``find``
+    diagnostics or a non-zero exit status are logged as warnings, not raised.
+    """
+    cmd = f"find {shlex.quote(remote_path)} -type f -iname '*.zip' -printf '%s\\t%p\\n'"
+    log.info("Running remote find under %s ...", remote_path)
+    channel = transport.open_session()
+    try:
+        channel.exec_command(cmd)
+
+        # Drain stderr on a helper thread while stdout is read. Both streams
+        # share the channel's flow-control window, so stderr left unread until
+        # the end can stall the remote command once that window fills up.
+        stderr_file = channel.makefile_stderr("rb", -1)
+        stderr_chunks: list[bytes] = []
+        drainer = threading.Thread(
+            target=lambda: stderr_chunks.append(stderr_file.read()),
+            daemon=True,
+        )
+        drainer.start()
+
+        zips: list[RemoteZip] = []
+        # Binary mode + per-line decode: one undecodable name must not abort the scan.
+        for raw_line in channel.makefile("rb", -1):
+            size_field, tab, path_field = raw_line.rstrip(b"\n").partition(b"\t")
+            if not tab:
+                continue
+            try:
+                zips.append(RemoteZip(remote_path=path_field.decode("utf-8"), file_size=int(size_field)))
+            except ValueError:  # bad size field or non-UTF-8 path
+                log.warning("Skipping unparsable find output: %r", raw_line[:200])
+
+        drainer.join()
+        stderr_out = b"".join(stderr_chunks).decode("utf-8", errors="replace").strip()
+        if stderr_out:
+            log.warning("find stderr: %s", stderr_out[:500])
+        exit_status = channel.recv_exit_status()
+        if exit_status != 0:
+            log.warning("Remote find exited with status %d; results may be incomplete", exit_status)
+        return zips
+    finally:
+        channel.close()