sync errors
This commit is contained in:
@@ -1,8 +1,7 @@
|
||||
import io
|
||||
import logging
|
||||
import posixpath
|
||||
import shlex
|
||||
import socket
|
||||
import stat
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
@@ -81,22 +80,21 @@ def test_connection(cfg: SFTPConfig) -> tuple[bool, str]:
|
||||
|
||||
def list_new_zips(cfg: SFTPConfig, max_results: int | None = None) -> list[RemoteZip]:
|
||||
transport = _make_transport(cfg)
|
||||
sftp = paramiko.SFTPClient.from_transport(transport)
|
||||
try:
|
||||
log.info("Walking remote directory tree from %s ...", cfg.remote_path)
|
||||
all_zips = _find_remote_zips(transport, cfg.remote_path)
|
||||
log.info("Remote scan done: %d zip(s) found", len(all_zips))
|
||||
|
||||
new_zips: list[RemoteZip] = []
|
||||
total_seen = 0
|
||||
for zip_info in _walk_zips_iter(sftp, cfg.remote_path):
|
||||
total_seen += 1
|
||||
for zip_info in all_zips:
|
||||
if not db.is_zip_processed(zip_info.remote_path):
|
||||
new_zips.append(zip_info)
|
||||
if max_results and len(new_zips) >= max_results:
|
||||
log.info("Reached limit of %d — stopping walk early", max_results)
|
||||
log.info("Reached limit of %d", max_results)
|
||||
break
|
||||
log.info("Remote walk done: %d zip(s) seen, %d new", total_seen, len(new_zips))
|
||||
|
||||
log.info("%d new zip(s) to process", len(new_zips))
|
||||
return new_zips
|
||||
finally:
|
||||
sftp.close()
|
||||
transport.close()
|
||||
|
||||
|
||||
@@ -115,24 +113,27 @@ def download(cfg: SFTPConfig, remote_zip: RemoteZip, dest_dir: str) -> Path:
|
||||
return local_path
|
||||
|
||||
|
||||
def _walk_zips_iter(sftp: paramiko.SFTPClient, remote_dir: str):
|
||||
log.info("Listing %s ...", remote_dir)
|
||||
def _find_remote_zips(transport: paramiko.Transport, remote_path: str) -> list[RemoteZip]:
|
||||
"""Single SSH exec: find all .zip files server-side. Vastly faster than per-directory SFTP calls."""
|
||||
channel = transport.open_session()
|
||||
cmd = f"find {shlex.quote(remote_path)} -type f -iname '*.zip' -printf '%s\\t%p\\n'"
|
||||
log.info("Running remote find under %s ...", remote_path)
|
||||
channel.exec_command(cmd)
|
||||
|
||||
zips: list[RemoteZip] = []
|
||||
for line in channel.makefile("r", -1):
|
||||
line = line.rstrip("\n")
|
||||
if "\t" not in line:
|
||||
continue
|
||||
size_str, path = line.split("\t", 1)
|
||||
try:
|
||||
entries = sftp.listdir_attr(remote_dir)
|
||||
except IOError as e:
|
||||
log.warning("Cannot list %s: %s", remote_dir, e)
|
||||
return
|
||||
zips.append(RemoteZip(remote_path=path, file_size=int(size_str)))
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
subdirs = []
|
||||
zips_here = 0
|
||||
for entry in entries:
|
||||
full_path = posixpath.join(remote_dir, entry.filename)
|
||||
if stat.S_ISDIR(entry.st_mode):
|
||||
subdirs.append(full_path)
|
||||
elif entry.filename.lower().endswith(".zip"):
|
||||
yield RemoteZip(remote_path=full_path, file_size=entry.st_size or 0)
|
||||
zips_here += 1
|
||||
|
||||
log.info(" %s: %d entries, %d zip(s), %d subdir(s)", remote_dir, len(entries), zips_here, len(subdirs))
|
||||
for subdir in subdirs:
|
||||
yield from _walk_zips_iter(sftp, subdir)
|
||||
stderr_out = channel.makefile_stderr("r", -1).read().strip()
|
||||
if stderr_out:
|
||||
log.warning("find stderr: %s", stderr_out[:500])
|
||||
channel.recv_exit_status()
|
||||
channel.close()
|
||||
return zips
|
||||
|
||||
Reference in New Issue
Block a user