import logging
import shutil
import zipfile
from pathlib import Path

import rarfile

log = logging.getLogger(__name__)

BOOK_EXTENSIONS = {".epub", ".pdf"}

# Suffixes (lower-case) that identify RAR archives nested inside the zip.
_RAR_EXTENSIONS = {".rar"}


def extract(zip_path: Path, work_dir: Path) -> list[Path]:
    """Unzip, then unrar, then return all epub/pdf paths found.

    The archive is unpacked into ``work_dir / zip_path.stem``. Every RAR
    file found below that directory after unzipping is extracted next to
    itself, and the tree is then scanned for files whose suffix is in
    ``BOOK_EXTENSIONS`` (compared case-insensitively).

    Args:
        zip_path: Path of the zip archive to unpack.
        work_dir: Directory under which the extraction directory is created.

    Returns:
        Paths of all book files found below the extraction directory.

    Raises:
        Exception: Any error raised while unzipping or scanning is logged
            and re-raised after the extraction directory has been removed.
            Failures of individual RAR archives are only logged by
            ``_unrar`` and do not abort the extraction.
    """
    extract_root = work_dir / zip_path.stem
    extract_root.mkdir(parents=True, exist_ok=True)

    try:
        _unzip(zip_path, extract_root)

        # One case-insensitive scan instead of separate "*.rar" / "*.RAR"
        # globs: it also matches mixed-case names such as "x.Rar" and cannot
        # yield the same archive twice where globbing ignores case. The
        # result is a list, so files created by unrar are not picked up
        # mid-iteration.
        for rar in _find_files(extract_root, _RAR_EXTENSIONS):
            _unrar(rar, rar.parent)

        books = _find_files(extract_root, BOOK_EXTENSIONS)
        log.info("Extracted %d book(s) from %s", len(books), zip_path.name)
        return books
    except Exception as e:
        log.error("Failed to extract %s: %s", zip_path, e)
        # Do not leave a half-extracted tree behind.
        shutil.rmtree(extract_root, ignore_errors=True)
        raise


def cleanup(path: Path) -> None:
    """Remove *path*, whether it is a file, a directory tree or a symlink.

    Paths that do not exist are ignored. Symbolic links are removed
    themselves; their targets are never touched.
    """
    if path.is_symlink():
        # shutil.rmtree refuses to operate on symlinks, and a dangling link
        # is neither a file nor a directory, so handle links explicitly.
        path.unlink(missing_ok=True)
    elif path.is_dir():
        shutil.rmtree(path, ignore_errors=True)
    elif path.is_file():
        path.unlink(missing_ok=True)


def _find_files(root: Path, suffixes: set[str]) -> list[Path]:
    """Return regular files below *root* whose lower-cased suffix is in *suffixes*."""
    return [
        p
        for p in root.rglob("*")
        if p.suffix.lower() in suffixes and p.is_file()
    ]


def _unzip(zip_path: Path, dest: Path) -> None:
    """Extract every member of the zip archive *zip_path* into *dest*."""
    with zipfile.ZipFile(zip_path, "r") as zf:
        zf.extractall(dest)
    log.debug("Unzipped %s → %s", zip_path.name, dest)


def _unrar(rar_path: Path, dest: Path) -> None:
    """Extract the RAR archive *rar_path* into *dest*, best-effort.

    Nothing is raised: archives rejected with ``rarfile.NeedFirstVolume``
    are skipped with a debug message, and any other failure is logged as a
    warning.
    """
    try:
        with rarfile.RarFile(rar_path, "r") as rf:
            rf.extractall(dest)
        log.debug("Unrared %s → %s", rar_path.name, dest)
    except rarfile.NeedFirstVolume:
        log.debug("Skipping non-first volume: %s", rar_path.name)
    except Exception as e:
        # Deliberately broad: one bad archive must not abort the whole run.
        log.warning("Failed to unrar %s: %s", rar_path.name, e)