Files
calibresync/extractor.py
T
2026-05-09 18:41:03 +02:00

58 lines
1.7 KiB
Python

import logging
import shutil
import zipfile
from pathlib import Path
import rarfile
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# File suffixes (lowercase, leading dot included) that extract() treats as
# books; it compares each candidate's lowercased suffix against this set.
BOOK_EXTENSIONS = {".epub", ".pdf"}
def extract(zip_path: Path, work_dir: Path) -> list[Path]:
    """Unzip *zip_path*, unrar any RAR archives inside it, and collect the books.

    The zip is unpacked into ``work_dir / zip_path.stem`` (created if missing).
    Every ``.rar`` file found below that directory is then extracted into its
    own parent directory, and finally the tree is scanned for book files.

    The list of RAR archives is built once, before any of them is extracted,
    so an archive that only appears after unpacking another RAR is not
    extracted itself.

    Args:
        zip_path: Path of the zip archive to unpack.
        work_dir: Directory under which the per-archive extraction root is
            created.

    Returns:
        Paths of all regular files below the extraction root whose lowercased
        suffix is in ``BOOK_EXTENSIONS``.

    Raises:
        Exception: Any error raised while unzipping or scanning is logged and
            re-raised after the extraction root has been removed. Failures of
            individual RAR archives are handled inside ``_unrar`` and do not
            propagate.
    """
    extract_root = work_dir / zip_path.stem
    extract_root.mkdir(parents=True, exist_ok=True)
    try:
        _unzip(zip_path, extract_root)
        # Compare the lowercased suffix in a single walk instead of globbing
        # "*.rar" and "*.RAR" separately: that missed mixed-case names such as
        # "x.Rar" and listed every archive twice where glob matching is
        # case-insensitive. is_file() skips directories that merely end in
        # ".rar".
        rar_files = [
            p for p in extract_root.rglob("*")
            if p.suffix.lower() == ".rar" and p.is_file()
        ]
        for rar in rar_files:
            _unrar(rar, rar.parent)
        books = [
            p for p in extract_root.rglob("*")
            if p.suffix.lower() in BOOK_EXTENSIONS and p.is_file()
        ]
        log.info("Extracted %d book(s) from %s", len(books), zip_path.name)
        return books
    except Exception as e:
        log.error("Failed to extract %s: %s", zip_path, e)
        # Do not leave a half-extracted tree behind for the caller.
        shutil.rmtree(extract_root, ignore_errors=True)
        raise
def cleanup(path: Path) -> None:
    """Remove *path*, whether it is a file, a directory tree, or a symlink.

    A path that does not exist is ignored. Symlinks are removed themselves;
    their targets are left untouched.

    Args:
        path: File, directory, or symlink to delete.

    Raises:
        OSError: If unlinking a file or symlink fails for a reason other than
            the path being missing. Errors while removing a directory tree
            are ignored.
    """
    # Check for symlinks first: is_dir()/is_file() follow links and
    # shutil.rmtree refuses to operate on a symlink, so a link to a directory
    # (or a dangling link) would otherwise be silently left in place.
    if path.is_symlink() or path.is_file():
        path.unlink(missing_ok=True)
    elif path.is_dir():
        shutil.rmtree(path, ignore_errors=True)
def _unzip(zip_path: Path, dest: Path) -> None:
    """Extract every member of the zip archive *zip_path* into *dest*.

    Args:
        zip_path: Zip archive to read.
        dest: Directory the members are extracted into.

    Raises:
        Exception: Errors from opening or extracting the archive are not
            caught here and propagate to the caller.
    """
    with zipfile.ZipFile(zip_path, "r") as zf:
        zf.extractall(dest)
    # Separate the two fields; the previous "%s%s" format fused the archive
    # name and the destination path into one unreadable token.
    log.debug("Unzipped %s -> %s", zip_path.name, dest)
def _unrar(rar_path: Path, dest: Path) -> None:
    """Extract the RAR archive *rar_path* into *dest* on a best-effort basis.

    No exception escapes this function: ``rarfile.NeedFirstVolume`` is logged
    at debug level as a skipped non-first volume, and any other failure is
    logged as a warning.

    Args:
        rar_path: RAR archive to read.
        dest: Directory the members are extracted into.
    """
    try:
        with rarfile.RarFile(rar_path, "r") as rf:
            rf.extractall(dest)
        # Separate the two fields; the previous "%s%s" format fused the
        # archive name and the destination path into one unreadable token.
        log.debug("Unrared %s -> %s", rar_path.name, dest)
    except rarfile.NeedFirstVolume:
        log.debug("Skipping non-first volume: %s", rar_path.name)
    except Exception as e:
        # Deliberately broad: one bad archive must not abort the whole run.
        log.warning("Failed to unrar %s: %s", rar_path.name, e)