#!/usr/bin/env python3
"""
ftp_sync.py

A small CLI utility to connect to an FTP/FTPS server, scan a remote directory,
compare it to a local directory, and download only the files that are present on
remote but missing locally.

- Uses MLSD when available for reliable listings; falls back to NLST + probing
- Supports recursive mode
- Supports FTPS (explicit TLS)
- Supports passive/active mode selection
- Supports dry-run mode

Example usage:
  python ftp_sync.py \
    --host ftp.example.com --user foo --password bar \
    --remote-dir /pub/data --local-dir ./downloads --recursive

"""

from __future__ import annotations

import argparse
import getpass
import os
import sys
from contextlib import contextmanager
from dataclasses import dataclass
from ftplib import FTP, FTP_TLS, error_perm, all_errors
from pathlib import Path
from typing import Iterable, Iterator, List, Optional, Sequence, Set, Tuple


@dataclass
class FtpConnectionOptions:
    """Settings needed to open and configure one FTP/FTPS session."""

    # Server hostname or IP address passed to the client's connect().
    host: str
    # TCP port of the FTP control connection.
    port: int
    # Login name sent to the server.
    username: str
    # Password sent together with ``username`` at login.
    password: str
    # When true an FTP_TLS client is used and the data channel is protected
    # with PROT P; otherwise a plain FTP client is used.
    use_tls: bool
    # True selects passive mode, False selects active mode (set_pasv).
    passive: bool
    # Socket timeout in seconds handed to connect(); None when not specified.
    timeout: Optional[float]


@dataclass
class SyncOptions:
    """Settings describing what to compare and how to report/perform the sync."""

    # Remote directory to scan (absolute, or relative to the login directory).
    remote_dir: str
    # Local directory that is compared against and downloaded into.
    local_dir: Path
    # When true, subdirectories are walked on both the remote and local side.
    recursive: bool
    # When true, missing files are only printed, never downloaded.
    dry_run: bool
    # Enables the optional progress messages routed through log().
    verbose: bool


def parse_args(argv: Optional[Sequence[str]] = None) -> Tuple[FtpConnectionOptions, SyncOptions]:
    """Translate command-line arguments into connection and sync settings.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        A ``(FtpConnectionOptions, SyncOptions)`` pair.

    Notes:
        - A missing ``--user`` means an anonymous login.
        - A missing ``--password`` is prompted for interactively, except for
          the anonymous user, which gets the conventional ``anonymous@``.
        - ``--local-dir`` is expanded (``~``) and resolved to an absolute path.
    """
    summary = (
        "Connect to an FTP/FTPS server and download only files present on remote "
        "but missing locally."
    )
    cli = argparse.ArgumentParser(description=summary)
    add = cli.add_argument

    # --- Connection options -------------------------------------------------
    add("--host", required=True, help="FTP server hostname or IP")
    add("--port", type=int, default=21, help="FTP server port (default: 21)")
    add("--user", dest="username", help="Username for FTP login")
    add("--password", help="Password for FTP login; will prompt if omitted")
    add(
        "--tls",
        action="store_true",
        help="Use explicit FTPS (TLS). Calls PROT P for encrypted data channel.",
    )
    add(
        "--no-passive",
        action="store_true",
        help="Use active mode (PASV off). Default is passive mode.",
    )
    add(
        "--timeout",
        type=float,
        default=None,
        help="Socket timeout in seconds (optional)",
    )

    # --- Sync options -------------------------------------------------------
    add(
        "--remote-dir",
        required=True,
        help="Remote directory to scan (absolute or relative to login directory)",
    )
    add(
        "--local-dir",
        required=True,
        help="Local directory to compare/download into",
    )
    add(
        "--recursive",
        action="store_true",
        help="Recurse into subdirectories (default: off)",
    )
    add(
        "--dry-run",
        action="store_true",
        help="Print actions without downloading files",
    )
    add(
        "-v",
        "--verbose",
        action="store_true",
        help="Verbose logging",
    )

    ns = cli.parse_args(argv)

    login_name = ns.username or "anonymous"
    if ns.password is not None:
        secret = ns.password
    elif login_name == "anonymous":
        # Anonymous FTP often expects an email-like password.
        secret = "anonymous@"
    else:
        # A real account without a password on the command line: ask for it.
        secret = getpass.getpass("Password: ")

    connection = FtpConnectionOptions(
        host=ns.host,
        port=ns.port,
        username=login_name,
        password=secret,
        use_tls=bool(ns.tls),
        passive=not bool(ns.no_passive),
        timeout=ns.timeout,
    )
    target_dir = Path(ns.local_dir).expanduser().resolve()
    sync = SyncOptions(
        remote_dir=ns.remote_dir,
        local_dir=target_dir,
        recursive=bool(ns.recursive),
        dry_run=bool(ns.dry_run),
        verbose=bool(ns.verbose),
    )
    return connection, sync


def log(message: str, verbose: bool = True) -> None:
    """Print ``message`` to stdout (flushed immediately) unless ``verbose`` is false."""
    if not verbose:
        return
    print(message, flush=True)


def connect_ftp(options: FtpConnectionOptions, verbose: bool) -> FTP:
    """Open, authenticate and configure an FTP or explicit-FTPS session.

    Args:
        options: Host, port, credentials, TLS/passive flags and timeout.
        verbose: When true, progress messages are printed via ``log``.

    Returns:
        A logged-in client (``FTP_TLS`` when ``options.use_tls`` is set,
        otherwise ``FTP``) with the requested passive/active mode applied.

    Raises:
        Whatever ``ftplib`` raises while connecting, logging in or enabling
        data-channel protection. The client is closed before the exception is
        re-raised so a failed attempt does not leak the control socket.
    """
    client_class = FTP_TLS if options.use_tls else FTP
    client: FTP = client_class()

    try:
        log(f"Connecting to {options.host}:{options.port}...", verbose)
        # The timeout is handed to connect() directly; assigning it to the
        # client beforehand as well would be redundant.
        client.connect(host=options.host, port=options.port, timeout=options.timeout)
        log("Logging in...", verbose)
        client.login(user=options.username, passwd=options.password)

        # Configure TLS data protection if using FTPS: encrypt the data channel.
        if isinstance(client, FTP_TLS):
            client.prot_p()

        # Passive vs active
        client.set_pasv(options.passive)
    except BaseException:
        # The greeting, login, PROT P or mode selection failed after the
        # socket may already be open: release it instead of leaking it.
        client.close()
        raise

    return client


@contextmanager
def ftp_cwd(ftp: FTP, new_dir: str) -> Iterator[None]:
    """Temporarily make ``new_dir`` the session's working directory.

    The directory in effect on entry is recorded first and switched back to on
    exit, whether the managed body completes normally or raises. If changing
    into ``new_dir`` itself fails, the error propagates and nothing is restored
    (no change took place).
    """
    original_dir = ftp.pwd()
    ftp.cwd(new_dir)
    try:
        yield None
    finally:
        # Always return to where the caller was, even on error.
        ftp.cwd(original_dir)


def try_mlsd(ftp: FTP, path: str) -> Optional[List[Tuple[str, dict]]]:
    """List ``path`` with MLSD, reporting lack of MLSD support as ``None``.

    The listing is taken after temporarily changing into ``path`` (the
    previous working directory is restored afterwards).

    Args:
        ftp: Connected FTP client.
        path: Remote directory to list.

    Returns:
        A list of ``(name, facts)`` tuples as yielded by ``ftp.mlsd()``, or
        ``None`` when MLSD is unavailable (missing in the client library, or
        rejected by the server as an unknown/unimplemented command).

    Raises:
        error_perm: For any other permanent error, e.g. when ``path`` cannot
            be entered.
    """
    try:
        with ftp_cwd(ftp, path):
            return list(ftp.mlsd())
    except AttributeError:
        # MLSD not available in ftplib in very old Pythons
        return None
    except error_perm as exc:
        reply = str(exc)
        # 500 (command unrecognized) and 502 (command not implemented) both
        # mean the server does not offer MLSD. Previously only 500 was
        # recognised, so a 502 answer aborted instead of allowing a fallback.
        if reply[:3] in ("500", "502") or "mlsd" in reply.lower():
            return None
        raise


def is_directory_via_cwd_probe(ftp: FTP, path: str) -> bool:
    """Report whether ``path`` is a directory by trying to CWD into it.

    The working directory is restored after a successful probe.

    Args:
        ftp: Connected FTP client.
        path: Remote path to probe.

    Returns:
        ``True`` if the server allowed changing into ``path``; ``False`` if it
        answered with an FTP-level error.

    Raises:
        OSError, EOFError: When the connection itself fails. These say nothing
            about ``path``, so they are propagated rather than being reported
            as "not a directory" (which would make callers treat every
            remaining entry as a plain file).
    """
    try:
        with ftp_cwd(ftp, path):
            return True
    except (OSError, EOFError):
        # Transport failure (timeout, reset, closed connection): not an answer.
        raise
    except all_errors:
        # Remaining members of all_errors are FTP protocol replies: the
        # server refused the CWD, so treat the path as a non-directory.
        return False


def list_remote_files(ftp: FTP, base_dir: str, recursive: bool, verbose: bool) -> List[str]:
    """Return the sorted remote file paths found under ``base_dir``.

    MLSD is used when the server supports it. If MLSD turns out to be
    unsupported (at the root or part-way through the walk), the whole listing
    is redone with NLST plus CWD probing.

    Args:
        ftp: Connected FTP client.
        base_dir: Remote directory to scan, absolute or relative to the
            current working directory of the session.
        recursive: Descend into subdirectories when true.
        verbose: When true, report which listing strategy was used.

    Returns:
        Sorted ``/``-separated paths relative to ``base_dir``. Only regular
        files are included; other MLSD entry types (e.g. links) are skipped.

    Raises:
        error_perm: If ``base_dir`` cannot be entered or a listing fails for a
            reason other than "MLSD unsupported" / "directory empty".
    """
    normalized_base = base_dir.rstrip("/") or "/"

    # Resolve the base to an absolute path once. The NLST fallback issues
    # CWDs while already inside a listed directory, so a relative base would
    # otherwise be resolved against the wrong directory.
    with ftp_cwd(ftp, normalized_base):
        remote_root = ftp.pwd().rstrip("/") or "/"

    def join_remote(parent: str, name: str) -> str:
        """Join a child name onto an absolute remote directory path."""
        return f"/{name}" if parent == "/" else f"{parent}/{name}"

    def collect_with_mlsd() -> Optional[List[str]]:
        """Walk with MLSD; return None as soon as MLSD proves unsupported."""
        collected: List[str] = []
        pending: List[Tuple[str, str]] = [(remote_root, "")]
        while pending:
            dir_path, rel_prefix = pending.pop()
            entries = try_mlsd(ftp, dir_path)
            if entries is None:
                # Trigger the NLST fallback for the entire listing.
                return None
            for name, facts in entries:
                if name in (".", ".."):
                    continue
                # Compare the type fact case-insensitively.
                entry_type = str(facts.get("type", "")).lower()
                if entry_type == "dir":
                    if recursive:
                        pending.append((join_remote(dir_path, name), f"{rel_prefix}{name}/"))
                elif entry_type == "file":
                    collected.append(f"{rel_prefix}{name}")
                # Any other type (e.g. links, cdir/pdir) is skipped for safety.
        return collected

    def collect_with_nlst() -> List[str]:
        """Walk with NLST, classifying each name by probing it with CWD."""
        collected: List[str] = []
        pending: List[Tuple[str, str]] = [(remote_root, "")]
        while pending:
            dir_path, rel_prefix = pending.pop()
            with ftp_cwd(ftp, dir_path):
                try:
                    names = ftp.nlst()
                except error_perm as exc:
                    # Some servers return an error when the directory is empty.
                    reply = str(exc)
                    if "no files" in reply.lower() or reply.startswith("550"):
                        continue
                    raise
            for name in names:
                # Servers may return bare names or full paths; keep the last
                # component only.
                display_name = name.rsplit("/", 1)[-1]
                # An empty, "." or ".." component would point back at this
                # directory or its parent and loop forever when recursing.
                if display_name in ("", ".", ".."):
                    continue
                candidate = join_remote(dir_path, display_name)
                if is_directory_via_cwd_probe(ftp, candidate):
                    if recursive:
                        pending.append((candidate, f"{rel_prefix}{display_name}/"))
                else:
                    collected.append(f"{rel_prefix}{display_name}")
        return collected

    # First try MLSD for robust type detection.
    mlsd_files = collect_with_mlsd()
    if mlsd_files is not None:
        log(f"Listed remote files using MLSD under '{normalized_base}'", verbose)
        return sorted(mlsd_files)

    # Fallback: NLST + probing
    log("MLSD unsupported. Falling back to NLST + directory probing.", verbose)
    return sorted(collect_with_nlst())


def list_local_files(local_dir: Path, recursive: bool) -> Set[str]:
    """Return the set of files present under ``local_dir``.

    Args:
        local_dir: Directory to inspect.
        recursive: When true, walk all subdirectories; otherwise only the
            directory's immediate files are reported.

    Returns:
        Paths relative to ``local_dir`` using ``/`` as separator. The set is
        empty when ``local_dir`` does not exist or is not a directory.
    """
    # NOTE(review): this message is printed unconditionally (the function has
    # no verbosity switch); kept as-is to preserve existing output.
    print(f"Listing local files in {local_dir} recursively: {recursive}")

    # A missing path and a non-directory path are treated alike. Previously a
    # plain file made the non-recursive branch raise NotADirectoryError while
    # the recursive branch quietly returned an empty set.
    if not local_dir.is_dir():
        return set()

    if not recursive:
        return {entry.name for entry in local_dir.iterdir() if entry.is_file()}

    return {
        (Path(root) / file_name).relative_to(local_dir).as_posix()
        for root, _dirs, file_names in os.walk(local_dir)
        for file_name in file_names
    }


def ensure_parent_directory(path: Path) -> None:
    """Create the directory that will contain ``path``, including any missing ancestors.

    Nothing happens if that directory already exists.
    """
    containing_dir = path.parent
    containing_dir.mkdir(parents=True, exist_ok=True)


def download_file(ftp: FTP, remote_base: str, relative_path: str, local_base: Path, verbose: bool) -> None:
    """Download one remote file into the matching location under ``local_base``.

    The data is first written to a hidden ``.<name>.part`` file next to the
    destination and only renamed into place once the transfer has finished.
    The sync decides what to fetch purely by file existence, so a truncated
    file left behind by a failed transfer would otherwise be mistaken for an
    up-to-date copy on every later run.

    Args:
        ftp: Connected FTP client.
        remote_base: Remote directory that ``relative_path`` is relative to.
        relative_path: ``/``-separated path of the file below ``remote_base``.
        local_base: Local directory mirroring ``remote_base``.
        verbose: When true, a progress line is printed via ``log``.

    Raises:
        ValueError: If ``relative_path`` is empty, absolute, or contains a
            ``..`` component. The name originates from the server, and such a
            path could escape ``local_base``.
        Exception: Anything raised while creating directories, opening the
            file or transferring; the partial file is removed first.
    """
    relative = Path(relative_path)
    if not relative_path or relative.anchor or ".." in relative.parts:
        raise ValueError(f"Unsafe remote-relative path: {relative_path!r}")

    # rstrip turns a base of "/" into "", so this also yields "/<relative_path>".
    remote_path = f"{remote_base.rstrip('/')}/{relative_path}"
    local_path = local_base / relative
    ensure_parent_directory(local_path)
    partial_path = local_path.with_name(f".{local_path.name}.part")

    log(f"Downloading: {relative_path}", verbose)
    try:
        with open(partial_path, "wb") as f:
            ftp.retrbinary(f"RETR {remote_path}", f.write)
        # Publish the file only after the whole transfer succeeded.
        os.replace(partial_path, local_path)
    except BaseException:
        # Best-effort removal of the incomplete download; the original error
        # is what the caller needs to see.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        raise


def sync_missing_files(ftp: FTP, sync: SyncOptions) -> int:
    """Download (or, in dry-run mode, list) remote files that are missing locally.

    A remote file counts as present when a file with the same relative path
    already exists under ``sync.local_dir``; contents are not compared.
    Each directory is listed with MLSD while the server supports it. Once MLSD
    is reported as unsupported, the current and all remaining directories are
    listed with NLST plus CWD probing.

    Args:
        ftp: Connected, logged-in FTP client.
        sync: Remote/local directories and the recursive, dry-run and verbose
            switches.

    Returns:
        Always ``0``; failures are signalled by exceptions.

    Raises:
        error_perm: If the remote directory cannot be entered or a listing
            fails for a reason other than "MLSD unsupported" / "empty
            directory". Errors from ``download_file`` propagate unchanged.
    """
    # Create local base directory if needed and compute local manifest first
    sync.local_dir.mkdir(parents=True, exist_ok=True)
    local_files = list_local_files(sync.local_dir, sync.recursive)
    # Diagnostic dump of the whole manifest: only shown in verbose mode so it
    # does not drown the actual (dry-run) output.
    log(f"Local files: {local_files}", sync.verbose)
    normalized_base = sync.remote_dir.rstrip("/") or "/"

    # Resolve the remote base to an absolute path once. The NLST fallback
    # probes and lists directories via CWD, so a relative base would be
    # resolved against whatever directory happens to be current.
    with ftp_cwd(ftp, normalized_base):
        remote_root = ftp.pwd().rstrip("/") or "/"

    missing_count = 0
    printed_dry_header = False
    mlsd_available = True

    def join_remote(parent: str, name: str) -> str:
        """Join a child name onto an absolute remote directory path."""
        return f"/{name}" if parent == "/" else f"{parent}/{name}"

    def handle_file(rel_path: str, label: str) -> None:
        """Report or download ``rel_path`` if it is not present locally."""
        nonlocal missing_count, printed_dry_header
        if rel_path in local_files:
            return
        if sync.dry_run:
            # Print the header once, before the first listed file.
            if not printed_dry_header:
                log("Dry-run: the following files would be downloaded:", True)
                printed_dry_header = True
            print(rel_path)
        else:
            print(f"Downloading {label}: {rel_path}")
            download_file(ftp, remote_root, rel_path, sync.local_dir, sync.verbose)
            print(f"Downloaded: {rel_path}")
        missing_count += 1

    def list_via_mlsd(dir_path: str) -> Optional[List[Tuple[str, bool]]]:
        """Return ``(name, is_dir)`` pairs via MLSD, or None if unsupported."""
        entries = try_mlsd(ftp, dir_path)
        if entries is None:
            return None
        listing: List[Tuple[str, bool]] = []
        for name, facts in entries:
            if name in (".", ".."):
                continue
            entry_type = str(facts.get("type", "")).lower()
            # Skip other types (e.g., links)
            if entry_type in ("dir", "file"):
                listing.append((name, entry_type == "dir"))
        return listing

    def list_via_nlst(dir_path: str) -> List[Tuple[str, bool]]:
        """Return ``(name, is_dir)`` pairs via NLST + CWD probing."""
        with ftp_cwd(ftp, dir_path):
            try:
                names = ftp.nlst()
            except error_perm as exc:
                # Some servers answer an empty directory with an error.
                reply = str(exc)
                if "no files" in reply.lower() or reply.startswith("550"):
                    return []
                raise
        listing: List[Tuple[str, bool]] = []
        for name in names:
            # Servers may return bare names or full paths.
            display_name = name.rsplit("/", 1)[-1]
            # Empty/"."/".." components would refer back to this directory
            # or its parent and cause endless recursion.
            if display_name in ("", ".", ".."):
                continue
            is_dir = is_directory_via_cwd_probe(ftp, join_remote(dir_path, display_name))
            listing.append((display_name, is_dir))
        return listing

    def walk(dir_path: str, rel_prefix: str) -> None:
        """Process one remote directory and, if requested, its children."""
        nonlocal mlsd_available
        listing = list_via_mlsd(dir_path) if mlsd_available else None
        if listing is None:
            if mlsd_available:
                # First time MLSD is refused: switch strategy for good
                # instead of aborting the whole sync.
                mlsd_available = False
                log("MLSD unsupported. Falling back to NLST + probing.", sync.verbose)
            listing = list_via_nlst(dir_path)
            label = "NLST"
        else:
            if not rel_prefix:
                # Only announce once, for the root directory.
                log(f"Scanning remote using MLSD under '{normalized_base}'", sync.verbose)
            label = "MLSD-based "

        for name, is_dir in listing:
            if is_dir:
                if sync.recursive:
                    walk(join_remote(dir_path, name), f"{rel_prefix}{name}/")
            else:
                handle_file(f"{rel_prefix}{name}", label)

    walk(remote_root, "")

    if missing_count == 0:
        log("No missing files. Local directory is up-to-date with remote subset.", True)
    else:
        if not sync.dry_run:
            log(f"Downloaded {missing_count} file(s).", True)
    return 0


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the tool end to end and return a process exit status.

    Args:
        argv: Command-line arguments; ``None`` means use ``sys.argv``.

    Returns:
        The result of ``sync_missing_files`` on success, ``2`` when connecting
        or logging in fails, ``3`` when an FTP error occurs during the sync.
    """
    connection_settings, sync_settings = parse_args(argv)

    try:
        session = connect_ftp(connection_settings, verbose=sync_settings.verbose)
    except all_errors as err:
        print(f"FTP connection/login failed: {err}", file=sys.stderr)
        return 2

    try:
        exit_status = sync_missing_files(session, sync_settings)
    except all_errors as err:
        print(f"FTP error during sync: {err}", file=sys.stderr)
        exit_status = 3
    finally:
        # Best-effort shutdown: try a polite QUIT first and fall back to
        # simply closing the connection; failures of either are ignored.
        for method_name in ("quit", "close"):
            try:
                getattr(session, method_name)()
            except Exception:
                continue
            break
    return exit_status


if __name__ == "__main__":
    # Executed as a script: use main()'s return value as the exit status.
    raise SystemExit(main())
