"""
Can-Do-Steps fetch audio component for the Lumabit lesson-generation pipeline.
Downloads bird calls or other audio resources from Xeno-Canto for each roadmap step.
"""
import os
from datetime import datetime
from typing import Dict, Any, List, Optional, Tuple

import requests

from utils.io import ensure_dir, save_output
from utils.storage import (
    path_exists,
    read_json,
    write_json,
    write_bytes,
    copy_path,
)

# Xeno-Canto API v3 endpoint that is queried for recordings of a species.
XENO_CANTO_API_URL = "https://www.xeno-canto.org/api/3/recordings"
# Sort rank for each recording quality letter. A lower rank is preferred when a
# recording is selected; letters missing from this table rank after all of these.
QUALITY_ORDER = {"A": 0, "B": 1, "C": 2, "D": 3, "E": 4}


def load_roadmap_file(run_id: str) -> Optional[Dict[str, Any]]:
    """Read the roadmap JSON that belongs to ``run_id``.

    Returns the parsed roadmap, or ``None`` when the file is missing or cannot
    be read. Both failure modes are reported on stdout instead of raising.
    """
    location = f"output/can-do-steps/{run_id}/roadmap-{run_id}.json"

    if path_exists(location):
        try:
            roadmap = read_json(location)
            print(f"✓ Loaded roadmap file: {location}")
            return roadmap
        except Exception as err:
            # Any read/parse problem is downgraded to a warning for the caller.
            print(f"⚠️ Error loading roadmap file {location}: {err}")
            return None

    print(f"⚠️ Roadmap file not found: {location}")
    return None


def extract_step_entries(roadmap_data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Flatten the roadmap's tracks -> paths -> steps into one list.

    Every returned entry carries the step's own id, title, description and
    level plus the id and title of the track and path that contain it.
    """

    def walk():
        # Yield each step together with its enclosing track and path,
        # in document order.
        for parent_track in roadmap_data.get("tracks", []):
            for parent_path in parent_track.get("paths", []):
                for node in parent_path.get("steps", []):
                    yield parent_track, parent_path, node

    return [
        {
            "id": node.get("id", ""),
            "title": node.get("title", ""),
            "description": node.get("description", ""),
            "level": node.get("level"),
            "track_id": parent_track.get("id"),
            "path_id": parent_path.get("id"),
            "track_title": parent_track.get("title"),
            "path_title": parent_path.get("title"),
        }
        for parent_track, parent_path, node in walk()
    ]


def apply_audio_to_step(
    roadmap_data: Dict[str, Any],
    step_id: str,
    audio_filename: str
) -> bool:
    """Record ``audio_filename`` on the first roadmap step whose id is ``step_id``.

    The step's ``audio`` value is merged rather than overwritten: an existing
    list gains the filename if it is not already there, an existing non-empty
    string that differs is turned into a two-item list, and anything else is
    replaced by the filename. The roadmap is modified in place.

    Returns ``True`` when a matching step was found, ``False`` otherwise
    (including when ``step_id`` is empty).
    """
    if not step_id:
        return False

    # Lazy walk over all steps so the search stops at the first match.
    every_step = (
        node
        for track in roadmap_data.get("tracks", [])
        for path in track.get("paths", [])
        for node in path.get("steps", [])
    )
    target = next((node for node in every_step if node.get("id") == step_id), None)
    if target is None:
        return False

    current = target.get("audio")
    if isinstance(current, list):
        if audio_filename not in current:
            current.append(audio_filename)
    elif isinstance(current, str) and current:
        if current != audio_filename:
            target["audio"] = [current, audio_filename]
    else:
        target["audio"] = audio_filename

    return True


def parse_latin_name(description: str) -> Optional[Tuple[str, str]]:
    """Attempt to parse a Latin binomial name from the step description.

    The first two whitespace-separated words of ``description`` are taken as
    genus and species after surrounding punctuation (``,.;:()[]{}``) is
    stripped; words that consist only of such punctuation are ignored.

    Args:
        description: Free-text step description expected to start with the
            binomial name, e.g. ``"Turdus merula"``.

    Returns:
        ``(genus, species)`` in conventional binomial casing (genus
        capitalised, species lower-case), or ``None`` when the description is
        empty or has fewer than two usable words.
    """
    if not description:
        return None

    punctuation = ",.;:()[]{}"
    # Strip once per word and drop words that were punctuation only, so every
    # remaining token is guaranteed to be non-empty.
    tokens = [
        cleaned
        for cleaned in (word.strip(punctuation) for word in description.split())
        if cleaned
    ]

    if len(tokens) < 2:
        return None

    genus, species = tokens[0], tokens[1]

    # capitalize() also lower-cases the tail, so "TURDUS" becomes "Turdus"
    # in line with the fully lower-cased species.
    return genus.capitalize(), species.lower()


def build_query(genus: str, species: str) -> str:
    """Return the Xeno-Canto search string made of a ``gen:`` and an ``sp:`` tag."""
    genus_tag = f"gen:{genus}"
    species_tag = f"sp:{species}"
    return " ".join((genus_tag, species_tag))


def normalise_recording_url(file_path: str) -> Optional[str]:
    """Turn the ``file`` value of a Xeno-Canto recording into an absolute URL.

    Protocol-relative values (``//host/...``) get an ``https:`` scheme,
    absolute ``http``/``https`` URLs are returned untouched, and anything else
    is treated as a path on ``https://www.xeno-canto.org``. Empty input
    yields ``None``.
    """
    if not file_path:
        return None

    if file_path.startswith("//"):
        return "https:" + file_path

    if file_path.startswith(("http://", "https://")):
        return file_path

    # Site-relative path: make sure it is rooted before prefixing the host.
    rooted = file_path if file_path.startswith("/") else "/" + file_path
    return "https://www.xeno-canto.org" + rooted


def select_preferred_recording(recordings: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """Select the best available recording based on quality and availability.

    Only recordings with a non-empty ``file`` value are considered. Among
    those, the one with the best quality rank (per ``QUALITY_ORDER``) wins;
    ties are broken by the lowest numeric ``id``, and the earliest entry wins
    when both are equal.

    Args:
        recordings: Recording dicts as returned by the Xeno-Canto API.

    Returns:
        The preferred recording dict, or ``None`` when no recording has a
        usable ``file`` value.
    """
    candidates = [rec for rec in recordings if rec.get("file")]

    if not candidates:
        return None

    def sort_key(rec: Dict[str, Any]) -> Tuple[int, int]:
        # ``q`` may be missing, null or a non-string; coerce it so a single odd
        # entry cannot abort the selection. Unknown ratings rank last.
        quality = str(rec.get("q") or "Z").upper()
        quality_rank = QUALITY_ORDER.get(quality, len(QUALITY_ORDER))
        try:
            recording_id = int(rec.get("id", 0))
        except (TypeError, ValueError):
            # Non-numeric or null ids sort as 0.
            recording_id = 0
        return quality_rank, recording_id

    # min() returns the first minimal element, the same one a stable sort
    # would place at index 0, without sorting or mutating the list.
    return min(candidates, key=sort_key)


def _fetch_step_audio(
    session: "requests.Session",
    api_key: str,
    step: Dict[str, Any],
    step_id: str,
    audio_dir: str,
    overwrite: bool,
) -> Tuple[Dict[str, Any], Optional[str]]:
    """Look up, download and store the audio for a single roadmap step.

    Args:
        session: HTTP session used for both the API query and the download.
        api_key: Xeno-Canto API key, sent as the ``key`` query parameter.
        step: Step entry as produced by ``extract_step_entries``.
        step_id: Identifier used in the result record and as the file stem.
        audio_dir: Directory the audio file is written to.
        overwrite: When false, an already existing destination file is kept
            and the step is reported as skipped.

    Returns:
        ``(record, filename)``. ``record`` is the per-step result entry whose
        ``status`` is ``"downloaded"``, ``"skipped"`` or ``"error"``.
        ``filename`` is the saved file's name when the status is
        ``"downloaded"`` and ``None`` otherwise.
    """
    # A null description in the roadmap must not crash the whole run.
    description = (step.get("description") or "").strip()
    record: Dict[str, Any] = {
        "step_id": step_id,
        "title": step.get("title", ""),
        "description": description,
        "track_id": step.get("track_id"),
        "path_id": step.get("path_id"),
        "status": "pending"
    }

    def finish(
        status: str, message: str, log_line: str, **extra: Any
    ) -> Tuple[Dict[str, Any], None]:
        # Close the record with a non-download outcome and log it.
        record.update({"status": status, "message": message, **extra})
        print(log_line)
        return record, None

    parsed_name = parse_latin_name(description)
    if not parsed_name:
        return finish(
            "skipped",
            "Could not parse Latin name from description",
            "    ⚠️ Skipped: unable to parse Latin name",
        )

    genus, species = parsed_name
    query = build_query(genus, species)
    record.update({
        "genus": genus,
        "species": species,
        "query": query
    })

    try:
        response = session.get(
            XENO_CANTO_API_URL,
            params={"query": query, "key": api_key},
            timeout=30,
        )
        content_type = response.headers.get("Content-Type")
        location = response.headers.get("Location")
        redirect_note = f", redirect -> {location}" if location else ""
        print(f"    ↪️ Response status: {response.status_code}, content-type: {content_type}{redirect_note}")
        response.raise_for_status()
    except requests.RequestException as exc:
        # The key travels in the query string, and request error text can
        # contain the request URL; mask the key (best effort) so it does not
        # end up in the console output or the saved summary.
        reason = str(exc)
        if api_key:
            reason = reason.replace(api_key, "***")
        return finish(
            "error",
            f"API request failed: {reason}",
            f"    ❌ API request failed: {reason}",
        )

    try:
        data = response.json()
    except ValueError as exc:
        preview = ""
        try:
            preview = response.text[:200]
        except Exception:
            # The preview is purely diagnostic; failing to build it must not
            # mask the JSON error reported below.
            pass
        if preview:
            print(f"    🔍 Response preview: {preview!r}")
        return finish(
            "error",
            f"Invalid JSON response: {exc}",
            f"    ❌ Failed to parse API response: {exc}",
        )

    if not isinstance(data, dict):
        # Valid JSON that is not an object (e.g. a list or a bare string)
        # cannot carry a "recordings" key.
        return finish(
            "error",
            "Unexpected API response format",
            "    ❌ Unexpected API response format",
        )

    recordings = data.get("recordings", [])
    if not recordings:
        return finish("skipped", "No recordings found", "    ⚠️ No recordings found")

    preferred = select_preferred_recording(recordings)
    if not preferred:
        return finish(
            "skipped",
            "No usable recording URLs",
            "    ⚠️ No usable recording URLs",
        )

    recording_id = preferred.get("id")
    file_url = normalise_recording_url(preferred.get("file", ""))
    if not file_url:
        return finish("skipped", "Recording URL missing", "    ⚠️ Recording URL missing")

    # Fall back to .mp3 when the URL carries no file extension.
    extension = os.path.splitext(file_url)[1] or ".mp3"
    filename = f"{step_id}{extension}"
    destination_path = os.path.join(audio_dir, filename)

    if path_exists(destination_path) and not overwrite:
        return finish(
            "skipped",
            "File already exists",
            "    ⏭️  Skipped (file exists)",
            recording_id=recording_id,
            file_path=destination_path,
        )

    try:
        audio_response = session.get(file_url, timeout=60)
        audio_response.raise_for_status()
        write_bytes(
            destination_path,
            audio_response.content,
            content_type=audio_response.headers.get("Content-Type", "audio/mpeg"),
        )
    except requests.RequestException as exc:
        # Must be tested before OSError: RequestException derives from it.
        return finish(
            "error",
            f"Download failed: {exc}",
            f"    ❌ Download failed: {exc}",
            recording_id=recording_id,
            file_url=file_url,
        )
    except OSError as exc:
        return finish(
            "error",
            f"File write failed: {exc}",
            f"    ❌ Failed to save file: {exc}",
            recording_id=recording_id,
            file_url=file_url,
        )

    record.update({
        "status": "downloaded",
        "recording_id": recording_id,
        "file_url": file_url,
        "file_path": destination_path,
        "quality": preferred.get("q"),
        "license": preferred.get("lic"),
        "length": preferred.get("length")
    })
    print(f"    ✅ Saved to {destination_path}")
    return record, filename


def fetch_audio(
    run_id: str,
    overwrite: bool = False,
    limit: Optional[int] = None
) -> Dict[str, Any]:
    """Fetch Xeno-Canto audio for each roadmap step and save it locally.

    For every step the Latin name is parsed from the description, Xeno-Canto
    is queried, the preferred recording is downloaded into
    ``output/can-do-steps/<run_id>/audio`` and the step in the roadmap is
    annotated with the file name. When at least one step was annotated, the
    roadmap file is backed up and rewritten. A summary is saved through
    ``save_output`` and returned.

    Args:
        run_id: Identifier of the pipeline run whose roadmap is processed.
        overwrite: Re-download audio even when the destination file exists.
        limit: Process at most this many steps; ``None`` means all steps and
            a negative value is treated as zero.

    Returns:
        Summary dict with the per-status counts, the roadmap and backup paths,
        the ids of the annotated steps and the per-step ``results`` records.
        ``roadmap_updated`` is true only when the roadmap file was actually
        rewritten.

    Raises:
        FileNotFoundError: The roadmap for ``run_id`` could not be loaded.
        EnvironmentError: ``XENO_CANTO_API_KEY`` is not set.
    """
    roadmap = load_roadmap_file(run_id)
    if not roadmap:
        raise FileNotFoundError(f"Roadmap for run '{run_id}' not found")

    api_key = os.getenv("XENO_CANTO_API_KEY")
    if not api_key:
        raise EnvironmentError("Missing XENO_CANTO_API_KEY environment variable")

    steps = extract_step_entries(roadmap)
    if not steps:
        print("⚠️ No steps found in roadmap; nothing to fetch")

    if limit is not None:
        # Clamp so a negative limit cannot turn into a slice from the end.
        steps = steps[:max(limit, 0)]

    audio_dir = f"output/can-do-steps/{run_id}/audio"
    ensure_dir(audio_dir)

    results: List[Dict[str, Any]] = []
    # Keyed by the final record status returned by _fetch_step_audio.
    counts = {"downloaded": 0, "skipped": 0, "error": 0}
    updated_steps: List[str] = []

    print(f"🎧 Fetching audio for {len(steps)} steps")

    # The context manager closes the session even if a step raises.
    with requests.Session() as session:
        session.headers.update({
            "X-API-Key": api_key,
            "User-Agent": "LumabitFetchAudio/1.0"
        })

        for index, step in enumerate(steps, start=1):
            step_id = step.get("id") or f"step-{index}"
            print(f"  [{index}/{len(steps)}] Processing step '{step_id}'")

            record, filename = _fetch_step_audio(
                session, api_key, step, step_id, audio_dir, overwrite
            )
            results.append(record)
            counts[record["status"]] += 1

            if filename is None:
                continue

            if apply_audio_to_step(roadmap, step_id, filename):
                updated_steps.append(step_id)
            else:
                print(f"    ⚠️ Warning: could not annotate roadmap for step '{step_id}'")

    backup_roadmap_path: Optional[str] = None
    original_roadmap_path = f"output/can-do-steps/{run_id}/roadmap-{run_id}.json"
    roadmap_written = False

    if updated_steps:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        archive_path = f"output/can-do-steps/{run_id}/archived/roadmap-{run_id}-{timestamp}.json"

        try:
            if path_exists(original_roadmap_path):
                copy_path(original_roadmap_path, archive_path)
                # Only report a backup that was really created.
                backup_roadmap_path = archive_path
                print(f"✅ Created roadmap backup: {archive_path}")

            write_json(original_roadmap_path, roadmap)
            roadmap_written = True

            print(f"✅ Updated roadmap with audio references: {original_roadmap_path}")
        except OSError as exc:
            print(f"❌ Failed to update roadmap file: {exc}")
            backup_roadmap_path = None

    summary = {
        "run_id": run_id,
        "steps_considered": len(steps),
        "downloaded": counts["downloaded"],
        "skipped": counts["skipped"],
        "errors": counts["error"],
        "overwrite": overwrite,
        # Reflects the file on disk, not just the in-memory annotations.
        "roadmap_updated": roadmap_written,
        "updated_step_ids": updated_steps,
        "roadmap_path": original_roadmap_path,
        "backup_roadmap_path": backup_roadmap_path,
        "results": results
    }

    save_output(
        data=summary,
        pipeline="can-do-steps",
        step="fetch_audio",
        run_id=run_id,
        subfolder="audio"
    )

    print(
        f"\n🎉 Audio fetch complete: {counts['downloaded']} downloaded, "
        f"{counts['skipped']} skipped, {counts['error']} errors"
    )

    return summary


if __name__ == "__main__":
    # Command-line entry point: parse the options and run the fetch once.
    import argparse

    cli = argparse.ArgumentParser(description="Fetch audio for can-do-steps roadmap")
    for flags, options in (
        (("--run-id",), {"required": True, "help": "Run identifier"}),
        (("--overwrite",), {"action": "store_true", "help": "Overwrite existing audio files"}),
        (("--limit",), {"type": int, "help": "Limit number of steps to process"}),
    ):
        cli.add_argument(*flags, **options)
    parsed = cli.parse_args()

    fetch_audio(run_id=parsed.run_id, overwrite=parsed.overwrite, limit=parsed.limit)
