"""
Can-Do-Steps find resources component for the Lumabit lesson-generation pipeline.
Curates external learning resources for each lesson and attaches them to bits files.
"""
from __future__ import annotations

import json
import os
import traceback
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

from utils.io import load_latest_output, save_output
from chains.base import build_chain, default_json_parser, parse_output
from chains.can_do_steps.split_hierarchy import load_steps, load_paths, load_tracks
from utils.storage import path_exists, read_json, write_json, copy_path

# Allowed values for a resource's "type" field; enforced by _normalize_resource.
RESOURCE_TYPES: List[str] = [
    "article",
    "video",
    "interactive",
    "tool",
    "template",
    "book",
    "podcast",
    "case_study",
    "community",
    "other",
]

# Allowed values for a resource's "cost_type" field; enforced by _normalize_resource.
COST_TYPES: List[str] = ["free", "freemium", "paid", "subscription", "donation"]


def resolve_find_resources_prompt(run_id: Optional[str] = None, prompt_id: Optional[str] = None) -> Tuple[str, str]:
    """
    Pick the prompt file for the find_resources chain.

    Candidates are tried in order:
      1. Explicit prompt_id override (find_resources_<prompt_id>.txt)
      2. Run-specific prompt (find_resources_<run_id>.txt)
      3. The shared default prompt (find_resources.txt)

    Returns:
        A (chain_name, prompt_path) pair for the first candidate that exists,
        falling back to the default even if the default file is absent.
    """
    base_name = "find_resources"
    prompt_dir = os.path.join("prompts", "can-do-steps")

    if prompt_id:
        override_path = os.path.join(prompt_dir, f"{base_name}_{prompt_id}.txt")
        if os.path.exists(override_path):
            print(f"Using prompt override: {override_path}")
            return f"{base_name}_{prompt_id}", override_path
        # Missing override is not fatal; continue down the candidate chain.
        print(f"Prompt override not found ({override_path}); falling back to run-specific/default prompt.")

    if run_id:
        run_specific_path = os.path.join(prompt_dir, f"{base_name}_{run_id}.txt")
        if os.path.exists(run_specific_path):
            print(f"Using roadmap-specific prompt: {run_specific_path}")
            return f"{base_name}_{run_id}", run_specific_path

    default_path = os.path.join(prompt_dir, f"{base_name}.txt")
    print(f"Using default prompt: {default_path}")
    return base_name, default_path


def load_roadmap_file(run_id: str) -> Optional[Dict[str, Any]]:
    """
    Read the roadmap JSON for a run.

    Returns:
        The parsed roadmap payload, or None when the file is missing or
        cannot be read/parsed (the failure is printed, not raised).
    """
    roadmap_path = f"output/can-do-steps/{run_id}/roadmap-{run_id}.json"

    if not path_exists(roadmap_path):
        print(f"⚠️ Roadmap file not found: {roadmap_path}")
        return None

    try:
        payload = read_json(roadmap_path)
    except Exception as exc:
        print(f"⚠️ Error loading roadmap file {roadmap_path}: {exc}")
        return None

    print(f"✓ Loaded roadmap file: {roadmap_path}")
    return payload


def load_bits_file(run_id: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
    """
    Locate and read the organized bits file for a run.

    Both historical naming schemes (bits-<run_id>.json and bits_<run_id>.json)
    are probed in that order.

    Returns:
        (data, filepath) for the first candidate that exists and parses, or
        (None, None) when no candidate exists or the first existing one fails
        to load.
    """
    candidate_filenames = [f"bits-{run_id}.json", f"bits_{run_id}.json"]

    for candidate in candidate_filenames:
        filepath = os.path.join("output", "can-do-steps", run_id, candidate)
        if not path_exists(filepath):
            continue
        try:
            payload = read_json(filepath)
        except Exception as exc:
            # Stop at the first existing-but-unreadable file rather than
            # silently falling through to the next naming scheme.
            print(f"⚠️ Error loading bits file {filepath}: {exc}")
            return None, None
        print(f"✓ Loaded bits file: {filepath}")
        return payload, filepath

    print(f"⚠️ Bits file not found for run {run_id}. Tried: {', '.join(candidate_filenames)}")
    return None, None


def build_lookup(items: Optional[List[Dict[str, Any]]], key: str = "id") -> Dict[str, Dict[str, Any]]:
    """
    Index a list of dicts by one of their keys.

    Entries that are not dicts or that lack the key are skipped; when several
    entries share a key, the last one wins. A None or empty input yields {}.
    """
    if not items:
        return {}
    return {entry[key]: entry for entry in items if isinstance(entry, dict) and key in entry}


def collect_targets(
    steps: List[Dict[str, Any]],
    path_lookup: Dict[str, Dict[str, Any]],
    track_lookup: Dict[str, Dict[str, Any]],
    bits_data: Optional[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """
    Pair each step with its hierarchy context and any existing lesson data.

    Steps that are not dicts or have no truthy "id" are dropped. The track is
    resolved through the step's path when the path is known; otherwise it is
    looked up directly from the step's own trackId.
    """
    bits = bits_data if isinstance(bits_data, dict) else {}
    targets: List[Dict[str, Any]] = []

    for step in steps:
        if not isinstance(step, dict):
            continue
        step_id = step.get("id")
        if not step_id:
            continue

        path_info = path_lookup.get(step.get("pathId")) if path_lookup else None
        if path_info:
            track_info = track_lookup.get(path_info.get("trackId"))
        elif track_lookup:
            track_info = track_lookup.get(step.get("trackId"))
        else:
            track_info = None

        # Pull prior lesson content / resources from the bits file if present.
        bits_entry = bits.get(step_id)
        if isinstance(bits_entry, dict):
            lesson_content = bits_entry.get("lesson_content", "")
            existing_resources = bits_entry.get("resources")
        else:
            lesson_content = ""
            existing_resources = None

        targets.append(
            {
                "step": step,
                "path": path_info,
                "track": track_info,
                "lesson_content": lesson_content,
                "existing_resources": existing_resources,
            }
        )

    return targets


def _coerce_bool(value: Any) -> bool:
    """
    Best-effort conversion of an LLM-provided flag value to a bool.

    Strings are matched (trimmed, case-insensitive) against a small accept
    list; numbers use standard truthiness; every other type maps to False.
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        return value.strip().lower() in {"true", "yes", "y", "1"}
    return bool(value) if isinstance(value, (int, float)) else False


def _normalize_resource(resource: Any, index: int) -> Dict[str, Any]:
    """
    Validate one raw resource entry from the LLM and return a cleaned copy.

    Trims/normalizes string fields, lowercases type/cost_type, coerces the
    sign-in flag to bool, and defaults an empty cost_type to "free".

    Raises:
        ValueError: When the entry is not a dict, a required field
            (name/url/short_description) is missing, or type/cost_type is
            outside the allowed vocabulary.
    """
    if not isinstance(resource, dict):
        raise ValueError(f"Resource at index {index} is not a dictionary")

    name = str(resource.get("name", "")).strip()
    kind = str(resource.get("type", "")).strip().lower()
    url = str(resource.get("url", "")).strip()
    summary = str(resource.get("short_description", "")).strip()
    sign_in = _coerce_bool(resource.get("sign_in_required", False))
    cost = str(resource.get("cost_type", "")).strip().lower() or "free"
    notes = str(resource.get("notes", "")).strip()

    # Validation order matters: callers see the first failing field.
    if not name:
        raise ValueError(f"Resource at index {index} is missing 'name'")
    if kind not in RESOURCE_TYPES:
        raise ValueError(f"Resource at index {index} has invalid type '{kind}'")
    if not url:
        raise ValueError(f"Resource at index {index} is missing 'url'")
    if not summary:
        raise ValueError(f"Resource at index {index} is missing 'short_description'")
    if cost not in COST_TYPES:
        raise ValueError(f"Resource at index {index} has invalid cost_type '{cost}'")

    return {
        "name": name,
        "type": kind,
        "url": url,
        "short_description": summary,
        "sign_in_required": sign_in,
        "cost_type": cost,
        "notes": notes,
    }


def parse_resources_output(output: str, expected_step_id: str) -> Dict[str, Any]:
    """
    Parse a raw LLM response into a validated resources payload.

    Requires the payload to be a JSON object whose "resources" entry is a
    list (a None value is tolerated and treated as empty). A step_id that
    disagrees with expected_step_id is reported and overridden.

    Raises:
        ValueError: On structural problems or per-resource validation errors.
    """
    parsed = parse_output(output, default_json_parser)
    if not isinstance(parsed, dict):
        raise ValueError("Parsed resources output must be a JSON object.")

    step_id = parsed.get("step_id", expected_step_id)
    if step_id != expected_step_id:
        # The model occasionally echoes the wrong ID; trust the caller's.
        print(f"⚠️ Step ID mismatch in resources output. Expected {expected_step_id}, got {step_id}. Using expected ID.")
        step_id = expected_step_id

    raw_entries = parsed.get("resources", [])
    if raw_entries is None:
        raw_entries = []
    if not isinstance(raw_entries, list):
        raise ValueError("Resources output must include a list under 'resources'.")

    notes = parsed.get("notes", "")
    if not isinstance(notes, str):
        notes = str(notes)

    return {
        "step_id": step_id,
        "resources": [_normalize_resource(entry, pos) for pos, entry in enumerate(raw_entries)],
        "notes": notes.strip(),
    }


def _generate_resources_for_target(
    index: int,
    total_targets: int,
    target: Dict[str, Any],
    run_id: str,
    roadmap_title: str,
    roadmap_description: str,
    chain_name: str,
) -> Dict[str, Any]:
    """
    Generate resources for a single step target.

    Runs the LLM chain with the full hierarchy context, validates its output,
    and returns a dict with the aggregated per-step result, its log lines,
    the original slot index (so concurrent callers can restore ordering), and
    a "success"/"error" status. Chain or parsing failures are captured rather
    than propagated: the step is reported with zero resources and the error
    text recorded in its notes.

    Fix: the "✅ ... N resources" summary line was previously appended even
    after a failure, so error logs ended with a misleading success marker.
    It is now emitted only on success.
    """
    step = target.get("step") or {}
    path = target.get("path") or {}
    track = target.get("track") or {}

    step_id = step.get("id", f"step-{index + 1}")
    logs = [f"\n📚 Finding resources for step {index + 1}/{total_targets}: {step_id}"]

    step_title = step.get("title", "")
    step_description = step.get("description", "")
    step_level = step.get("level", "")

    # Statements may arrive as a list, a plain string, or be absent entirely.
    statements = step.get("statements", [])
    if isinstance(statements, list):
        step_statements_text = "; ".join(statements) if statements else "None provided"
    elif isinstance(statements, str) and statements.strip():
        step_statements_text = statements
    else:
        step_statements_text = "None provided"

    path_title = path.get("title", "")
    path_description = path.get("description", "")

    track_title = track.get("title", "")
    track_description = track.get("description", "")

    lesson_content = target.get("lesson_content", "")
    existing_resources = target.get("existing_resources")
    # Serialize any prior resources so the prompt can avoid duplicating them.
    existing_resources_text = (
        json.dumps(existing_resources, ensure_ascii=False, indent=2)
        if existing_resources is not None
        else "None"
    )

    try:
        result = build_chain(
            chain_name=chain_name,
            pipeline="can-do-steps",
            run_id=run_id,
            input_variables={
                "run_id": run_id,
                "roadmap_title": roadmap_title,
                "roadmap_description": roadmap_description,
                "track_title": track_title,
                "track_description": track_description,
                "path_title": path_title,
                "path_description": path_description,
                "step_id": step_id,
                "step_title": step_title,
                "step_description": step_description,
                "step_level": step_level,
                "step_statements": step_statements_text,
                "lesson_content": lesson_content,
                "existing_resources": existing_resources_text,
                "allowed_resource_types": ", ".join(RESOURCE_TYPES),
                "allowed_cost_types": ", ".join(COST_TYPES),
            },
        )

        parsed_output = parse_resources_output(result["output"], expected_step_id=step_id)
        resources = parsed_output.get("resources", [])
        notes = parsed_output.get("notes", "")
        status = "success"
        logs.append(f"✅ {step_id}: {len(resources)} resources")
    except Exception as exc:
        resources = []
        notes = f"Error finding resources: {exc}"
        status = "error"
        logs.append(f"⚠️ Error finding resources for step {step_id}: {exc}")
        logs.append(traceback.format_exc())

    aggregated_result = {
        "step_id": step_id,
        "step_title": step_title,
        "resources": resources,
        "notes": notes,
        "path_id": path.get("id", ""),
        "path_title": path_title,
        "track_id": track.get("id", ""),
        "track_title": track_title,
        "step_level": step_level,
        "step_description": step_description,
    }

    return {
        "index": index,
        "result": aggregated_result,
        "logs": logs,
        "status": status,
    }


def apply_resources_to_bits(
    bits_data: Dict[str, Any],
    bits_filepath: str,
    resources_results: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """
    Merge generated resources into the bits data and write the file back.

    A timestamped backup of the existing bits file is taken first. Each
    result whose step_id exists in bits_data gets its "resources" list (and,
    when non-empty, a "resources_notes" field) attached; results for unknown
    steps are reported but skipped.

    Returns:
        Summary dict with the backup path, updated/missing step IDs, and the
        total number of resources attached.
    """
    backup_path: Optional[Path] = None
    source_path = Path(bits_filepath)
    if path_exists(bits_filepath):
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_path = source_path.with_name(f"{source_path.stem}-{stamp}{source_path.suffix}")
        copy_path(bits_filepath, str(backup_path))
        print(f"✅ Created bits backup: {backup_path}")

    updated_steps: List[str] = []
    missing_steps: List[str] = []
    total_resources = 0
    bits_is_dict = isinstance(bits_data, dict)

    for result in resources_results:
        step_id = result.get("step_id")
        if not step_id:
            continue

        entry = bits_data.get(step_id) if bits_is_dict else None
        if not isinstance(entry, dict):
            missing_steps.append(step_id)
            continue

        step_resources = result.get("resources", [])
        entry["resources"] = step_resources
        notes = result.get("notes")
        if notes:
            entry["resources_notes"] = notes

        total_resources += len(step_resources)
        updated_steps.append(step_id)

    write_json(bits_filepath, bits_data)
    print(f"✅ Updated bits file with resources: {bits_filepath}")

    if missing_steps:
        print(f"⚠️ Resources generated for {len(missing_steps)} steps not present in bits file: {', '.join(missing_steps)}")

    return {
        "backup_path": str(backup_path) if backup_path else None,
        "updated_steps": updated_steps,
        "missing_steps": missing_steps,
        "total_resources": total_resources,
    }


def find_resources(
    run_id: str,
    force_text: bool = False,
    limit: Optional[int] = None,
    max_workers: Optional[int] = None,
    prompt_id: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Generate external learning resources for each step and attach them to bits files.

    Args:
        run_id: Roadmap run identifier; selects the bits/roadmap/hierarchy files.
        force_text: Reuse the latest saved find_resources output (if any)
            instead of re-running the LLM chains.
        limit: When a positive int, process only the first N steps.
        max_workers: Cap on concurrent worker threads; defaults to a
            CPU-derived value and is clamped to at least 1.
        prompt_id: Optional prompt override identifier.

    Returns:
        Summary dict with per-step results and bits-file update metadata.

    Raises:
        FileNotFoundError: When the bits file for the run is missing.
        ValueError: When the roadmap cannot be loaded or no steps exist.
    """
    print(f"🔎 Starting resource discovery for run ID: {run_id}")

    bits_data, bits_filepath = load_bits_file(run_id)
    if not bits_data or not bits_filepath:
        raise FileNotFoundError(f"Bits file not found for run ID: {run_id}")

    if force_text:
        existing_output = load_latest_output(
            pipeline="can-do-steps",
            step="find_resources",
            run_id=run_id,
            subfolder="logs",
        )
        if existing_output:
            # Re-apply previously generated resources without calling the LLM.
            print(f"Using existing find_resources output for can-do-steps/{run_id}")
            apply_summary = apply_resources_to_bits(
                bits_data,
                bits_filepath,
                existing_output.get("results", []),
            )
            existing_output["bits_backup"] = apply_summary.get("backup_path")
            existing_output["updated_steps"] = apply_summary.get("updated_steps")
            existing_output["missing_steps"] = apply_summary.get("missing_steps")
            return existing_output
        print("⚠️ No existing find_resources output found; running discovery now.")

    roadmap_data = load_roadmap_file(run_id)
    if not roadmap_data:
        raise ValueError(f"Could not load roadmap file for run ID: {run_id}")

    steps_data = load_steps(run_id) or {"steps": []}
    paths_data = load_paths(run_id) or {"paths": []}
    tracks_data = load_tracks(run_id) or {"tracks": []}

    path_lookup = build_lookup(paths_data.get("paths", []))
    track_lookup = build_lookup(tracks_data.get("tracks", []))

    steps_list = steps_data.get("steps", [])
    if not steps_list:
        raise ValueError(f"No steps found for run ID: {run_id}")

    all_targets = collect_targets(steps_list, path_lookup, track_lookup, bits_data)
    total_available = len(all_targets)

    if limit is not None and limit > 0:
        targets = all_targets[:limit]
        print(f"Processing {len(targets)} of {total_available} steps (limit={limit}).")
    else:
        targets = all_targets
        print(f"Processing all {total_available} steps.")

    chain_name, prompt_path = resolve_find_resources_prompt(run_id, prompt_id)
    print(f"Prompt selected: {prompt_path}")

    # Chain calls are I/O-bound, so threads suffice; clamp to at least 1.
    default_workers = min(8, (os.cpu_count() or 4) * 2)
    workers = max(1, max_workers if max_workers is not None else default_workers)

    roadmap_title = roadmap_data.get("title", run_id)
    roadmap_description = roadmap_data.get("description", "")

    # Slots preserve original step order even when results complete out of order.
    aggregated_slots: List[Optional[Dict[str, Any]]] = [None] * len(targets)

    def _record(result_data: Dict[str, Any]) -> None:
        # Fix: per-target log lines were previously collected but never
        # emitted anywhere; print them as each step's result is recorded.
        aggregated_slots[result_data["index"]] = result_data["result"]
        print("\n".join(result_data["logs"]))

    if workers == 1 or len(targets) <= 1:
        for idx, target in enumerate(targets):
            _record(
                _generate_resources_for_target(
                    index=idx,
                    total_targets=len(targets),
                    target=target,
                    run_id=run_id,
                    roadmap_title=roadmap_title,
                    roadmap_description=roadmap_description,
                    chain_name=chain_name,
                )
            )
    else:
        import concurrent.futures

        print(f"Processing {len(targets)} steps with up to {workers} workers.")
        with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
            futures = [
                executor.submit(
                    _generate_resources_for_target,
                    idx,
                    len(targets),
                    target,
                    run_id,
                    roadmap_title,
                    roadmap_description,
                    chain_name,
                )
                for idx, target in enumerate(targets)
            ]

            for future in concurrent.futures.as_completed(futures):
                _record(future.result())

    resources_results: List[Dict[str, Any]] = [item for item in aggregated_slots if item is not None]
    steps_processed = len(resources_results)
    total_resources = sum(len(item.get("resources", [])) for item in resources_results)

    # Apply resources to bits file (takes a timestamped backup first)
    apply_summary = apply_resources_to_bits(bits_data, bits_filepath, resources_results)

    final_result = {
        "run_id": run_id,
        "prompt_used": prompt_path,
        "chain_name": chain_name,
        "total_steps_available": total_available,
        "steps_processed": steps_processed,
        "total_resources_attached": total_resources,
        "bits_filepath": bits_filepath,
        "bits_backup": apply_summary.get("backup_path"),
        "updated_steps": apply_summary.get("updated_steps"),
        "missing_steps": apply_summary.get("missing_steps"),
        "results": resources_results,
    }

    timestamped_filepath = save_output(
        data=final_result,
        pipeline="can-do-steps",
        step="find_resources",
        run_id=run_id,
        subfolder="logs",
    )

    print(f"\n✅ Resource discovery complete for {run_id}: {total_resources} resources across {steps_processed} steps.")
    print(f"Results saved to: {timestamped_filepath}")

    return final_result


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Find learning resources for can-do-steps roadmap")
    parser.add_argument("--run-id", required=True, help="Run identifier")
    parser.add_argument("--force-text", action="store_true", help="Use existing raw text output")
    parser.add_argument("--limit", type=int, help="Limit number of steps to process")
    parser.add_argument(
        "--max-workers",
        type=int,
        dest="max_workers",
        help="Maximum number of concurrent worker threads to use",
    )
    parser.add_argument("--prompt-id", help="Override prompt identifier for selecting a specific prompt")
    args = parser.parse_args()

    summary = find_resources(
        run_id=args.run_id,
        force_text=args.force_text,
        limit=args.limit,
        max_workers=args.max_workers,
        prompt_id=args.prompt_id,
    )

    print(json.dumps(summary, indent=2, ensure_ascii=False))
