"""
Roadmap paths component for the Lumabit lesson-generation pipeline.
Paths are collections of steps within a track.
"""
import os
import json
from typing import Dict, Any, List, Optional

from utils.io import save_output, load_latest_output
from chains.base import build_chain, default_json_parser, parse_output
from chains.roadmap.tracks import load_tracks

def parse_paths(output: str) -> Dict[str, Any]:
    """
    Parse and validate the paths structure from raw LLM output.

    Args:
        output: Raw output from the LLM

    Returns:
        Dict: Parsed paths data

    Raises:
        ValueError: If the parsed output lacks the expected structure.
    """
    try:
        data = parse_output(output, default_json_parser)

        # The model must return a top-level "paths" list.
        if "paths" not in data:
            raise ValueError("Expected 'paths' key in parsed output")

        path_entries = data["paths"]
        if not isinstance(path_entries, list):
            raise ValueError("Expected 'paths' to be a list")

        # Each entry must be a dict carrying every required key.
        for idx, entry in enumerate(path_entries):
            if not isinstance(entry, dict):
                raise ValueError(f"Path at index {idx} is not a dictionary")

            for field in ("id", "track_id", "title", "description"):
                if field not in entry:
                    raise ValueError(f"Path at index {idx} is missing required field '{field}'")

        return data
    except Exception as e:
        print(f"Error parsing paths: {e}")
        raise

def _persist_paths(parsed_paths: Dict[str, Any], run_id: str) -> Dict[str, Any]:
    """Save parsed paths output for this run and return it unchanged."""
    save_output(
        data=parsed_paths,
        pipeline="roadmap",
        step="paths",
        run_id=run_id
    )
    return parsed_paths

def generate_paths(
    run_id: str,
    track_id: Optional[str] = None,
    force_text: bool = False
) -> Dict[str, Any]:
    """
    Generate roadmap paths for a specific track or all tracks.

    Args:
        run_id: Run identifier
        track_id: Optional track ID to generate paths for
        force_text: If True, use existing raw text output instead of calling API

    Returns:
        Dict: Generated paths data

    Raises:
        ValueError: If no tracks exist for the run, or the requested
            track_id is not among them.
    """
    # Fast path: reuse an existing raw output rather than calling the API.
    if force_text:
        existing_output = load_latest_output(
            pipeline="roadmap",
            step="paths",
            run_id=run_id,
            as_text=True,
            raw=True
        )

        if existing_output:
            print(f"Using existing raw output for paths in roadmap/{run_id}")
            return _persist_paths(parse_paths(existing_output), run_id)
        # No raw output was found — fall through to a fresh generation.

    # Tracks are required for generation (paths belong to a track).
    tracks_data = load_tracks(run_id)
    if not tracks_data:
        raise ValueError(f"No tracks found for run ID: {run_id}")

    # Prepare input variables for the chain.
    input_variables = {}

    # If a specific track ID is provided, generate only for that track.
    if track_id:
        tracks = [t for t in tracks_data["tracks"] if t["id"] == track_id]
        if not tracks:
            raise ValueError(f"Track with ID {track_id} not found")

        input_variables["track"] = tracks[0]
    else:
        input_variables["tracks"] = tracks_data["tracks"]

    print(f"Generating roadmap paths for run ID: {run_id}")
    result = build_chain(
        chain_name="paths",
        pipeline="roadmap",
        run_id=run_id,
        input_variables=input_variables
    )

    # Parse, persist, and return the generated paths.
    return _persist_paths(parse_paths(result["output"]), run_id)

def load_paths(run_id: str) -> Optional[Dict[str, Any]]:
    """
    Load the most recently generated paths for a run.

    Args:
        run_id: Run identifier

    Returns:
        Dict: Previously generated paths, or None if not found
    """
    latest = load_latest_output(
        pipeline="roadmap",
        step="paths",
        run_id=run_id
    )
    return latest