#!/usr/bin/env python3
"""
CLI orchestrator for the Lumabit lesson-generation pipeline.
Handles command-line arguments and executes the appropriate chains.
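
Example invocations (the script name, run IDs, and file names below are
illustrative placeholders; adjust them to your setup):

    python main.py --pipeline lessons --step all --id demo-run --pdf input/source.pdf
    python main.py --pipeline can-do-steps --step build_roadmap --id demo-run --topic "Garden birds"
    python main.py --pipeline roadmap-rag --step all --id demo-run --resource-file input/resource.md --roadmap-file input/roadmap.json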
"""
import os
import sys
import argparse
from pathlib import Path
from typing import List
from dotenv import load_dotenv
from utils.io import get_input_prefix
from utils.storage import path_exists, materialize_to_local_file

# Import cost tracking utilities
from utils.cost_tracker import (
    start_step_tracking,
    finish_step_tracking,
    print_cost_summary,
    reset_cost_tracking,
    start_pipeline_timing,
    end_pipeline_timing
)

# Load environment variables from .env file
load_dotenv()

# Enable remote debugging if DEBUG environment variable is true
if os.getenv("DEBUG", "false").lower() == "true":
    try:
        import debugpy
        debugpy.listen(("localhost", 5678))
        print("🐛 Remote debugging enabled on localhost:5678")
        print("Waiting for debugger to attach...")
        debugpy.wait_for_client()
    except ImportError:
        print("⚠️ debugpy not found. Remote debugging disabled.")
    except Exception as e:
        print(f"⚠️ Failed to initialize remote debugging: {e}")


def _candidate_input_refs(value: str, input_prefix: str) -> List[str]:
    raw = (value or "").strip()
    if not raw:
        return []

    candidates: List[str] = [raw]
    if raw.startswith("s3://"):
        return candidates

    normalized_prefix = (input_prefix or "input").strip().strip("/")
    # lstrip("./") strips any run of "." and "/" characters, so peel the "./" prefix explicitly.
    normalized_raw = raw[2:] if raw.startswith("./") else raw
    normalized_raw = normalized_raw.lstrip("/")
    if normalized_prefix and not normalized_raw.startswith(f"{normalized_prefix}/"):
        candidates.append(f"{normalized_prefix}/{normalized_raw}")
    return candidates


def _resolve_existing_input_ref(value: str, input_prefix: str, label: str) -> str:
    candidates = _candidate_input_refs(value, input_prefix)
    for candidate in candidates:
        if os.path.exists(candidate) or path_exists(candidate):
            return candidate
    joined = ", ".join(candidates) if candidates else value
    raise ValueError(f"{label} not found. Checked: {joined}")


def _resolve_local_file_arg(value: str, input_prefix: str, label: str) -> str:
    resolved = _resolve_existing_input_ref(value, input_prefix, label)
    if os.path.exists(resolved):
        return resolved
    return materialize_to_local_file(resolved)


def validate_args(args: argparse.Namespace) -> None:
    """
    Validate command-line arguments.

    Args:
        args: Parsed command-line arguments

    Raises:
        ValueError: If arguments are invalid
    """
    args.input_prefix = get_input_prefix(getattr(args, "input_prefix", None))

    # Validate pipeline
    if args.pipeline not in ["lessons", "roadmap", "can-do-steps", "roadmap-rag"]:
        raise ValueError(
            f"Invalid pipeline: {args.pipeline}. Must be 'lessons', 'roadmap', 'roadmap-rag', or 'can-do-steps'."
        )

    # Validate step
    valid_lesson_steps = ["extract_topics", "generate_lessons", "create_images", "all"]
    valid_roadmap_steps = ["tracks", "paths", "steps", "bits", "all"]
    valid_can_do_steps = [
        # UNIFIED ARCHITECTURE
        "expand_and_create_hierarchy",
        "build_roadmap",
        "split_hierarchy",
        "organize_bits",
        "describe_images",
        "find_resources",
        "generate_media",
        "change_media",
        "review_content",
        "fix_content",
        "fix_step",
        "fetch_audio",
        "fetch_images",
        "export_json",
        "all"
    ]
    valid_roadmap_rag_steps = ["ingest_resource", "generate_lessons", "all"]

    if args.pipeline == "lessons" and args.step not in valid_lesson_steps:
        raise ValueError(f"Invalid step for lessons pipeline: {args.step}. Must be one of {valid_lesson_steps}.")

    if args.pipeline == "roadmap" and args.step not in valid_roadmap_steps:
        raise ValueError(f"Invalid step for roadmap pipeline: {args.step}. Must be one of {valid_roadmap_steps}.")

    if args.pipeline == "can-do-steps" and args.step not in valid_can_do_steps:
        raise ValueError(f"Invalid step for can-do-steps pipeline: {args.step}. Must be one of {valid_can_do_steps}.")

    if args.pipeline == "roadmap-rag" and args.step not in valid_roadmap_rag_steps:
        raise ValueError(
            f"Invalid step for roadmap-rag pipeline: {args.step}. Must be one of {valid_roadmap_rag_steps}."
        )

    # Validate topic requirement for expand_and_create_hierarchy or build_roadmap
    if args.pipeline == "can-do-steps" and args.step in ["expand_and_create_hierarchy", "build_roadmap", "all"] and not args.topic:
        raise ValueError("Topic is required for expand_and_create_hierarchy or build_roadmap step. Please provide --topic parameter.")

    # Validate PDF requirement for extract_topics
    if args.pipeline == "lessons" and args.step in ["extract_topics", "all"] and not args.pdf:
        # Check if we have existing raw output
        from utils.io import load_latest_output
        # Use first ID for validation (lessons pipeline only accepts single ID)
        validation_id = args.id[0] if isinstance(args.id, list) else args.id
        existing_output = load_latest_output(
            pipeline="lessons",
            step="extract_topics",
            run_id=validation_id,
            as_text=True,
            raw=True
        )

        if not existing_output:
            raise ValueError("PDF file path is required for extract_topics step when no existing output is found.")

    if args.pipeline == "lessons" and args.step in ["extract_topics", "all"] and args.pdf:
        args.pdf = _resolve_local_file_arg(args.pdf, args.input_prefix, "PDF file")

    # Validate run ID(s)
    if not args.id:
        raise ValueError("At least one run ID is required.")

    # For pipelines other than can-do-steps, only allow single ID
    if args.pipeline != "can-do-steps" and len(args.id) > 1:
        raise ValueError(
            f"Multiple run IDs are only supported for can-do-steps pipeline, got {len(args.id)} IDs for {args.pipeline} pipeline."
        )

    if args.pipeline == "can-do-steps" and args.step == "fix_content":
        if len(args.id) != 1:
            raise ValueError("fix_content step currently supports exactly one run ID.")
        if not args.fix_input:
            raise ValueError("--fix-input is required for fix_content step.")
        args.fix_input = _resolve_existing_input_ref(args.fix_input, args.input_prefix, "Fix instructions file")
        if not args.branch_name:
            raise ValueError("--branch-name is required for fix_content step.")
        if not args.content_dir:
            raise ValueError("--content-dir is required for fix_content step.")
        content_repo = os.getenv("CONTENT_REPO")
        if not content_repo:
            raise ValueError("CONTENT_REPO environment variable must be set for fix_content step.")
        content_repo_path = Path(content_repo).expanduser().resolve()
        if not content_repo_path.exists():
            raise ValueError(f"CONTENT_REPO path not found: {content_repo_path}")
        subpath = Path(args.content_dir)
        if subpath.is_absolute():
            subpath = Path(*subpath.parts[1:])
        content_dir_path = (content_repo_path / subpath).resolve()
        if not content_dir_path.exists():
            raise ValueError(f"Content directory not found: {content_dir_path}")
        setattr(args, "content_repo_path", str(content_repo_path))
        setattr(args, "content_dir_full_path", str(content_dir_path))

    if args.pipeline == "can-do-steps" and args.step == "fix_step":
        if len(args.id) != 1:
            raise ValueError("fix_step currently supports exactly one run ID.")
        if not args.fix_input:
            raise ValueError("--fix-input is required for fix_step.")
        args.fix_input = _resolve_existing_input_ref(args.fix_input, args.input_prefix, "Fix instructions file")
        if args.bits_file:
            args.bits_file = _resolve_existing_input_ref(args.bits_file, args.input_prefix, "Bits file")

    if args.pipeline == "roadmap-rag":
        requires_resource = args.step in ["ingest_resource", "all"]
        requires_roadmap = args.step in ["generate_lessons", "all"]

        if requires_resource and not args.resource_file:
            raise ValueError(
                "Resource file path is required for roadmap-rag ingestion. Provide --resource-file."
            )
        if requires_resource and args.resource_file:
            args.resource_file = _resolve_existing_input_ref(args.resource_file, args.input_prefix, "Resource file")

        if requires_roadmap and not args.roadmap_file:
            raise ValueError(
                "Roadmap file path is required for roadmap-rag lesson generation. Provide --roadmap-file."
            )
        if requires_roadmap and args.roadmap_file:
            args.roadmap_file = _resolve_existing_input_ref(args.roadmap_file, args.input_prefix, "Roadmap file")

    if args.pipeline == "can-do-steps" and getattr(args, "preview_only", False):
        preview_candidates = getattr(args, "preview_candidates", 0)
        if preview_candidates is not None and preview_candidates <= 0:
            raise ValueError("preview-candidates must be a positive integer when using --preview-only.")

    if args.pipeline == "can-do-steps" and getattr(args, "max_width", None) is not None:
        if args.max_width <= 0:
            raise ValueError("--max-width must be a positive integer.")

    if args.pipeline == "can-do-steps" and args.step in ["change_media"]:
        if not getattr(args, "source_dir", None):
            raise ValueError("source-dir is required for change_media step.")
        if args.source_dir and not os.path.isdir(args.source_dir):
            raise ValueError(f"Source directory not found: {args.source_dir}")


def run_lessons_pipeline(args: argparse.Namespace) -> None:
    """
    Run the lessons pipeline.

    Args:
        args: Parsed command-line arguments
    """
    if args.step in ["extract_topics", "all"]:
        print(f"🔍 Extracting topics from PDF: {args.pdf}")
        run_id = args.id[0] if isinstance(args.id, list) else args.id
        start_step_tracking("extract_topics", "lessons", run_id)
        from chains.lessons.extract_topics import extract_topics_from_pdf
        topics = extract_topics_from_pdf(
            pdf_path=args.pdf,
            run_id=run_id,
            force_text=args.force_text
        )
        step_summary = finish_step_tracking()
        print(f"✅ Extracted {len(topics['root'])} topics")
        if step_summary:
            print(f"💰 Step cost: {step_summary}")

    if args.step in ["generate_lessons", "all"]:
        print(f"📝 Generating lessons")
        run_id = args.id[0] if isinstance(args.id, list) else args.id
        start_step_tracking("generate_lessons", "lessons", run_id)
        from chains.lessons.generate_lessons import generate_lessons
        lessons = generate_lessons(run_id=run_id)
        step_summary = finish_step_tracking()
        print(f"✅ Generated {len(lessons)} lessons")
        if step_summary:
            print(f"💰 Step cost: {step_summary}")

    if args.step in ["create_images", "all"]:
        print(f"🖼️ Creating images")
        run_id = args.id[0] if isinstance(args.id, list) else args.id
        start_step_tracking("create_images", "lessons", run_id)
        from chains.lessons.create_images import create_images
        results = create_images(run_id=run_id)
        step_summary = finish_step_tracking()
        print(f"✅ Created images for {len(results.get('lessons', []))} lessons")
        if step_summary:
            print(f"💰 Step cost: {step_summary}")


def run_roadmap_pipeline(args: argparse.Namespace) -> None:
    """
    Run the roadmap pipeline.

    Args:
        args: Parsed command-line arguments
    """
    if args.step in ["tracks", "all"]:
        print(f"🔍 Generating tracks")
        run_id = args.id[0] if isinstance(args.id, list) else args.id
        start_step_tracking("tracks", "roadmap", run_id)
        from chains.roadmap.tracks import generate_tracks
        tracks = generate_tracks(
            run_id=run_id,
            force_text=args.force_text
        )
        step_summary = finish_step_tracking()
        print(f"✅ Generated {len(tracks['tracks'])} tracks")
        if step_summary:
            print(f"💰 Step cost: {step_summary}")

    if args.step in ["paths", "all"]:
        print(f"🔍 Generating paths")
        run_id = args.id[0] if isinstance(args.id, list) else args.id
        start_step_tracking("paths", "roadmap", run_id)
        from chains.roadmap.paths import generate_paths
        paths = generate_paths(
            run_id=run_id,
            track_id=args.track_id,
            force_text=args.force_text
        )
        step_summary = finish_step_tracking()
        print(f"✅ Generated {len(paths['paths'])} paths")
        if step_summary:
            print(f"💰 Step cost: {step_summary}")

    if args.step in ["steps", "all"]:
        print(f"🔍 Generating steps")
        run_id = args.id[0] if isinstance(args.id, list) else args.id
        start_step_tracking("steps", "roadmap", run_id)
        from chains.roadmap.steps import generate_steps
        steps = generate_steps(
            run_id=run_id,
            path_id=args.path_id,
            force_text=args.force_text
        )
        step_summary = finish_step_tracking()
        print(f"✅ Generated {len(steps['steps'])} steps")
        if step_summary:
            print(f"💰 Step cost: {step_summary}")

    if args.step in ["bits", "all"]:
        print(f"🔍 Generating bits")
        run_id = args.id[0] if isinstance(args.id, list) else args.id
        start_step_tracking("bits", "roadmap", run_id)
        from chains.roadmap.bits import generate_bits
        bits = generate_bits(
            run_id=run_id,
            step_id=args.step_id,
            force_text=args.force_text
        )
        step_summary = finish_step_tracking()
        print(f"✅ Generated {len(bits['bits'])} bits")
        if step_summary:
            print(f"💰 Step cost: {step_summary}")

    if args.step == "all":
        print(f"🎉 Complete roadmap generated successfully")


def run_roadmap_rag_pipeline(args: argparse.Namespace) -> None:
    """Run the roadmap-rag pipeline."""
    run_id = args.id[0] if isinstance(args.id, list) else args.id

    if args.step in ["ingest_resource", "all"]:
        print(f"📥 Ingesting resource into vectorstore: {args.resource_file}")
        start_step_tracking("ingest_resource", "rag-roadmap", run_id)
        from chains.rag_roadmap.ingest_resource import ingest_resource

        ingest_result = ingest_resource(
            run_id=run_id,
            resource_path=args.resource_file,
            collection_name=args.collection_name,
            persist_directory=args.persist_directory,
            chunk_size=args.chunk_size or 1000,
            chunk_overlap=args.chunk_overlap or 200,
            embedding_model=args.embedding_model,
            embedding_batch_size=args.embedding_batch_size,
            vectorstore_batch_size=args.vectorstore_batch_size,
        )
        step_summary = finish_step_tracking()
        print(
            "✅ Ingested resource with",
            f"{ingest_result['total_chunks']} chunks into collection {ingest_result['collection_name']}",
        )
        if step_summary:
            print(f"💰 Step cost: {step_summary}")

    if args.step in ["generate_lessons", "all"]:
        print(f"🧾 Generating lessons from roadmap: {args.roadmap_file}")
        start_step_tracking("generate_lessons", "rag-roadmap", run_id)
        from chains.rag_roadmap.generate_lessons import generate_lessons_from_roadmap

        lessons = generate_lessons_from_roadmap(
            run_id=run_id,
            roadmap_path=args.roadmap_file,
            collection_name=args.collection_name,
            persist_directory=args.persist_directory,
            top_k=args.top_k or 4,
            max_steps=args.max_steps,
            force_text=args.force_text,
            embedding_model=args.embedding_model,
            embedding_batch_size=args.embedding_batch_size,
            max_workers=args.max_workers,
            source_base=args.source_base,
        )
        step_summary = finish_step_tracking()
        print(
            "✅ Generated lessons for",
            f"{lessons['total_steps_processed']} steps",
        )
        if step_summary:
            print(f"💰 Step cost: {step_summary}")

    if args.step == "all":
        print("🎉 Completed roadmap-rag pipeline")


def run_can_do_steps_pipeline(args: argparse.Namespace) -> None:
    """
    Run the can-do-steps pipeline.

    Args:
        args: Parsed command-line arguments
    """
    # Handle multiple run IDs
    run_ids = args.id if isinstance(args.id, list) else [args.id]
    total_run_count = len(run_ids)

    print(f"🚀 Processing {total_run_count} run ID(s): {', '.join(run_ids)}")

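    # Each run ID is processed independently; the selected step(s) run once per ID.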
    for idx, run_id in enumerate(run_ids, 1):
        if total_run_count > 1:
            print(f"\n📋 Processing run {idx}/{total_run_count}: {run_id}")

        # If the user requested the full pipeline via --step all, run the complete orchestrator for this run ID.
        if args.step == "all":
            print(f"🚀 Running complete can-do-steps pipeline for {run_id}")
            start_step_tracking("all", "can-do-steps", run_id)
            from chains.can_do_steps.complete_pipeline import generate_complete_can_do_hierarchy

            results = generate_complete_can_do_hierarchy(
                run_id=run_id,
                force_text=args.force_text,
                export_json=True,
                topic=args.topic,
                audience=args.audience,
                purpose=args.purpose,
                style=args.style,
                notes=args.notes,
                language=args.language,
                prompt_id=getattr(args, "prompt_id", None),
                input_prefix=getattr(args, "input_prefix", None),
            )

            step_summary = finish_step_tracking()
            print(f"🎉 Complete can-do-steps pipeline generated successfully for {run_id}")
            if step_summary:
                print(f"💰 Step cost: {step_summary}")

        # UNIFIED ARCHITECTURE STEPS (individual step execution)
        if args.step in ["expand_and_create_hierarchy"]:
            print(f"🔄 Expanding statements and creating hierarchy for {run_id}")
            start_step_tracking("expand_and_create_hierarchy", "can-do-steps", run_id)
            from chains.can_do_steps.expand_and_create_hierarchy import expand_and_create_hierarchy
            hierarchy = expand_and_create_hierarchy(
                run_id=run_id,
                force_text=args.force_text,
                topic=args.topic,
                audience=args.audience,
                purpose=args.purpose,
                style=args.style,
                notes=args.notes,
                language=args.language,
                prompt_id=getattr(args, "prompt_id", None),
                input_prefix=getattr(args, "input_prefix", None),
            )
            step_summary = finish_step_tracking()
            original_count = hierarchy.get("original_count", 0)
            new_count = hierarchy.get("new_count", 0)
            total_count = hierarchy.get("total_count", 0)
            tracks_count = len(hierarchy["tracks"])
            total_paths = sum(len(track["paths"]) for track in hierarchy["tracks"])
            total_steps = sum(len(path["steps"]) for track in hierarchy["tracks"] for path in track["paths"])
            print(f"✅ Created expanded hierarchy for {run_id}: {original_count} original + {new_count} new = {total_count} total statements")
            print(f"✅ Hierarchy structure: {tracks_count} tracks, {total_paths} paths, {total_steps} steps")
            if step_summary:
                print(f"💰 Step cost: {step_summary}")

        if args.step in ["build_roadmap"]:
            print(f"🔧 Building roadmap (expand + split) for {run_id}")
            start_step_tracking("build_roadmap", "can-do-steps", run_id)
            from chains.can_do_steps.build_roadmap import build_roadmap as build_roadmap_step
            result = build_roadmap_step(
                run_id=run_id,
                force_text=args.force_text,
                topic=args.topic,
                audience=args.audience,
                purpose=args.purpose,
                style=args.style,
                notes=args.notes,
                language=args.language,
                prompt_id=getattr(args, "prompt_id", None),
                input_prefix=getattr(args, "input_prefix", None),
            )
            step_summary = finish_step_tracking()
            try:
                tracks_count = len(result.get("hierarchy", {}).get("tracks", []))
            except Exception:
                tracks_count = 0
            print(f"✅ Built roadmap for {run_id}: {tracks_count} tracks")
            if step_summary:
                print(f"💰 Step cost: {step_summary}")

        if args.step in ["split_hierarchy"]:
            print(f"✂️ Splitting hierarchy into JSON files for {run_id}")
            start_step_tracking("split_hierarchy", "can-do-steps", run_id)
            from chains.can_do_steps.split_hierarchy import split_hierarchy
            split_data = split_hierarchy(run_id=run_id)
            step_summary = finish_step_tracking()
            print(f"✅ Split hierarchy for {run_id}: {split_data['tracks_count']} tracks, {split_data['paths_count']} paths, {split_data['steps_count']} steps")
            if step_summary:
                print(f"💰 Step cost: {step_summary}")

        if args.step in ["organize_bits"]:
            print(f"🧩 Organizing into learning bits for {run_id}")
            start_step_tracking("organize_bits", "can-do-steps", run_id)
            from chains.can_do_steps.organize_bits import organize_bits
            bits = organize_bits(
                run_id=run_id,
                force_text=args.force_text,
                limit=args.limit,
                max_workers=args.max_workers,
                prompt_id=getattr(args, "prompt_id", None)
            )
            step_summary = finish_step_tracking()
            print(f"✅ Created {bits['generation_summary']['total_bits_generated']} learning bits for {run_id}")
            if step_summary:
                print(f"💰 Step cost: {step_summary}")

        if args.step in ["describe_images"]:
            print(f"🎨 Generating illustration descriptions for {run_id}")
            start_step_tracking("describe_images", "can-do-steps", run_id)
            from chains.can_do_steps.describe_images import describe_images
            descriptions = describe_images(
                run_id=run_id,
                force_text=args.force_text,
                target_level=getattr(args, 'target_level', 'all'),
                prompt_id=getattr(args, "prompt_id", None)
            )
            step_summary = finish_step_tracking()
            illustrations_count = descriptions.get('illustrations_generated', 0)
            print(f"✅ Generated {illustrations_count} illustration descriptions for {run_id}")
            if step_summary:
                print(f"💰 Step cost: {step_summary}")

        if args.step in ["find_resources"]:
            print(f"🔗 Finding supporting resources for {run_id}")
            start_step_tracking("find_resources", "can-do-steps", run_id)
            from chains.can_do_steps.find_resources import find_resources
            resources = find_resources(
                run_id=run_id,
                force_text=args.force_text,
                limit=getattr(args, "limit", None),
                max_workers=getattr(args, "max_workers", None),
                prompt_id=getattr(args, "prompt_id", None)
            )
            step_summary = finish_step_tracking()
            total_resources = resources.get("total_resources_attached", 0)
            steps_processed = resources.get("steps_processed", 0)
            print(f"✅ Attached {total_resources} resources across {steps_processed} steps for {run_id}")
            if step_summary:
                print(f"💰 Step cost: {step_summary}")

        if args.step in ["generate_media"]:
            print(f"🖼️ Generating images from existing illustrations for {run_id}")
            start_step_tracking("generate_media", "can-do-steps", run_id)
            from chains.can_do_steps.generate_media import generate_media
            media = generate_media(
                run_id=run_id,
                overwrite=getattr(args, 'overwrite', False),
                target_level=getattr(args, 'target_level', 'all'),
                generate_images=not getattr(args, 'no_images', False),
                quality=getattr(args, 'quality', 'low'),
                limit=getattr(args, 'limit', None),
                max_workers=getattr(args, 'max_workers', None),
                prompt_id=getattr(args, 'prompt_id', None),
                provider=getattr(args, 'provider', 'open-ai')
            )
            step_summary = finish_step_tracking()
            illustrations_found = media.get('illustrations_found', 0)
            images_count = media.get('images_generated', 0)
            print(f"✅ Found {illustrations_found} illustrations and generated {images_count} images for {run_id}")
            if step_summary:
                print(f"💰 Step cost: {step_summary}")

        if args.step in ["change_media"]:
            print(f"🎨 Changing media style for {run_id}")
            start_step_tracking("change_media", "can-do-steps", run_id)
            from chains.can_do_steps.change_media import change_media
            changed = change_media(
                run_id=run_id,
                source_dir=getattr(args, "source_dir", None),
                match_mode=getattr(args, "match_mode", None),
                match=getattr(args, "match", None),
                overwrite=getattr(args, "overwrite", False),
                prompt_id=getattr(args, "prompt_id", None),
                model=getattr(args, "image_model", None),
                quality=getattr(args, "quality", "standard"),
                target_level=getattr(args, "target_level", "all"),
                limit=getattr(args, "limit", None),
                max_workers=getattr(args, "max_workers", None),
                add_to_roadmap=getattr(args, "add_to_roadmap", False)
            )
            step_summary = finish_step_tracking()
            images_found = changed.get("images_found", 0)
            images_changed = changed.get("images_changed", 0)
            print(f"✅ Changed {images_changed}/{images_found} images for {run_id}")
            if step_summary:
                print(f"💰 Step cost: {step_summary}")

        if args.step in ["review_content"]:
            print(f"📝 Reviewing content quality for {run_id}")
            start_step_tracking("review_content", "can-do-steps", run_id)
            from chains.can_do_steps.review_content import review_content
            review_results = review_content(
                run_id=run_id,
                force_text=args.force_text,
                limit=getattr(args, 'limit', None),
                input_prefix=getattr(args, "input_prefix", None),
            )
            step_summary = finish_step_tracking()
            total_reviewed = review_results.get("total_steps_reviewed", 0)
            total_passed = review_results.get("total_passed", 0)
            total_failed = review_results.get("total_failed", 0)
            print(f"✅ Reviewed {total_reviewed} steps: {total_passed} passed, {total_failed} failed for {run_id}")
            if step_summary:
                print(f"💰 Step cost: {step_summary}")

        if args.step in ["fix_content"]:
            print(f"🛠️ Applying content fixes for {run_id}")
            start_step_tracking("fix_content", "can-do-steps", run_id)
            from chains.can_do_steps.fix_content import fix_content as fix_content_chain

            content_dir_path = getattr(args, "content_dir_full_path", None)
            if not content_dir_path:
                raise ValueError("Internal error: content directory path not prepared")

            fix_summary = fix_content_chain(
                run_id=run_id,
                fix_input=args.fix_input,
                content_dir=content_dir_path,
                branch_name=args.branch_name,
                force_text=args.force_text,
            )
            step_summary = finish_step_tracking()
            applied = fix_summary.get("applied", 0)
            planned = fix_summary.get("planned_items", 0)
            print(f"✅ Applied {applied} fixes (planned {planned}) for {run_id}")
            skipped = len(fix_summary.get("skipped", []))
            errors = len(fix_summary.get("errors", []))
            if skipped or errors:
                print(f"   Skipped: {skipped}, Errors: {errors}")
            if step_summary:
                print(f"💰 Step cost: {step_summary}")

        if args.step in ["fix_step"]:
            print(f"🛠️ Applying step fixes to bits for {run_id}")
            start_step_tracking("fix_step", "can-do-steps", run_id)
            from chains.can_do_steps.fix_step import fix_step as fix_step_chain

            fix_summary = fix_step_chain(
                run_id=run_id,
                fix_input=args.fix_input,
                bits_file=getattr(args, "bits_file", None),
                force_text=args.force_text,
            )
            step_summary = finish_step_tracking()
            applied = fix_summary.get("applied", 0)
            planned = fix_summary.get("planned_items", 0)
            updated = fix_summary.get("bits_updated", False)
            print(f"✅ Applied {applied} fixes (planned {planned}) for {run_id}")
            print(f"   Bits updated: {updated}")
            skipped = len(fix_summary.get("skipped", []))
            errors = len(fix_summary.get("errors", []))
            if skipped or errors:
                print(f"   Skipped: {skipped}, Errors: {errors}")
            if step_summary:
                print(f"💰 Step cost: {step_summary}")

        if args.step in ["fetch_audio"]:
            print(f"🔊 Fetching audio from Xeno-Canto for {run_id}")
            start_step_tracking("fetch_audio", "can-do-steps", run_id)
            from chains.can_do_steps.fetch_audio import fetch_audio
            audio_results = fetch_audio(
                run_id=run_id,
                overwrite=getattr(args, 'overwrite', False),
                limit=getattr(args, 'limit', None)
            )
            step_summary = finish_step_tracking()
            downloaded = audio_results.get('downloaded', 0)
            skipped = audio_results.get('skipped', 0)
            errors = audio_results.get('errors', 0)
            print(f"✅ Audio fetch complete for {run_id}: {downloaded} downloaded, {skipped} skipped, {errors} errors")
            if step_summary:
                print(f"💰 Step cost: {step_summary}")

        if args.step in ["fetch_images"]:
            if getattr(args, 'preview_only', False):
                print(f"🖼️ Building GBIF image previews for {run_id}")
            else:
                print(f"🖼️ Fetching images from GBIF for {run_id}")
            start_step_tracking("fetch_images", "can-do-steps", run_id)
            from chains.can_do_steps.fetch_images import fetch_images
            image_results = fetch_images(
                run_id=run_id,
                overwrite=getattr(args, 'overwrite', False),
                limit=getattr(args, 'limit', None),
                preview_only=getattr(args, 'preview_only', False),
                candidates_per_step=getattr(args, 'preview_candidates', 6),
                max_width=getattr(args, 'max_width', 800)
            )
            step_summary = finish_step_tracking()
            skipped = image_results.get('skipped', 0)
            errors = image_results.get('errors', 0)
            if image_results.get('preview_mode'):
                previewed = image_results.get('previewed', 0)
                print(f"✅ Image preview complete for {run_id}: {previewed} steps with candidates, {skipped} skipped, {errors} errors")
                preview_html = image_results.get('preview_html_path')
                if preview_html:
                    print(f"   Preview gallery: {preview_html}")
            else:
                downloaded = image_results.get('downloaded', 0)
                print(f"✅ Image fetch complete for {run_id}: {downloaded} downloaded, {skipped} skipped, {errors} errors")
            if step_summary:
                print(f"💰 Step cost: {step_summary}")

    if total_run_count > 1:
        print(f"\n🎉 Completed processing {total_run_count} run IDs: {', '.join(run_ids)}")


def main() -> None:
    """
    Main entry point for the application.
    """
    parser = argparse.ArgumentParser(description="Lumabit Lesson-Generation Pipeline")

    # Required arguments
    parser.add_argument("--pipeline", required=True, help="Pipeline to run: lessons, roadmap, or can-do-steps")
    parser.add_argument("--step", required=True, help="Step to run (varies by pipeline) or 'all'")
    parser.add_argument("--id", required=True, nargs='+', help="One or more run identifiers (space-separated)")

    # Optional arguments
    parser.add_argument("--pdf", help="PDF input (local path, s3:// URI, or bucket key)")
    parser.add_argument("--force-text", action="store_true", help="Use existing raw text output instead of calling API")
    parser.add_argument("--force-model", help="Force a specific LLM model for this run (overrides per-chain settings)")

    # Roadmap-specific arguments
    parser.add_argument("--track-id", help="Track ID for generating paths (roadmap pipeline)")
    parser.add_argument("--path-id", help="Path ID for generating steps (roadmap pipeline)")
    parser.add_argument("--step-id", help="Step ID for generating bits (roadmap pipeline)")

    # Roadmap-RAG specific arguments
    parser.add_argument("--resource-file", help="Resource input (local path, s3:// URI, or bucket key)")
    parser.add_argument("--roadmap-file", help="Roadmap JSON input (local path, s3:// URI, or bucket key)")
    parser.add_argument("--collection-name", help="Override vectorstore collection name for roadmap-rag pipeline")
    parser.add_argument("--persist-directory", help="Directory to persist roadmap-rag Chroma collection")
    parser.add_argument("--chunk-size", type=int, help="Chunk size for roadmap-rag ingestion")
    parser.add_argument("--chunk-overlap", type=int, help="Chunk overlap for roadmap-rag ingestion")
    parser.add_argument("--top-k", type=int, help="Number of retrieved chunks when generating lessons")
    parser.add_argument("--max-steps", type=int, help="Limit number of roadmap steps to process (roadmap-rag)")
    parser.add_argument("--embedding-model", help="Embedding model to use for roadmap-rag vectorstore")
    parser.add_argument("--embedding-batch-size", type=int, help="Batch size per embeddings API call")
    parser.add_argument("--vectorstore-batch-size", type=int, help="Number of documents per add_documents batch")
    parser.add_argument(
        "--source-base",
        help="Base URL to prepend to relative lesson references (roadmap-rag lessons)",
    )

    # Can-do-steps specific arguments
    parser.add_argument(
        "--input-prefix",
        default=os.getenv("INPUT_PREFIX", "input"),
        help="Prefix/folder for default inputs when a bare filename is provided",
    )
    parser.add_argument("--limit", type=int, help="Limit number of steps to process (for testing)")
    parser.add_argument(
        "--max-workers",
        type=int,
        help="Maximum number of concurrent worker threads (organize_bits, find_resources, or roadmap-rag lessons)",
    )
    parser.add_argument("--source-dir", help="Local source directory for change_media step")
    parser.add_argument("--match-mode", help="Selection strategy for change_media step (e.g., first-image)")
    parser.add_argument("--match", help="Optional filename mask for change_media step (e.g., '*.jpg')")
    parser.add_argument("--add-to-roadmap", action="store_true", help="Insert generated images into roadmap steps (change_media, experimental)")
    parser.add_argument("--topic", help="Topic for journey-based can-do statements expansion")
    parser.add_argument("--audience", help="Intended audience for the can-do statements")
    parser.add_argument("--purpose", help="Purpose or learning objective focus for the statements")
    parser.add_argument("--style", help="Language style to use for the statements")
    parser.add_argument("--notes", help="Additional notes or guidance to include in the prompt")
    parser.add_argument("--language", help="Language to request for generated content")
    parser.add_argument("--prompt-id", help="Override prompt identifier for prompt-aware can-do steps")
    parser.add_argument("--image-model", help="Image model override for generate_media or change_media")
    parser.add_argument("--fix-input", help="Fix instructions input (local path, s3:// URI, or bucket key)")
    parser.add_argument("--bits-file", help="Bits JSON input/output file (local path, s3:// URI, or bucket key) for fix_step")
    parser.add_argument("--content-dir", help="Subdirectory inside CONTENT_REPO containing the roadmap/bits files (fix_content step)")
    parser.add_argument("--branch-name", help="Git branch name to create or checkout (fix_content step)")

    # Generate media specific arguments
    parser.add_argument(
        "--overwrite",
        action="store_true",
        help="Overwrite existing media files (generate_media, fetch_audio, fetch_images steps)"
    )
    parser.add_argument("--target-level", choices=["roadmap", "track", "path", "step", "all"], default="all", help="Target level for description/media generation (describe_images and generate_media steps)")
    parser.add_argument("--no-images", action="store_true", help="Skip actual image generation (generate_media step)")
    parser.add_argument("--quality", choices=["low", "medium", "high", "standard"], default="low", help="Image quality for generate_media/change_media steps")
    parser.add_argument("--provider", choices=["open-ai", "google"], default="open-ai", help="Image provider for generate_media step")
    parser.add_argument("--preview-only", action="store_true", help="Generate image previews without downloading files (fetch_images step)")
    parser.add_argument("--preview-candidates", type=int, default=6, help="Number of preview candidates per step when previewing images")
    parser.add_argument("--max-width", type=int, default=800, help="Maximum width in pixels for downloaded images (fetch_images step)")
    args = parser.parse_args()

    if args.force_model:
        from utils.llm import set_forced_model
        set_forced_model(args.force_model)

    try:
        # Reset cost tracking at the start of each run
        reset_cost_tracking()

        # Start pipeline timing
        start_pipeline_timing()

        # Validate arguments
        validate_args(args)

        # Run the appropriate pipeline
        if args.pipeline == "lessons":
            run_lessons_pipeline(args)
        elif args.pipeline == "roadmap":
            run_roadmap_pipeline(args)
        elif args.pipeline == "can-do-steps":
            run_can_do_steps_pipeline(args)
        elif args.pipeline == "roadmap-rag":
            run_roadmap_rag_pipeline(args)

        print(f"✨ Pipeline completed successfully")

        # End pipeline timing
        end_pipeline_timing()

        # Print final cost and timing summary
        print_cost_summary()

    except Exception as e:
        print(f"❌ Error: {e}")
        # End pipeline timing even if there was an error
        end_pipeline_timing()
        # Still show cost and timing summary even if there was an error
        print_cost_summary()
        sys.exit(1)

if __name__ == "__main__":
    main()
