"""
Can-Do-Steps fetch images component for the Lumabit lesson-generation pipeline.
Downloads still images from the GBIF occurrence API for each roadmap step and
annotates the roadmap with attribution metadata.
"""
import os
import json
import mimetypes
import html
import hashlib
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple, Set
from urllib.parse import urlparse

import requests

from utils.io import ensure_dir, save_output
from utils.image_generator import save_generated_image, save_large_variant as save_large_variant_util
from utils.storage import path_exists, read_json, write_json, write_text, copy_path

# GBIF REST API (v1) endpoint URLs.
GBIF_SPECIES_MATCH_URL = "https://api.gbif.org/v1/species/match"
GBIF_OCCURRENCE_SEARCH_URL = "https://api.gbif.org/v1/occurrence/search"
# Template: the {occurrence_id} placeholder must be filled in before use.
GBIF_OCCURRENCE_DETAIL_URL = "https://api.gbif.org/v1/occurrence/{occurrence_id}"
# Default ``target_width`` for save_large_variant(); presumably pixels —
# confirm against utils.image_generator.
LARGE_IMAGE_MAX_WIDTH = 1600

# Preference order for media licenses, keyed by the lower-cased license URL.
# A lower rank is preferred; URLs that are not listed here receive a fallback
# rank from enumerate_media_candidates(). Every license is listed in its
# http/https and deed/legalcode spellings so that equivalent URLs rank equally.
PREFERRED_LICENSE_RANK = {
    # CC0
    "http://creativecommons.org/publicdomain/zero/1.0/legalcode": 0,
    "https://creativecommons.org/publicdomain/zero/1.0/legalcode": 0,
    "http://creativecommons.org/publicdomain/zero/1.0/": 0,
    "https://creativecommons.org/publicdomain/zero/1.0/": 0,
    # CC BY
    "http://creativecommons.org/licenses/by/4.0/legalcode": 1,
    "https://creativecommons.org/licenses/by/4.0/legalcode": 1,
    "http://creativecommons.org/licenses/by/4.0/": 1,
    "https://creativecommons.org/licenses/by/4.0/": 1,
    # CC BY-SA (optional, slightly lower rank)
    "http://creativecommons.org/licenses/by-sa/4.0/": 2,
    "https://creativecommons.org/licenses/by-sa/4.0/": 2,
    "http://creativecommons.org/licenses/by-sa/4.0/legalcode": 2,
    "https://creativecommons.org/licenses/by-sa/4.0/legalcode": 2,
}


def is_allowed_media_license(license_url: Optional[str]) -> bool:
    """
    Return True only for media licenses that are commercially compatible:
    - CC0
    - CC BY (any version, no NC/ND)
    - CC BY-SA (any version, no NC/ND)

    Both Creative Commons URLs (``.../licenses/by/4.0/``) and plain-text
    labels (``CC BY 4.0``, ``CC BY-SA 4.0``) are recognised. Matching is
    case-insensitive and ignores surrounding whitespace.

    Reject anything with NC or ND, or missing/unknown license.
    """
    if not license_url:
        return False

    lic = license_url.strip().lower()

    # Allow explicit CC0 forms ("cc0" also covers the "cc0_1_0" spelling)
    if "publicdomain/zero" in lic or "cc0" in lic:
        return True

    # Must be some kind of Creative Commons license for CC BY / BY-SA path
    if "creativecommons.org" not in lic and "cc by" not in lic:
        return False

    # Explicitly reject non-commercial or no-derivatives. This is a plain
    # substring test, so it errs on the side of rejecting.
    if "nc" in lic or "nd" in lic:
        return False

    # CC BY (e.g. /licenses/by/4.0/)
    if "/licenses/by/" in lic and "sa" not in lic:
        return True

    # CC BY-SA (e.g. /licenses/by-sa/4.0/)
    if "/licenses/by-sa/" in lic or "cc by-sa" in lic:
        return True

    # Plain-text CC BY label (e.g. "CC BY 4.0"). Any hyphenated qualifier other
    # than "-sa" (handled above) is unknown and therefore not accepted.
    if "cc by" in lic and "cc by-" not in lic:
        return True

    return False


def load_roadmap_file(run_id: str) -> Optional[Dict[str, Any]]:
    """
    Read ``roadmap-{run_id}.json`` from the run's output directory.

    Returns the parsed JSON content. When the file does not exist, or reading
    it raises, a warning is printed and ``None`` is returned instead.
    """
    roadmap_path = f"output/can-do-steps/{run_id}/roadmap-{run_id}.json"

    if path_exists(roadmap_path):
        try:
            roadmap = read_json(roadmap_path)
            print(f"✓ Loaded roadmap file: {roadmap_path}")
            return roadmap
        except Exception as exc:
            # Best-effort load: report the problem and let the caller decide.
            print(f"⚠️ Error loading roadmap file {roadmap_path}: {exc}")
            return None

    print(f"⚠️ Roadmap file not found: {roadmap_path}")
    return None


def extract_step_entries(roadmap_data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Flatten the tracks → paths → steps hierarchy into a list of step records.

    Each record carries the step's own ``id``/``title``/``description``
    (defaulting to an empty string), its ``level``, and the ID and title of the
    track and path that contain it. Document order is preserved.
    """
    flattened: List[Dict[str, Any]] = []

    for track in roadmap_data.get("tracks", []):
        for path in track.get("paths", []):
            flattened.extend(
                {
                    "id": step.get("id", ""),
                    "title": step.get("title", ""),
                    "description": step.get("description", ""),
                    "level": step.get("level"),
                    "track_id": track.get("id"),
                    "path_id": path.get("id"),
                    "track_title": track.get("title"),
                    "path_title": path.get("title"),
                }
                for step in path.get("steps", [])
            )

    return flattened


def apply_image_to_step(
    roadmap_data: Dict[str, Any],
    step_id: str,
    image_entry: Dict[str, Any],
    replace_existing: bool = False
) -> bool:
    """
    Record ``image_entry`` on the first roadmap step whose ID is ``step_id``.

    The roadmap is modified in place. With ``replace_existing`` the step's
    ``images`` become a one-element list. Otherwise the entry is added to
    whatever is already there, converting older single-dict or string values
    into the list form first. Returns ``True`` when a step was found.
    """
    if not step_id:
        return False

    # Lazy search: stops at the first step with a matching ID.
    matches = (
        step
        for track in roadmap_data.get("tracks", [])
        for path in track.get("paths", [])
        for step in path.get("steps", [])
        if step.get("id") == step_id
    )
    target = next(matches, None)
    if target is None:
        return False

    if replace_existing:
        target["images"] = [image_entry]
        return True

    current = target.get("images")
    if isinstance(current, list):
        # Append only when no existing dict entry points at the same file.
        for item in current:
            if isinstance(item, dict) and item.get("file") == image_entry.get("file"):
                break
        else:
            current.append(image_entry)
    elif isinstance(current, dict):
        # Older single-dict structure: wrap it in a list next to the new entry.
        target["images"] = [current, image_entry]
    elif isinstance(current, str) and current:
        # Deprecated plain-string format: keep the file name, no source info.
        target["images"] = [{"file": current, "source": None}, image_entry]
    else:
        target["images"] = [image_entry]

    return True


def parse_latin_name(description: str) -> Optional[Tuple[str, str]]:
    """
    Guess a ``(Genus, species)`` pair from the first two words of a description.

    Surrounding punctuation (``,.;:()[]{}``) is stripped from each word and
    words that become empty are ignored. The genus gets an upper-cased first
    letter, the species is lower-cased. Returns ``None`` when fewer than two
    usable words exist. No check is made that the words are actually Latin.
    """
    if not description:
        return None

    punctuation = ",.;:()[]{}"
    words: List[str] = []
    for raw_word in description.split():
        word = raw_word.strip(punctuation)
        if word:
            words.append(word)
        if len(words) == 2:
            # Only the first two usable words matter.
            break

    if len(words) < 2:
        return None

    genus, species = words
    return genus[:1].upper() + genus[1:], species.lower()


def build_name_candidates(step: Dict[str, Any]) -> List[str]:
    """
    List the names to try against the GBIF species match endpoint for a step.

    Order of preference: the binomial parsed from the description, the full
    description, then the title (when it differs from the description).
    Case-insensitive duplicates are dropped, keeping the first occurrence.
    """
    description = (step.get("description") or "").strip()
    title = (step.get("title") or "").strip()

    raw_names: List[str] = []

    binomial = parse_latin_name(description)
    if binomial:
        genus, species = binomial
        raw_names.append(f"{genus} {species}")
    if description:
        raw_names.append(description)
    if title and title != description:
        raw_names.append(title)

    # Order-preserving, case-insensitive de-duplication.
    seen_keys: Set[str] = set()
    unique_names: List[str] = []
    for name in raw_names:
        folded = name.lower()
        if folded in seen_keys:
            continue
        seen_keys.add(folded)
        unique_names.append(name)
    return unique_names


def match_species(session: requests.Session, candidates: List[str]) -> Optional[Dict[str, Any]]:
    """
    Query the GBIF species match endpoint with each candidate name in turn.

    Returns the JSON payload of the first response that contains a truthy
    ``usageKey``. Request failures, non-JSON responses and payloads without a
    ``usageKey`` are reported on stdout and the next candidate is tried.
    Returns ``None`` when there are no candidates or none of them matches.
    """
    if not candidates:
        return None

    for candidate_name in candidates:
        query = {"name": candidate_name}
        try:
            reply = session.get(GBIF_SPECIES_MATCH_URL, params=query, timeout=30)
            reply.raise_for_status()
        except requests.RequestException as exc:
            print(f"    ❌ Species match failed for '{candidate_name}': {exc}")
            continue

        # Decoded separately so a bad body is reported with its own message.
        try:
            payload = reply.json()
        except ValueError as exc:
            print(f"    ❌ Invalid JSON from species match for '{candidate_name}': {exc}")
            continue

        matched_key = payload.get("usageKey")
        if not matched_key:
            print(f"    ⚠️ No usageKey returned for '{candidate_name}', trying next candidate")
            continue

        print(f"    🔎 Matched species '{candidate_name}' → usageKey {matched_key}")
        return payload

    return None


def enumerate_media_candidates(occurrences: List[Dict[str, Any]]):
    """
    Generate the usable still images found in GBIF occurrence records.

    Occurrences without a ``key`` are ignored, as are media items that are not
    of type ``StillImage`` (case-insensitive), have no ``identifier``, or carry
    a license rejected by ``is_allowed_media_license``.

    Yields ``(license_rank, occurrence_index, media_index, occurrence, media)``
    tuples, where a lower ``license_rank`` is preferred.
    """
    for occ_position, record in enumerate(occurrences):
        if not record.get("key"):
            continue

        for media_position, item in enumerate(record.get("media") or []):
            if (item.get("type") or "").lower() != "stillimage":
                continue
            if not item.get("identifier"):
                continue

            license_text = (item.get("license") or "").strip()
            # Only CC0 / CC BY / CC BY-SA media may be used.
            if not is_allowed_media_license(license_text):
                continue

            # Licenses missing from the preference table rank after all listed ones.
            fallback_rank = 5 if license_text else 10
            license_rank = PREFERRED_LICENSE_RANK.get(license_text.lower(), fallback_rank)

            yield (license_rank, occ_position, media_position, record, item)


def select_preferred_media(occurrences: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """
    Pick the single best still image among the given GBIF occurrences.

    "Best" means lowest license rank, then earliest occurrence, then earliest
    media item within that occurrence. Returns a dict with the chosen
    ``occurrence`` and ``media`` records, or ``None`` when nothing qualifies.
    """
    best = min(
        enumerate_media_candidates(occurrences),
        key=lambda entry: (entry[0], entry[1], entry[2]),
        default=None,
    )
    if best is None:
        return None

    return {"occurrence": best[3], "media": best[4]}



def gbif_occurrence_thumbnail_url(
    occurrence_key: Optional[int],
    identifier: Optional[str],
    size: str = "200x"
) -> Optional[str]:
    """
    Return the GBIF image-cache URL for one media item of an occurrence.

    The URL has the shape
      https://api.gbif.org/v1/image/cache/<size>/occurrence/<key>/media/<md5(identifier)>
    where the MD5 is taken over the UTF-8 encoded identifier.

    When the occurrence key or the identifier is missing, or the identifier
    cannot be hashed, ``identifier`` is returned unchanged as a fallback.
    """
    if occurrence_key and identifier:
        try:
            media_hash = hashlib.md5(identifier.encode("utf-8")).hexdigest()
        except Exception:
            # Best effort: fall back to the original media URL.
            return identifier
        return (
            "https://api.gbif.org/v1/image/cache/"
            f"{size}/occurrence/{occurrence_key}/media/{media_hash}"
        )

    return identifier


def build_media_previews(
    occurrences: List[Dict[str, Any]],
    usage_key: Optional[int],
    limit: int
) -> List[Dict[str, Any]]:
    """
    Describe the top ``limit`` candidate images for a taxon as preview records.

    Candidates are ordered by license rank, then occurrence index, then media
    index. Each record holds the image and thumbnail URLs, license and
    attribution metadata, GBIF identifiers, a 1-based ``order`` and a
    ``selectionKey`` of the form ``<occurrence key>|<media identifier>``.
    """
    ranked = sorted(
        enumerate_media_candidates(occurrences),
        key=lambda entry: entry[:3]
    )

    previews: List[Dict[str, Any]] = []
    for order, (license_rank, _, _, occurrence, media) in enumerate(ranked[:limit], start=1):
        occurrence_key = occurrence.get("key")
        identifier = media.get("identifier")
        media_identifier = identifier or f"media-{order}"

        preview = {
            "order": order,
            "image_url": identifier,
            # Cached GBIF rendition; equals image_url when it cannot be built.
            "thumbnail_url": gbif_occurrence_thumbnail_url(occurrence_key, identifier),
            "license": media.get("license"),
            "rightsHolder": media.get("rightsHolder"),
            "publisher": occurrence.get("publisher"),
            "datasetKey": occurrence.get("datasetKey"),
            "datasetName": occurrence.get("datasetName") or occurrence.get("datasetTitle"),
            "eventDate": occurrence.get("eventDate"),
            "gbifOccurrenceId": occurrence_key,
            "gbifTaxonKey": usage_key,
            "gbifMediaIdentifier": identifier,
            "gbifReferences": media.get("references") or occurrence.get("references"),
            "caption": media.get("title") or media.get("description"),
            "credit": media.get("creator") or media.get("contributor"),
            "identifierType": media.get("identifierType"),
            "licenseRank": license_rank,
            "selectionKey": f"{occurrence_key}|{media_identifier}"
        }
        previews.append(preview)

    return previews


def build_preview_gallery_html(results: List[Dict[str, Any]], run_id: str) -> str:
    """
    Render a simple, self-contained HTML gallery for previewing candidate images.

    Only records whose ``status`` is ``"preview"`` are rendered. Each becomes a
    section listing its ``candidates``; a candidate without an ``image_url`` or
    a ``gbifOccurrenceId`` is skipped. The embedded script lets a reviewer
    select images per step and copy, or re-load, a JSON manifest that maps
    step IDs to ordered lists of selection keys.

    Every value taken from ``results`` or ``run_id`` is HTML-escaped exactly
    once before it is written into the page, and non-string values are
    converted with ``str()`` first.

    Args:
        results: Per-step result records (``step_id``, ``title``,
            ``description``, ``status``, ``candidates``).
        run_id: Run identifier shown in the page title and heading.

    Returns:
        The full HTML document as one string, lines joined with newlines and
        empty lines removed.
    """

    def _esc(value: Any, fallback: str = "") -> str:
        # html.escape() only accepts str, but API metadata may carry numbers or
        # other JSON scalars, so coerce first. Falsy values use the fallback.
        return html.escape(str(value or fallback))

    title = f"GBIF Image Preview – {_esc(run_id)}"
    lines = [
        "<!DOCTYPE html>",
        "<html lang=\"en\">",
        "<head>",
        "  <meta charset=\"utf-8\" />",
        f"  <title>{title}</title>",
        "  <style>",
        "    body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; margin: 2rem; background: #f8fafc; color: #0f172a; }",
        "    h1 { margin-bottom: 0.5rem; }",
        "    .step { margin-bottom: 2rem; padding: 1.5rem; border-radius: 1rem; background: #ffffff; box-shadow: 0 1px 2px rgba(15, 23, 42, 0.1); }",
        "    .step header { display: flex; flex-direction: column; gap: 0.25rem; margin-bottom: 1rem; }",
        "    .step-id { font-size: 0.875rem; color: #475569; }",
        "    .candidates { display: grid; gap: 1rem; grid-template-columns: repeat(auto-fill, minmax(240px, 1fr)); }",
        "    figure { margin: 0; border: 1px solid #e2e8f0; border-radius: 0.75rem; overflow: hidden; background: #f1f5f9; display: flex; flex-direction: column; position: relative; transition: border-color 0.2s ease, box-shadow 0.2s ease; }",
        "    figure.selected { border-color: #2563eb; box-shadow: 0 0 0 2px rgba(37, 99, 235, 0.2); }",
        "    figure img { width: 100%; height: 180px; object-fit: cover; background: #cbd5f5; }",
        "    figcaption { padding: 0.75rem 1rem; display: flex; flex-direction: column; gap: 0.4rem; font-size: 0.9rem; }",
        "    .meta { color: #475569; font-size: 0.8rem; line-height: 1.3; }",
        "    a { color: #2563eb; text-decoration: none; }",
        "    a:hover { text-decoration: underline; }",
        "    .select-button { margin-top: 0.5rem; padding: 0.5rem 0.75rem; font-size: 0.85rem; border-radius: 0.5rem; border: 1px solid #2563eb; color: #2563eb; background: #ffffff; cursor: pointer; transition: background 0.2s ease, color 0.2s ease; }",
        "    .select-button:hover, .select-button:focus { background: #2563eb; color: #ffffff; outline: none; }",
        "    .selection-output { margin: 2.5rem 0; padding: 1.5rem; border-radius: 1rem; background: #ffffff; box-shadow: 0 1px 2px rgba(15, 23, 42, 0.1); display: flex; flex-direction: column; gap: 0.75rem; }",
        "    .selection-output textarea { width: 100%; min-height: 160px; font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace; font-size: 0.9rem; padding: 1rem; border-radius: 0.75rem; border: 1px solid #cbd5f5; background: #f8fafc; resize: vertical; }",
        "    .selection-output .selection-actions { display: flex; flex-wrap: wrap; gap: 0.75rem; }",
        "    .selection-output button { padding: 0.6rem 1rem; font-size: 0.9rem; border-radius: 0.75rem; border: none; background: #2563eb; color: #ffffff; cursor: pointer; transition: background 0.2s ease; }",
        "    .selection-output button:hover { background: #1d4ed8; }",
        "    .selection-status { font-size: 0.9rem; color: #475569; }",
        "    .selection-order-badge { position: absolute; top: 0.5rem; left: 0.5rem; width: 2rem; height: 2rem; border-radius: 999px; background: rgba(37, 99, 235, 0.9); color: #ffffff; display: flex; align-items: center; justify-content: center; font-weight: 600; font-size: 0.9rem; pointer-events: none; }",
        "    figure:not(.selected) .selection-order-badge { display: none; }",
        "    @media (max-width: 600px) { .selection-output textarea { min-height: 220px; } }",
        "  </style>",
        "</head>",
        "<body>",
        f"  <h1>{title}</h1>",
        "  <p>Click any image to open the original source. Use the <strong>Select</strong> button on each candidate to build a manifest of GBIF occurrence IDs. Selected images are numbered in the order they were chosen.</p>"
    ]

    # Manifest panel: status line, JSON textarea and copy/load buttons.
    lines.extend([
        "  <section class=\"selection-output\">",
        "    <h2>Selection Manifest</h2>",
        "    <p class=\"selection-status\" id=\"selectionStatus\">No selections yet.</p>",
        "    <textarea id=\"selectionOutput\" placeholder=\"Selections will appear here as JSON\"></textarea>",
        "    <div class=\"selection-actions\">",
        "      <button type=\"button\" id=\"copySelection\">Copy JSON to clipboard</button>",
        "      <button type=\"button\" id=\"loadSelections\">Load selections</button>",
        "    </div>",
        "  </section>"
    ])

    for record in results:
        if record.get("status") != "preview":
            continue

        raw_step_id = record.get("step_id") or ""
        step_id = _esc(raw_step_id)
        # Fall back to the *raw* step ID so that it is escaped exactly once.
        step_title = _esc(record.get("title") or raw_step_id)
        step_description = _esc(record.get("description"))
        candidates = record.get("candidates") or []

        lines.extend([
            "  <section class=\"step\">",
            "    <header>",
            f"      <h2>{step_title}</h2>",
            f"      <div class=\"step-id\">Step ID: {step_id}</div>"
        ])
        if step_description:
            lines.append(f"      <div class=\"step-description\">{step_description}</div>")
        lines.extend([
            "    </header>",
            "    <div class=\"candidates\">"
        ])

        for candidate in candidates:
            image_url = candidate.get("image_url") or ""
            thumbnail_url = candidate.get("thumbnail_url") or image_url
            if not image_url:
                continue

            occurrence_id = candidate.get("gbifOccurrenceId")
            if occurrence_id is None:
                continue

            media_identifier = candidate.get("gbifMediaIdentifier") or candidate.get("image_url") or f"media-{candidate.get('order')}"
            selection_key = candidate.get("selectionKey") or f"{occurrence_id}|{media_identifier}"
            escaped_selection_key = html.escape(str(selection_key))
            escaped_media_identifier = html.escape(str(media_identifier))
            # str() directly (not _esc) so that legitimate falsy values such as
            # an occurrence ID of 0 are rendered rather than dropped.
            escaped_occurrence_id = html.escape(str(occurrence_id))
            escaped_order = html.escape(str(candidate.get("order")))

            escaped_url = _esc(image_url)
            escaped_thumb_url = _esc(thumbnail_url)
            license_text = _esc(candidate.get("license"), "Unknown license")
            rights_holder = _esc(candidate.get("rightsHolder"), "Unknown rights holder")
            dataset_name = _esc(candidate.get("datasetName") or candidate.get("datasetKey"), "Unknown dataset")
            event_date = _esc(candidate.get("eventDate"), "Unknown date")
            caption = _esc(candidate.get("caption"))

            lines.extend([
                f"      <figure data-step-id=\"{step_id}\" data-occurrence-id=\"{escaped_occurrence_id}\" data-media-identifier=\"{escaped_media_identifier}\" data-selection-key=\"{escaped_selection_key}\">",
                "        <div class=\"selection-order-badge\"></div>",
                f"        <a href=\"{escaped_url}\" target=\"_blank\" rel=\"noopener noreferrer\">",
                f"          <img src=\"{escaped_thumb_url}\" alt=\"Candidate preview\" loading=\"lazy\" />",
                "        </a>",
                "        <figcaption>",
                f"          <strong>Order {escaped_order}</strong>",
                # An empty string here is dropped by the final join.
                f"          <span>{caption}</span>" if caption else "",
                f"          <div class=\"meta\">License: {license_text}</div>",
                f"          <div class=\"meta\">Rights holder: {rights_holder}</div>",
                f"          <div class=\"meta\">Dataset: {dataset_name}</div>",
                f"          <div class=\"meta\">Event date: {event_date}</div>",
                "          <button type=\"button\" class=\"select-button\">Select / Remove</button>",
                "        </figcaption>",
                "      </figure>"
            ])

        lines.extend([
            "    </div>",
            "  </section>"
        ])

    # Client-side selection logic. These are plain (non-f) strings, so `${...}`
    # below is JavaScript template-literal syntax, not Python interpolation.
    lines.extend([
        "  <script>",
        "    document.addEventListener('DOMContentLoaded', function () {",
        "      const selectionOutput = document.getElementById('selectionOutput');",
        "      const selectionStatus = document.getElementById('selectionStatus');",
        "      const copyButton = document.getElementById('copySelection');",
        "      const loadButton = document.getElementById('loadSelections');",
        "      const selections = {};",
        "",
        "      function normalizeSelectionEntry(raw, index, stepId) {",
        "        if (raw === undefined || raw === null) {",
        "          return null;",
        "        }",
        "        const isObject = typeof raw === 'object' && !Array.isArray(raw);",
        "        const rawSelection = isObject ? (raw.selectionKey ?? raw.selection_key ?? raw.id ?? raw.value) : raw;",
        "        const rawOccurrence = isObject ? (raw.occurrenceId ?? raw.occurrence_id) : null;",
        "",
        "        let selectionKey = rawSelection !== undefined && rawSelection !== null ? String(rawSelection).trim() : '';",
        "        let occurrenceId = rawOccurrence !== undefined && rawOccurrence !== null ? String(rawOccurrence).trim() : '';",
        "        let mediaIdentifier = null;",
        "",
        "        if (selectionKey && selectionKey.includes('|')) {",
        "          const parts = selectionKey.split('|');",
        "          if (!occurrenceId && parts[0]) {",
        "            occurrenceId = parts[0];",
        "          }",
        "          mediaIdentifier = parts.slice(1).join('|') || null;",
        "        }",
        "",
        "        if (!selectionKey) {",
        "          if (occurrenceId) {",
        "            selectionKey = `${occurrenceId}-legacy-${index}`;",
        "          } else {",
        "            selectionKey = `legacy-${stepId ?? 'step'}-${index}`;",
        "          }",
        "        }",
        "",
        "        if (!occurrenceId) {",
        "          occurrenceId = selectionKey.split('|')[0] || selectionKey;",
        "        }",
        "",
        "        return {",
        "          selectionKey: selectionKey,",
        "          occurrenceId: occurrenceId,",
        "          mediaIdentifier: mediaIdentifier",
        "        };",
        "      }",
        "",
        "      function ensureSelectionObjects(stepId) {",
        "        if (!Array.isArray(selections[stepId])) {",
        "          selections[stepId] = [];",
        "        }",
        "        const list = selections[stepId];",
        "        for (let i = 0; i < list.length; i += 1) {",
        "          const normalized = normalizeSelectionEntry(list[i], i, stepId);",
        "          list[i] = normalized;",
        "        }",
        "        selections[stepId] = list.filter(Boolean);",
        "        return selections[stepId];",
        "      }",
        "",
        "      function getTrimmedSelections() {",
        "        const trimmed = {};",
        "        Object.keys(selections).forEach(stepId => {",
        "          const list = ensureSelectionObjects(stepId);",
        "          if (!list || list.length === 0) {",
        "            return;",
        "          }",
        "          const selectionKeys = list",
        "            .map(item => item && item.selectionKey)",
        "            .filter(key => key !== undefined && key !== null && key !== '');",
        "          if (selectionKeys.length === 0) {",
        "            return;",
        "          }",
        "          trimmed[stepId] = selectionKeys;",
        "        });",
        "        return trimmed;",
        "      }",
        "",
        "      function getAllStepIds() {",
        "        const ids = new Set();",
        "        document.querySelectorAll('figure[data-step-id]').forEach(figure => {",
        "          if (figure.dataset.stepId) {",
        "            ids.add(figure.dataset.stepId);",
        "          }",
        "        });",
        "        return Array.from(ids);",
        "      }",
        "",
        "      function cssEscape(value) {",
        "        const stringValue = String(value ?? '');",
        "        if (window.CSS && typeof window.CSS.escape === 'function') {",
        "          return window.CSS.escape(stringValue);",
        "        }",
        "        return stringValue.replace(/([ \\\"'\\\\#.:;,!?+<>=@{}\\[\\]()$%^&*|`~])/g, '\\\\$1');",
        "      }",
        "",
        "      function applyManifestToSelections(manifest) {",
        "        const allStepIds = getAllStepIds();",
        "        allStepIds.forEach(stepId => {",
        "          selections[stepId] = [];",
        "          updateOrderBadges(stepId);",
        "        });",
        "",
        "        let appliedCount = 0;",
        "        Object.keys(manifest).forEach(stepId => {",
        "          const occurrences = Array.isArray(manifest[stepId]) ? manifest[stepId] : null;",
        "          if (!occurrences || occurrences.length === 0) {",
        "            return;",
        "          }",
        "          const stepSelector = cssEscape(stepId);",
        "          const current = ensureSelectionObjects(stepId);",
        "          occurrences.forEach((occurrence, occurrenceIndex) => {",
        "            const entry = normalizeSelectionEntry(occurrence, occurrenceIndex, stepId);",
        "            if (!entry) {",
        "              return;",
        "            }",
        "            const selectionSelector = entry.selectionKey ? cssEscape(entry.selectionKey) : null;",
        "            const occurrenceSelector = entry.occurrenceId ? cssEscape(entry.occurrenceId) : null;",
        "",
        "            let figure = null;",
        "            if (selectionSelector) {",
        "              figure = document.querySelector(`figure[data-step-id=\"${stepSelector}\"][data-selection-key=\"${selectionSelector}\"]`);",
        "            }",
        "            if (!figure && occurrenceSelector) {",
        "              figure = document.querySelector(`figure[data-step-id=\"${stepSelector}\"][data-occurrence-id=\"${occurrenceSelector}\"]`);",
        "            }",
        "            if (!figure) {",
        "              return;",
        "            }",
        "            const selectionKey = figure.dataset.selectionKey;",
        "            if (!selectionKey) {",
        "              return;",
        "            }",
        "            const occurrenceId = figure.dataset.occurrenceId || entry.occurrenceId;",
        "            const alreadySelected = current.some(item => item && item.selectionKey === selectionKey);",
        "            if (alreadySelected) {",
        "              return;",
        "            }",
        "            current.push({",
        "              selectionKey: String(selectionKey),",
        "              occurrenceId: String(occurrenceId || selectionKey),",
        "              mediaIdentifier: figure.dataset.mediaIdentifier || entry.mediaIdentifier || undefined",
        "            });",
        "            appliedCount += 1;",
        "          });",
        "          updateOrderBadges(stepId);",
        "        });",
        "",
        "        allStepIds.forEach(stepId => {",
        "          if (!manifest[stepId]) {",
        "            updateOrderBadges(stepId);",
        "          }",
        "        });",
        "",
        "        return appliedCount;",
        "      }",
        "",
        "      function updateSelectionStatus() {",
        "        const hasSelections = Object.values(selections).some(arr => Array.isArray(arr) && arr.length > 0);",
        "        selectionStatus.textContent = hasSelections",
        "          ? 'Selections ready — copy the JSON below.'",
        "          : 'No selections yet.';",
        "      }",
        "",
        "      function renderJSON() {",
        "        const manifest = getTrimmedSelections();",
        "        const hasSelections = Object.keys(manifest).length > 0;",
        "        selectionOutput.value = hasSelections ? JSON.stringify(manifest, null, 2) : '';",
        "        updateSelectionStatus();",
        "      }",
        "",
        "      function updateOrderBadges(stepId) {",
        "        const selectionsForStep = ensureSelectionObjects(stepId);",
        "        const stepSelector = cssEscape(stepId);",
        "        const figures = document.querySelectorAll(`figure[data-step-id=\"${stepSelector}\"]`);",
        "        figures.forEach(figure => {",
        "          const badge = figure.querySelector('.selection-order-badge');",
        "          const selectionKey = figure.dataset.selectionKey;",
        "          const position = selectionsForStep.findIndex(item => item && item.selectionKey === selectionKey);",
        "          if (position === -1) {",
        "            figure.classList.remove('selected');",
        "            if (badge) {",
        "              badge.textContent = '';",
        "            }",
        "          } else {",
        "            figure.classList.add('selected');",
        "            if (badge) {",
        "              badge.textContent = position + 1;",
        "            }",
        "          }",
        "        });",
        "      }",
        "",
        "      document.querySelectorAll('.select-button').forEach(button => {",
        "        button.addEventListener('click', function (event) {",
        "          event.preventDefault();",
        "          event.stopPropagation();",
        "          const figure = button.closest('figure');",
        "          if (!figure) {",
        "            return;",
        "          }",
        "          const stepId = figure.dataset.stepId;",
        "          const occurrenceId = figure.dataset.occurrenceId;",
        "          const selectionKey = figure.dataset.selectionKey;",
        "          const mediaIdentifier = figure.dataset.mediaIdentifier;",
        "",
        "          if (!stepId || !occurrenceId || !selectionKey) {",
        "            return;",
        "          }",
        "",
        "          const current = ensureSelectionObjects(stepId);",
        "          const existingIndex = current.findIndex(item => item && item.selectionKey === selectionKey);",
        "",
        "          if (existingIndex === -1) {",
        "            current.push({",
        "              selectionKey: String(selectionKey),",
        "              occurrenceId: String(occurrenceId),",
        "              mediaIdentifier: mediaIdentifier ? String(mediaIdentifier) : undefined",
        "            });",
        "          } else {",
        "            current.splice(existingIndex, 1);",
        "          }",
        "",
        "          updateOrderBadges(stepId);",
        "          renderJSON();",
        "        });",
        "      });",
        "",
        "      copyButton.addEventListener('click', function () {",
        "        const manifest = getTrimmedSelections();",
        "        const manifestText = JSON.stringify(manifest, null, 2);",
        "",
        "        if (!manifestText || manifestText === '{}') {",
        "          selectionStatus.textContent = 'Nothing to copy yet — select at least one image.';",
        "          return;",
        "        }",
        "",
        "        if (navigator.clipboard && navigator.clipboard.writeText) {",
        "          navigator.clipboard.writeText(manifestText).then(() => {",
        "            selectionStatus.textContent = 'Copied to clipboard!';",
        "          }).catch(() => {",
        "            selectionStatus.textContent = 'Unable to copy automatically. Please select and copy the JSON manually.';",
        "          });",
        "        } else {",
        "          selectionOutput.select();",
        "          try {",
        "            const successful = document.execCommand('copy');",
        "            selectionStatus.textContent = successful",
        "              ? 'Copied to clipboard!'",
        "              : 'Unable to copy automatically. Please copy manually.';",
        "          } catch (err) {",
        "            selectionStatus.textContent = 'Unable to copy automatically. Please copy manually.';",
        "          }",
        "        }",
        "      });",
        "",
        "      if (loadButton) {",
        "        loadButton.addEventListener('click', function () {",
        "          const raw = (selectionOutput.value || '').trim();",
        "          if (!raw) {",
        "            selectionStatus.textContent = 'Paste a selection manifest into the textarea first.';",
        "            return;",
        "          }",
        "          let parsed;",
        "          try {",
        "            parsed = JSON.parse(raw);",
        "          } catch (error) {",
        "            selectionStatus.textContent = 'Invalid JSON — please check the format and try again.';",
        "            return;",
        "          }",
        "          if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {",
        "            selectionStatus.textContent = 'Selection manifest must be a JSON object keyed by step ID.';",
        "            return;",
        "          }",
        "          const applied = applyManifestToSelections(parsed);",
        "          renderJSON();",
        "          selectionStatus.textContent = applied > 0",
        "            ? `Loaded ${applied} selection${applied === 1 ? '' : 's'} from JSON.`",
        "            : 'No matching selections were found for the provided JSON.';",
        "        });",
        "      }",
        "",
        "      renderJSON();",
        "    });",
        "  </script>",
        "</body>",
        "</html>"
    ])

    # Empty entries (omitted captions, blank separator lines) are dropped.
    return "\n".join(line for line in lines if line != "")


def derive_extension(url: str, content_type: Optional[str]) -> str:
    """
    Pick a file extension for a remote asset.

    The suffix of the URL path wins when it is present and at most five
    characters long (leading dot included). Otherwise the MIME type portion
    of ``content_type`` (anything after a ``;`` is ignored) is looked up via
    ``mimetypes``. ``".jpg"`` is returned when neither source yields a result.
    """
    _, url_suffix = os.path.splitext(urlparse(url).path)
    if 0 < len(url_suffix) <= 5:
        return url_suffix

    if content_type:
        mime_type = content_type.partition(";")[0].strip()
        from_mime = mimetypes.guess_extension(mime_type)
        if from_mime:
            return from_mime

    return ".jpg"


def save_large_variant(
    image_data: bytes,
    final_filename: str,
    output_dir: str,
    *,
    target_width: int = LARGE_IMAGE_MAX_WIDTH
) -> Tuple[Optional[str], Optional[str]]:
    """
    Produce the large rendition of a downloaded image next to the primary asset.

    This is a thin pass-through to the shared ``save_large_variant`` helper from
    ``utils.image_generator``; every argument is forwarded by keyword and the
    helper's return value is handed back unchanged. ``target_width`` defaults
    to ``LARGE_IMAGE_MAX_WIDTH``.
    """
    forwarded = {
        "image_data": image_data,
        "final_filename": final_filename,
        "output_dir": output_dir,
        "target_width": target_width,
    }
    return save_large_variant_util(**forwarded)


def load_selection_manifest(run_id: str) -> Tuple[Optional[Dict[str, List[str]]], str]:
    """
    Load the image selection manifest for a run, if one exists.

    Two locations are probed in order: the run's ``logs`` folder
    (output/can-do-steps/{run_id}/logs/selections-{run_id}.json) and then the
    run root (kept for backward compatibility). Only the first location that
    exists is read; a read failure there is reported and ends the lookup.

    Returns:
        A ``(manifest, path)`` pair. ``manifest`` maps step IDs (as strings) to
        lists of stringified selection values, or is ``None`` when nothing
        usable was found. ``path`` is the file that was inspected, or the
        preferred location when no file exists.
    """
    run_root = f"output/can-do-steps/{run_id}"
    manifest_name = f"selections-{run_id}.json"
    locations = (
        f"{run_root}/logs/{manifest_name}",
        f"{run_root}/{manifest_name}",
    )

    # Stop probing at the first location that exists.
    found_path = next((path for path in locations if path_exists(path)), None)
    if found_path is None:
        return None, locations[0]

    try:
        payload = read_json(found_path)
    except Exception as exc:
        print(f"⚠️ Failed to load selection manifest {found_path}: {exc}")
        return None, found_path

    if payload is None:
        return None, found_path

    if not isinstance(payload, dict):
        print(f"⚠️ Selection manifest {found_path} is not a JSON object; ignoring.")
        return None, found_path

    selections: Dict[str, List[str]] = {}
    for raw_step_id, raw_values in payload.items():
        if not raw_values:
            # Empty or null entries carry no selections.
            continue
        if not isinstance(raw_values, list):
            print(f"⚠️ Selection manifest entry for '{raw_step_id}' is not a list; skipping.")
            continue
        kept = [value for value in raw_values if value is not None]
        selections[str(raw_step_id)] = [str(value) for value in kept]

    if selections:
        return selections, found_path

    print(f"⚠️ Selection manifest {found_path} contains no usable selections.")
    return None, found_path


def parse_selection_entry(value: Any, index: int = 0) -> Optional[Dict[str, Optional[str]]]:
    """
    Turn one raw manifest entry into a uniform selection record.

    ``value`` may be a dict (keys ``selectionKey``/``selection_key``/``id``/
    ``value`` for the key and ``occurrenceId``/``occurrence_id`` for the
    occurrence) or any other object, which is stringified and used as the key.
    A key of the form ``"<occurrence>|<media>"`` is split on the first ``|``.

    Returns ``None`` when neither a key nor an occurrence ID can be derived;
    otherwise a dict with:
      - selection_key: the key, or ``"<occurrence>-legacy-<index>"`` when only
        an occurrence ID was supplied
      - occurrence_id: the explicit occurrence ID, else the part of the key
        before ``|``, else the whole key
      - media_identifier: the part of the key after ``|``, or ``None``
    """
    if value is None:
        return None

    if isinstance(value, dict):
        key_fields = ("selectionKey", "selection_key", "id", "value")
        occurrence_fields = ("occurrenceId", "occurrence_id")
        raw_key = next((value.get(name) for name in key_fields if value.get(name)), "")
        raw_occurrence = next(
            (value.get(name) for name in occurrence_fields if value.get(name)), ""
        )
    else:
        raw_key = value
        raw_occurrence = ""

    key_text = str(raw_key).strip()
    occurrence = str(raw_occurrence).strip()
    media: Optional[str] = None

    head, separator, tail = key_text.partition("|")
    if separator:
        # An explicitly supplied occurrence ID takes precedence over the key prefix.
        occurrence = occurrence or head
        media = tail or None

    if not key_text:
        if not occurrence:
            return None
        key_text = f"{occurrence}-legacy-{index}"

    if not occurrence:
        # Reached only for keys without a usable prefix; fall back to the whole key.
        occurrence = head or key_text

    return {
        "selection_key": key_text,
        "occurrence_id": occurrence,
        "media_identifier": media
    }


def normalize_selection_entries(entries: Optional[List[Any]]) -> List[Dict[str, Optional[str]]]:
    """
    Parse raw manifest entries and drop repeats, preserving first-seen order.

    A falsy input yields an empty list and a non-list input is treated as a
    single entry. Each entry goes through ``parse_selection_entry`` with its
    position as ``index``; unparseable entries are dropped, and an entry whose
    ``selection_key`` was already seen is skipped.
    """
    if not entries:
        return []

    raw_items = entries if isinstance(entries, list) else [entries]

    unique: List[Dict[str, Optional[str]]] = []
    known_keys: Set[str] = set()

    for position, raw in enumerate(raw_items):
        entry = parse_selection_entry(raw, index=position)
        if not entry:
            continue
        key = entry.get("selection_key")
        if key:
            if key in known_keys:
                continue
            known_keys.add(key)
        unique.append(entry)

    return unique


def fetch_occurrence_media_by_id(
    session: requests.Session,
    occurrence_id: str,
    target_media_identifier: Optional[str] = None
) -> Optional[Tuple[Dict[str, Any], Optional[Dict[str, Any]]]]:
    """
    Look up a single GBIF occurrence and choose one of its still images.

    Only media entries of type "StillImage" (case-insensitive) that carry an
    identifier and pass ``is_allowed_media_license`` are considered. When
    ``target_media_identifier`` is given and an eligible entry has exactly that
    identifier, that entry is chosen; otherwise the first eligible entry is
    used and a warning is printed if a target had been requested.

    Returns:
        ``None`` if the HTTP request fails or the body is not valid JSON;
        ``(occurrence, media)`` on success, where ``media`` is ``None`` when
        the occurrence has no eligible still image.
    """
    detail_url = GBIF_OCCURRENCE_DETAIL_URL.format(occurrence_id=occurrence_id)

    try:
        response = session.get(detail_url, timeout=30)
        response.raise_for_status()
        occurrence = response.json()
    except requests.RequestException as exc:
        print(f"    ❌ Failed to retrieve occurrence {occurrence_id}: {exc}")
        return None
    except ValueError as exc:
        print(f"    ❌ Invalid JSON for occurrence {occurrence_id}: {exc}")
        return None

    def _is_eligible(item: Dict[str, Any]) -> bool:
        """Still image with an identifier and an allowed license."""
        if (item.get("type") or "").lower() != "stillimage":
            return False
        if not item.get("identifier"):
            return False
        # 🔐 Enforce allowed license set
        return bool(is_allowed_media_license(item.get("license")))

    exact_match: Optional[Dict[str, Any]] = None
    first_eligible: Optional[Dict[str, Any]] = None

    # filter() is lazy, so entries after an exact match are never inspected.
    for item in filter(_is_eligible, occurrence.get("media") or []):
        if target_media_identifier and item.get("identifier") == target_media_identifier:
            exact_match = item
            break
        if first_eligible is None:
            first_eligible = item

    chosen = exact_match or first_eligible
    if not chosen:
        print(f"    ⚠️ Occurrence {occurrence_id} has no usable still images with allowed license")
        return occurrence, None

    if target_media_identifier and exact_match is None:
        print(f"    ⚠️ Occurrence {occurrence_id} missing requested media identifier; using first allowed still image")
    return occurrence, chosen


def fetch_images(
    run_id: str,
    overwrite: bool = False,
    limit: Optional[int] = None,
    preview_only: bool = False,
    candidates_per_step: int = 6,
    max_width: int = 800
) -> Dict[str, Any]:
    """
    Fetch GBIF still images for each roadmap step, or build a preview of candidates.

    For every step one of three paths is taken:
      1. Manifest path (download mode only): if the selection manifest lists
         entries for the step (by ID, then by slug), each selected occurrence
         is fetched by ID and its image downloaded.
      2. Preview path (``preview_only``): a species match and occurrence search
         are run and candidate media are recorded without downloading.
      3. Search path: a species match and occurrence search are run, the
         preferred media item is downloaded and attached to the step.

    Downloaded images are written as WebP (with a 256px thumbnail and a large
    variant) and the roadmap is annotated via ``apply_image_to_step``. If the
    roadmap was modified, the previous roadmap file is copied to ``archived/``
    before the updated one is written.

    Args:
        run_id: Run identifier used to locate the roadmap and output folders.
        overwrite: Replace existing files; the first image applied to a step
            in this run also replaces that step's existing roadmap images.
        limit: Process at most this many steps (``None`` for all).
        preview_only: Collect candidates and write an HTML gallery instead of
            downloading images.
        candidates_per_step: Number of candidates per step in preview mode.
        max_width: Maximum width in pixels for saved images; falsy values fall
            back to 800 and values below 1 are clamped to 1.

    Returns:
        A summary dict (counts, paths, per-step ``results``) that is also
        persisted through ``save_output``.

    Raises:
        FileNotFoundError: If no roadmap can be loaded for ``run_id``.
    """
    roadmap = load_roadmap_file(run_id)
    if not roadmap:
        raise FileNotFoundError(f"Roadmap for run '{run_id}' not found")

    steps = extract_step_entries(roadmap)
    if not steps:
        print("⚠️ No steps found in roadmap; nothing to fetch")
        steps = []

    if limit is not None and limit < len(steps):
        steps = steps[:limit]

    output_subdir = "previews" if preview_only else "images"
    output_dir = f"output/can-do-steps/{run_id}/{output_subdir}"
    ensure_dir(output_dir)

    session = requests.Session()
    user_agent = os.getenv("GBIF_USER_AGENT", "LumabitFetchImages/1.0")
    session.headers.update({
        "User-Agent": user_agent
    })

    selection_manifest, selection_manifest_path = load_selection_manifest(run_id)
    if selection_manifest:
        print(f"📄 Found selection manifest: {selection_manifest_path} (steps: {len(selection_manifest)})")
    else:
        print(f"ℹ️ No selection manifest found (expected at {selection_manifest_path})")

    results: List[Dict[str, Any]] = []
    downloaded_count = 0
    previewed_count = 0
    skipped_count = 0
    error_count = 0
    roadmap_modified = False
    updated_steps: List[str] = []
    # Steps whose existing roadmap images were already replaced during this run;
    # later images for the same step are added rather than replacing again.
    cleared_steps: Set[str] = set()

    target_max_width = max(1, max_width) if max_width else 800

    mode_label = "Building image previews" if preview_only else "Fetching images"
    print(f"🖼️ {mode_label} for {len(steps)} steps")
    if overwrite and not preview_only:
        print("ℹ️ Overwrite enabled: existing images for a step will be replaced before adding new ones.")

    for index, step in enumerate(steps, start=1):
        step_id = step.get("id") or f"step-{index}"
        record: Dict[str, Any] = {
            "step_id": step_id,
            "title": step.get("title", ""),
            "description": step.get("description", ""),
            "track_id": step.get("track_id"),
            "path_id": step.get("path_id"),
            "status": "pending"
        }

        print(f"  [{index}/{len(steps)}] Processing step '{step_id}'")

        selected_entries: List[Dict[str, Optional[str]]] = []
        if selection_manifest:
            raw_selected = selection_manifest.get(step_id)
            if not raw_selected:
                # Manifests may be keyed by slug instead of step ID.
                step_slug = step.get("slug")
                if step_slug:
                    raw_selected = selection_manifest.get(step_slug)
            selected_entries = normalize_selection_entries(raw_selected)

        # --- Manifest path: download exactly the occurrences the user selected ---
        if selected_entries and not preview_only:
            print(f"    🎯 Using selection manifest ({len(selected_entries)} selection(s))")
            for occ_index, entry in enumerate(selected_entries, start=1):
                occurrence_id_str = str(entry.get("occurrence_id") or "").strip()
                selection_key = entry.get("selection_key")
                media_identifier = entry.get("media_identifier")
                if not occurrence_id_str:
                    selection_record = {
                        "step_id": step_id,
                        "title": step.get("title", ""),
                        "description": step.get("description", ""),
                        "track_id": step.get("track_id"),
                        "path_id": step.get("path_id"),
                        "status": "skipped",
                        "selected_via_manifest": True,
                        "selection_order": occ_index,
                        "message": "Missing occurrence ID in selection manifest"
                    }
                    skipped_count += 1
                    results.append(selection_record)
                    print("    ⚠️ Skipped: missing occurrence ID in manifest")
                    continue
                selection_record = {
                    "step_id": step_id,
                    "title": step.get("title", ""),
                    "description": step.get("description", ""),
                    "track_id": step.get("track_id"),
                    "path_id": step.get("path_id"),
                    "status": "pending",
                    "selected_via_manifest": True,
                    "selection_order": occ_index,
                    "occurrence_id": occurrence_id_str,
                    "selection_key": selection_key,
                    "media_identifier": media_identifier
                }

                occurrence_media = fetch_occurrence_media_by_id(
                    session,
                    occurrence_id_str,
                    target_media_identifier=media_identifier
                )
                if not occurrence_media:
                    selection_record.update({
                        "status": "error",
                        "message": "Occurrence retrieval failed"
                    })
                    error_count += 1
                    results.append(selection_record)
                    continue

                occurrence, media = occurrence_media
                if not media or not media.get("identifier"):
                    selection_record.update({
                        "status": "skipped",
                        "message": "No usable still image for occurrence"
                    })
                    skipped_count += 1
                    results.append(selection_record)
                    continue

                identifier = media.get("identifier")
                filename_base = f"{step_id}-{occurrence_id_str}"
                filename = f"{filename_base}.webp"
                destination_path = os.path.join(output_dir, filename)

                if path_exists(destination_path) and not overwrite:
                    # Primary name is taken: try a name suffixed with the
                    # selection order before giving up on this selection.
                    alt_filename = f"{filename_base}-{occ_index}.webp"
                    alt_path = os.path.join(output_dir, alt_filename)
                    if path_exists(alt_path) and not overwrite:
                        selection_record.update({
                            "status": "skipped",
                            "message": "File already exists",
                            "file_path": destination_path
                        })
                        skipped_count += 1
                        results.append(selection_record)
                        print("    ⏭️  Skipped (file exists)")
                        continue
                    filename = alt_filename
                    destination_path = alt_path

                try:
                    image_response = session.get(identifier, timeout=90)
                    image_response.raise_for_status()
                except requests.RequestException as exc:
                    selection_record.update({
                        "status": "error",
                        "message": f"Image download failed: {exc}",
                        "image_url": identifier
                    })
                    error_count += 1
                    results.append(selection_record)
                    print(f"    ❌ Image download failed: {exc}")
                    continue

                save_result = save_generated_image(
                    image_data=image_response.content,
                    filename=filename,
                    output_dir=output_dir,
                    metadata=None,
                    max_width=target_max_width,
                    force_webp=True,
                    thumbnail_size=256
                )

                if not save_result.get("success", True):
                    selection_record.update({
                        "status": "error",
                        "message": save_result.get("error", "Failed to save image"),
                        "image_url": identifier
                    })
                    error_count += 1
                    results.append(selection_record)
                    print(f"    ❌ Failed to save image: {save_result.get('error')}")
                    continue

                # Prefer the path reported by the saver over the planned one.
                destination_path = save_result.get("image_path", destination_path)
                final_filename = os.path.basename(destination_path)
                thumbnail_filename = save_result.get("thumbnail_filename")
                thumbnail_path = save_result.get("thumbnail_path")
                large_filename, large_path = save_large_variant(
                    image_data=image_response.content,
                    final_filename=final_filename,
                    output_dir=output_dir
                )

                image_entry = {
                    "file": final_filename,
                    "source": identifier,
                    "license": media.get("license"),
                    "rightsHolder": media.get("rightsHolder"),
                    "publisher": occurrence.get("publisher"),
                    "gbifOccurrenceId": occurrence.get("key") or occurrence_id_str,
                    "gbifTaxonKey": occurrence.get("taxonKey"),
                    "gbifMediaIdentifier": media.get("identifier"),
                    "gbifReferences": media.get("references") or occurrence.get("references"),
                    "created": datetime.now().isoformat(),
                    "caption": media.get("title") or media.get("description")
                }
                if thumbnail_filename:
                    image_entry["thumbnail"] = thumbnail_filename
                    selection_record["thumbnail_file"] = thumbnail_path
                    selection_record["thumbnail"] = thumbnail_filename
                if large_filename:
                    image_entry["large"] = large_filename
                    selection_record["large"] = large_filename
                    if large_path:
                        selection_record["large_file_path"] = large_path

                should_replace = overwrite and not preview_only and step_id not in cleared_steps
                if apply_image_to_step(roadmap, step_id, image_entry, replace_existing=should_replace):
                    roadmap_modified = True
                    if step_id not in updated_steps:
                        updated_steps.append(step_id)
                    if should_replace:
                        cleared_steps.add(step_id)
                else:
                    print(f"    ⚠️ Warning: could not annotate roadmap for step '{step_id}'")

                selection_record.update({
                    "status": "downloaded",
                    "image_url": identifier,
                    "file_path": destination_path,
                    "license": media.get("license"),
                    "rights_holder": media.get("rightsHolder")
                })

                downloaded_count += 1
                results.append(selection_record)
                print(f"    ✅ Saved selection image to {destination_path}")

            # The manifest fully determines this step; skip the search path.
            continue

        # --- Search / preview path: resolve the species, then query occurrences ---
        name_candidates = build_name_candidates(step)
        if not name_candidates:
            record.update({
                "status": "skipped",
                "message": "No name candidates available"
            })
            skipped_count += 1
            results.append(record)
            print("    ⚠️ Skipped: no species name candidates")
            continue

        species_match = match_species(session, name_candidates)
        if not species_match:
            record.update({
                "status": "skipped",
                "message": "No species match found",
                "name_candidates": name_candidates
            })
            skipped_count += 1
            results.append(record)
            print("    ⚠️ No species match found")
            continue

        usage_key = species_match.get("usageKey")
        record.update({
            "usage_key": usage_key,
            "matched_name": species_match.get("scientificName"),
            "confidence": species_match.get("confidence")
        })

        try:
            response = session.get(
                GBIF_OCCURRENCE_SEARCH_URL,
                params={
                    "taxonKey": usage_key,
                    "mediaType": "StillImage",
                    # 🔐 Only CC0 + CC BY at occurrence/dataset level
                    "license": ["CC0_1_0", "CC_BY_4_0"],
                    "limit": 20
                },
                timeout=30
            )
            response.raise_for_status()
        except requests.RequestException as exc:
            record.update({
                "status": "error",
                "message": f"Occurrence search failed: {exc}"
            })
            error_count += 1
            results.append(record)
            print(f"    ❌ Occurrence search failed: {exc}")
            continue

        try:
            occurrence_data = response.json()
        except ValueError as exc:
            record.update({
                "status": "error",
                "message": f"Invalid JSON response: {exc}"
            })
            error_count += 1
            results.append(record)
            print(f"    ❌ Failed to parse occurrence response: {exc}")
            continue

        occurrences = occurrence_data.get("results", [])

        if preview_only:
            candidates = build_media_previews(occurrences, usage_key, candidates_per_step)
            if not candidates:
                record.update({
                    "status": "skipped",
                    "message": "No usable still images found"
                })
                skipped_count += 1
                results.append(record)
                print("    ⚠️ No usable still images found")
                continue

            record.update({
                "status": "preview",
                "candidates": candidates
            })
            previewed_count += 1
            results.append(record)
            print(f"    🔭 Found {len(candidates)} candidate(s) for preview")
            continue

        preferred = select_preferred_media(occurrences)
        if not preferred:
            record.update({
                "status": "skipped",
                "message": "No usable still images found"
            })
            skipped_count += 1
            results.append(record)
            print("    ⚠️ No usable still images found")
            continue

        occurrence = preferred["occurrence"]
        media = preferred["media"]
        identifier = media.get("identifier")
        if not identifier:
            record.update({
                "status": "skipped",
                "message": "Media identifier missing"
            })
            skipped_count += 1
            results.append(record)
            print("    ⚠️ Media identifier missing")
            continue

        occurrence_key = occurrence.get("key")

        filename_base = f"{step_id}-{occurrence_key}"
        filename = f"{filename_base}.webp"
        destination_path = os.path.join(output_dir, filename)

        if path_exists(destination_path) and not overwrite:
            record.update({
                "status": "skipped",
                "message": "File already exists",
                "occurrence_key": occurrence_key,
                "file_path": destination_path
            })
            skipped_count += 1
            results.append(record)
            print("    ⏭️  Skipped (file exists)")
            continue

        try:
            image_response = session.get(identifier, timeout=90)
            image_response.raise_for_status()
        except requests.RequestException as exc:
            record.update({
                "status": "error",
                "message": f"Image download failed: {exc}",
                "image_url": identifier
            })
            error_count += 1
            results.append(record)
            print(f"    ❌ Image download failed: {exc}")
            continue

        # The image is saved exactly once; this single call also produces the thumbnail.
        save_result = save_generated_image(
            image_data=image_response.content,
            filename=filename,
            output_dir=output_dir,
            metadata=None,
            max_width=target_max_width,
            force_webp=True,
            thumbnail_size=256
        )

        if not save_result.get("success", True):
            record.update({
                "status": "error",
                "message": save_result.get("error", "Failed to save image"),
                "image_url": identifier
            })
            error_count += 1
            results.append(record)
            print(f"    ❌ Failed to save image: {save_result.get('error')}")
            continue

        # Prefer the path reported by the saver over the planned one.
        destination_path = save_result.get("image_path", destination_path)
        final_filename = os.path.basename(destination_path)
        thumbnail_filename = save_result.get("thumbnail_filename")
        large_filename, large_path = save_large_variant(
            image_data=image_response.content,
            final_filename=final_filename,
            output_dir=output_dir
        )

        image_entry = {
            "file": final_filename,
            "source": identifier,
            "license": media.get("license"),
            "rightsHolder": media.get("rightsHolder"),
            "publisher": occurrence.get("publisher"),
            "gbifOccurrenceId": occurrence_key,
            "gbifTaxonKey": usage_key,
            "gbifMediaIdentifier": media.get("identifier"),
            "gbifReferences": media.get("references") or occurrence.get("references"),
            "created": datetime.now().isoformat(),
            "caption": media.get("title") or media.get("description")
        }
        if thumbnail_filename:
            image_entry["thumbnail"] = thumbnail_filename
            record["thumbnail"] = thumbnail_filename
        if large_filename:
            image_entry["large"] = large_filename
            record["large"] = large_filename
            if large_path:
                record["large_file_path"] = large_path

        record.update({
            "status": "downloaded",
            "image_url": identifier,
            "file_path": destination_path,
            "occurrence_key": occurrence_key,
            "license": media.get("license"),
            "rights_holder": media.get("rightsHolder")
        })

        should_replace = overwrite and not preview_only and step_id not in cleared_steps
        if apply_image_to_step(roadmap, step_id, image_entry, replace_existing=should_replace):
            roadmap_modified = True
            if step_id not in updated_steps:
                updated_steps.append(step_id)
            if should_replace:
                cleared_steps.add(step_id)
        else:
            print(f"    ⚠️ Warning: could not annotate roadmap for step '{step_id}'")

        downloaded_count += 1
        results.append(record)
        print(f"    ✅ Saved to {destination_path}")

    session.close()

    backup_roadmap_path: Optional[str] = None
    original_roadmap_path = f"output/can-do-steps/{run_id}/roadmap-{run_id}.json"

    if not preview_only and roadmap_modified:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_roadmap_path = f"output/can-do-steps/{run_id}/archived/roadmap-{run_id}-{timestamp}.json"

        try:
            # Archive the current roadmap before overwriting it.
            if path_exists(original_roadmap_path):
                copy_path(original_roadmap_path, backup_roadmap_path)
                print(f"✅ Created roadmap backup: {backup_roadmap_path}")

            write_json(original_roadmap_path, roadmap)

            print(f"✅ Updated roadmap with image references: {original_roadmap_path}")
        except OSError as exc:
            print(f"❌ Failed to update roadmap file: {exc}")
            backup_roadmap_path = None

    summary: Dict[str, Any] = {
        "run_id": run_id,
        "steps_considered": len(steps),
        "downloaded": downloaded_count,
        "previewed": previewed_count,
        "skipped": skipped_count,
        "errors": error_count,
        "overwrite": overwrite,
        "preview_mode": preview_only,
        "candidates_per_step": candidates_per_step if preview_only else None,
        "roadmap_updated": roadmap_modified if not preview_only else False,
        "updated_step_ids": updated_steps,
        "roadmap_path": original_roadmap_path,
        "backup_roadmap_path": backup_roadmap_path,
        "output_directory": output_dir,
        "image_max_width": target_max_width,
        "steps_cleared": sorted(cleared_steps),
        "selection_manifest_path": selection_manifest_path,
        "selection_manifest_used": bool(selection_manifest) and not preview_only,
        "results": results
    }

    if preview_only:
        summary["preview_html_path"] = None

    output_step = "fetch_images_preview" if preview_only else "fetch_images"
    output_subfolder = "previews" if preview_only else "images"
    output_path = save_output(
        data=summary,
        pipeline="can-do-steps",
        step=output_step,
        run_id=run_id,
        subfolder=output_subfolder
    )

    summary["summary_path"] = output_path

    if preview_only:
        # The gallery is written next to the JSON summary, then the summary is
        # rewritten so that it records the gallery's path.
        html_path = output_path.replace(".json", ".html")
        try:
            html_content = build_preview_gallery_html(results, run_id)
            write_text(html_path, html_content, content_type="text/html; charset=utf-8")
            summary["preview_html_path"] = html_path
            try:
                write_json(output_path, summary)
            except OSError as exc:
                print(f"⚠️ Unable to update preview JSON with HTML path: {exc}")
        except OSError as exc:
            print(f"❌ Failed to write preview gallery HTML: {exc}")
            summary["preview_html_path"] = None

    if preview_only:
        print(f"\n🎉 Image preview complete: {previewed_count} steps with candidates, {skipped_count} skipped, {error_count} errors")
    else:
        print(f"\n🎉 Image fetch complete: {downloaded_count} downloaded, {skipped_count} skipped, {error_count} errors")
        if overwrite and cleared_steps:
            print(f"   ↺ Cleared and replaced images for {len(cleared_steps)} step(s)")

    return summary


if __name__ == "__main__":
    import argparse

    # Command-line entry point: each flag maps onto the fetch_images keyword
    # parameter of the same name (dashes become underscores in argparse).
    cli = argparse.ArgumentParser(description="Fetch GBIF images for can-do-steps roadmap")
    option_specs = [
        ("--run-id", {"required": True, "help": "Run identifier"}),
        ("--overwrite", {"action": "store_true", "help": "Overwrite existing image files"}),
        ("--limit", {"type": int, "help": "Limit number of steps to process"}),
        ("--preview-only", {"action": "store_true", "help": "Generate preview metadata without downloading images"}),
        ("--candidates-per-step", {"type": int, "default": 6, "help": "Number of candidate images to include per step when previewing"}),
        ("--max-width", {"type": int, "default": 800, "help": "Maximum width in pixels for downloaded images"}),
    ]
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    options = cli.parse_args()

    fetch_images(**vars(options))
