"""
Test and validation script for the Can-Do-Steps pipeline.

Exercises each pipeline component, checks prompt files and data formats, and
validates the media and audio helpers. Run directly to execute the full suite.
"""
import os
import sys
import json
import shutil
import traceback
from typing import Dict, Any, List

# Add the project root to the path
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

# Only the names used at module level are imported here; the full set of
# pipeline imports is exercised (and reported cleanly) by test_pipeline_imports().
from chains.can_do_steps.expand_and_create_hierarchy import SAMPLE_STATEMENTS
from chains.can_do_steps.complete_pipeline import get_pipeline_status

def test_sample_statements():
    """Test that the sample statements are properly defined."""
    print("Testing sample statements...")

    assert len(SAMPLE_STATEMENTS) == 45, f"Expected 45 sample statements, got {len(SAMPLE_STATEMENTS)}"
    print(f"✓ Found {len(SAMPLE_STATEMENTS)} sample statements")

    # Check that all statements follow can-do patterns
    can_do_prefixes = ["I can ", "I know ", "I have ", "I use ", "I understand "]
    invalid_statements = []

    for i, stmt in enumerate(SAMPLE_STATEMENTS):
        if not any(stmt.startswith(prefix) for prefix in can_do_prefixes):
            invalid_statements.append((i, stmt))

    if invalid_statements:
        print("⚠ Found statements that don't follow can-do patterns:")
        for i, stmt in invalid_statements:
            print(f"   {i}: {stmt}")
    else:
        print("✓ All sample statements follow proper can-do patterns")

    print("✓ Sample statements test passed\n")

def test_pipeline_imports():
    """Test that all pipeline components can be imported."""
    print("Testing pipeline imports...")

    try:
        # Test that all main functions can be imported
        from chains.can_do_steps.expand_and_create_hierarchy import expand_and_create_hierarchy
        from chains.can_do_steps.split_hierarchy import split_hierarchy
        from chains.can_do_steps.organize_bits import organize_bits
        from chains.can_do_steps.complete_pipeline import generate_complete_can_do_hierarchy

        print("✓ All pipeline components imported successfully")

    except ImportError as e:
        print(f"❌ Import error: {e}")
        return False

    print("✓ Pipeline imports test passed\n")
    return True

def test_prompt_files():
    """Test that all prompt files exist and are readable."""
    print("Testing prompt files...")

    prompt_files = [
        "prompts/can-do-steps/expand_and_create_hierarchy.txt",
        "prompts/can-do-steps/organize_bits.txt",
        "prompts/can-do-steps/describe_images.txt",
        "prompts/can-do-steps/find_resources.txt",
        "prompts/can-do-steps/generate_media.txt"
    ]

    missing_files = []
    empty_files = []

    for prompt_file in prompt_files:
        # Resolve relative to the project root so the check does not depend on the current working directory.
        prompt_path = os.path.join(PROJECT_ROOT, prompt_file)
        if not os.path.exists(prompt_path):
            missing_files.append(prompt_file)
            continue

        try:
            with open(prompt_path, 'r', encoding='utf-8') as f:
                content = f.read().strip()
                if len(content) < 100:  # Heuristic minimum length for a usable prompt
                    empty_files.append(prompt_file)
                else:
                    print(f"✓ {prompt_file}: {len(content)} characters")
        except Exception as e:
            print(f"❌ Error reading {prompt_file}: {e}")

    if missing_files:
        print(f"❌ Missing prompt files: {missing_files}")
        return False

    if empty_files:
        print(f"❌ Empty or very short prompt files: {empty_files}")
        return False

    print("✓ All prompt files exist and have content\n")
    return True

def test_json_export_format():
    """Test the JSON data format validation (without the deprecated export_json module)."""
    print("Testing JSON data format validation...")

    # Each entity type is checked against the fields its exported JSON record is expected to carry.
    samples = {
        "track": (
            {
                "id": "test-track",
                "slug": "test-track",
                "title": "Test Track",
                "description": "This is a test track for validation",
                "order": 1
            },
            ["id", "slug", "title", "description", "order"]
        ),
        "path": (
            {
                "id": "test-path",
                "slug": "test-path",
                "title": "Test Path",
                "description": "This is a test path for validation",
                "trackId": "test-track",
                "order": 1
            },
            ["id", "slug", "title", "description", "trackId", "order"]
        ),
        "step": (
            {
                "id": "test-step",
                "slug": "test-step",
                "title": "I can do a test",
                "description": "This is a test step for validation",
                "level": "beginner",
                "pathId": "test-path",
                "order": 1
            },
            ["id", "slug", "title", "description", "level", "pathId", "order"]
        ),
        "bit": (
            {
                "id": "test-bit",
                "slug": "test-bit",
                "title": "Understanding Test Concepts",
                "content": "Learn the basic concepts needed for testing",
                "stepId": "test-step",
                "order": 1
            },
            ["id", "slug", "title", "content", "stepId", "order"]
        )
    }

    for entity, (sample, expected_fields) in samples.items():
        for field in expected_fields:
            assert field in sample, f"Missing field '{field}' in {entity}"
        print(f"✓ {entity.capitalize()} format validation is correct")

    print("✓ JSON data format validation test passed\n")
    return True

def test_validation_functions():
    """Test the data validation functions."""
    print("Testing validation functions...")

    # Full parsing validation would require real LLM output; for now, confirm
    # that the parsing helpers can be imported and are callable (see the smoke check below).
    from chains.can_do_steps.expand_and_create_hierarchy import parse_expanded_hierarchy
    from chains.can_do_steps.organize_bits import parse_organized_bits
    from chains.can_do_steps.describe_images import parse_media_output

    print("✓ All parsing functions exist")

    # Test validation functions from split_hierarchy
    from chains.can_do_steps.split_hierarchy import validate_hierarchy_consistency
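
    # Lightweight smoke check (an addition; assumes nothing about signatures): every imported helper must be callable.
    for helper in (parse_expanded_hierarchy, parse_organized_bits, parse_media_output, validate_hierarchy_consistency):
        assert callable(helper), f"{getattr(helper, '__name__', helper)} is not callable"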

    print("✓ Validation functions exist")
    print("✓ Validation functions test passed\n")
    return True

def run_basic_pipeline_test(test_run_id: str = "test_pipeline"):
    """
    Run a basic test of the pipeline using force_text mode.
    Note: This won't actually call the LLM, just test the structure.
    """
    print(f"Running basic pipeline structure test with run_id: {test_run_id}...")

    try:
        # Test pipeline status
        status = get_pipeline_status(test_run_id)
        print(f"✓ Pipeline status retrieved: {len(status['phases_available'])} phases available")

        # The full pipeline is not executed here; doing so would require LLM calls.
        print("✓ Pipeline structure appears correct")

    except Exception as e:
        print(f"❌ Pipeline structure test failed: {e}")
        traceback.print_exc()
        return False

    print("✓ Basic pipeline structure test passed\n")
    return True

def test_generate_media():
    """Test the generate_media functionality with mocked dependencies."""
    print("Testing generate_media functionality...")

    test_run_id = "test-media-generation"

    try:
        # Create test roadmap file with existing illustration descriptions
        test_roadmap = {
            "id": test_run_id,
            "title": "Test Media Generation",
            "illustration": "A comprehensive overview showing the complete learning journey",
            "tracks": [
                {
                    "id": "test-track",
                    "slug": "test-track",
                    "title": "Test Track",
                    "description": "Test track for media generation",
                    "illustration": "A clean educational interface showing test concepts with interactive elements",
                    "order": 1,
                    "paths": [
                        {
                            "id": "test-path",
                            "slug": "test-path",
                            "title": "Test Path",
                            "description": "Test path for media generation",
                            "illustration": "A step-by-step pathway diagram with clear progression markers",
                            "order": 1,
                            "steps": [
                                {
                                    "id": "test-step",
                                    "slug": "test-step",
                                    "title": "I can test media generation",
                                    "description": "Test step for media generation",
                                    "illustration": "A focused view of a specific skill being demonstrated",
                                    "level": "beginner",
                                    "order": 1
                                }
                            ]
                        }
                    ]
                }
            ]
        }

        # Ensure test directory exists
        test_output_dir = f"output/can-do-steps/{test_run_id}"
        os.makedirs(test_output_dir, exist_ok=True)

        # Save test roadmap file
        roadmap_path = f"{test_output_dir}/roadmap-{test_run_id}.json"
        with open(roadmap_path, 'w', encoding='utf-8') as f:
            json.dump(test_roadmap, f, indent=2)

        print(f"✓ Created test roadmap file: {roadmap_path}")

        # Test the new generate_media functions
        from chains.can_do_steps.generate_media import (
            generate_media,
            load_roadmap_file,
            extract_existing_illustrations,
            filter_illustrations_by_target_level
        )

        # Test that the generate_media function can load the roadmap
        loaded_roadmap = load_roadmap_file(test_run_id)
        assert loaded_roadmap is not None
        assert loaded_roadmap["id"] == test_run_id

        print("✓ Roadmap loading works correctly")

        # Test illustration extraction
        illustrations = extract_existing_illustrations(loaded_roadmap)
        assert len(illustrations) == 4  # roadmap + track + path + step
        assert illustrations[0]["type"] == "roadmap"
        assert illustrations[1]["type"] == "track"
        assert illustrations[2]["type"] == "path"
        assert illustrations[3]["type"] == "step"

        print("✓ Illustration extraction works correctly")

        # Test filtering by target level
        track_illustrations = filter_illustrations_by_target_level(illustrations, "track")
        assert len(track_illustrations) == 1
        assert track_illustrations[0]["type"] == "track"

        step_illustrations = filter_illustrations_by_target_level(illustrations, "step")
        assert len(step_illustrations) == 1
        assert step_illustrations[0]["type"] == "step"

        print("✓ Illustration filtering works correctly")

        # Test styling guidance extraction
        from chains.can_do_steps.generate_media import (
            get_styling_guidance,
            resolve_generate_media_prompt,
        )
        styling_guidance = get_styling_guidance()
        assert len(styling_guidance) > 0, "Styling guidance should not be empty"
        assert "whimsical" in styling_guidance.lower() or "children" in styling_guidance.lower(), "Should contain style keywords"

        custom_chain, custom_prompt = resolve_generate_media_prompt("za-birds")
        assert custom_chain.endswith("za-birds"), "Custom chain name should include run_id"
        assert custom_prompt.endswith("generate_media_za-birds.txt"), "Custom prompt path should point to run-specific file"

        custom_guidance = get_styling_guidance(run_id="za-birds")
        assert len(custom_guidance) > 0, "Custom styling guidance should not be empty"

        print("✓ Styling guidance extraction works correctly")

        # Test the main generate_media function (without actual image generation)
        result = generate_media(
            run_id=test_run_id,
            overwrite=False,
            target_level="all",
            generate_images=False  # Skip actual image generation for test
        )

        assert result["run_id"] == test_run_id
        assert result["illustrations_found"] == 4
        assert result["images_generated"] == 0  # We skipped image generation
        assert len(result["illustrations"]) == 4

        print("✓ Generate media function works correctly")

        # Test with specific target level
        result_tracks = generate_media(
            run_id=test_run_id,
            overwrite=False,
            target_level="track",
            generate_images=False
        )

        assert result_tracks["illustrations_found"] == 1
        assert result_tracks["illustrations"][0]["type"] == "track"

        print("✓ Target level filtering works correctly")

        # Cleanup test files
        if os.path.exists(test_output_dir):
            shutil.rmtree(test_output_dir)

        print("✓ Test cleanup completed")

    except Exception as e:
        print(f"❌ Generate media test failed: {e}")
        traceback.print_exc()
        return False

    print("✓ Generate media test passed\n")
    return True
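

# test_fetch_audio below is written for pytest's `monkeypatch` fixture. So that
# run_comprehensive_tests() can also drive it directly, this minimal stand-in
# (an assumption: the test only needs setenv/setattr) records every change and
# restores it in undo().
class _LocalMonkeypatch:
    def __init__(self):
        self._saved_env = []
        self._saved_attrs = []

    def setenv(self, name, value):
        self._saved_env.append((name, os.environ.get(name)))
        os.environ[name] = value

    def setattr(self, target, name, value):
        self._saved_attrs.append((target, name, getattr(target, name)))
        setattr(target, name, value)

    def undo(self):
        for target, name, original in reversed(self._saved_attrs):
            setattr(target, name, original)
        for name, original in reversed(self._saved_env):
            if original is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = original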


def test_fetch_audio(monkeypatch):
    """Test the fetch_audio functionality with mocked Xeno-Canto responses."""
    print("Testing fetch_audio functionality...")

    test_run_id = "test-fetch-audio"
    test_output_dir = f"output/can-do-steps/{test_run_id}"

    try:
        if os.path.exists(test_output_dir):
            shutil.rmtree(test_output_dir)
        os.makedirs(test_output_dir, exist_ok=True)

        test_roadmap = {
            "id": test_run_id,
            "title": "Test Fetch Audio",
            "tracks": [
                {
                    "id": "test-track",
                    "title": "Test Track",
                    "paths": [
                        {
                            "id": "test-path",
                            "title": "Test Path",
                            "steps": [
                                {
                                    "id": "test-step",
                                    "title": "Identify Helmeted Guineafowl",
                                    "description": "Numida meleagris",
                                    "level": "beginner",
                                    "order": 1
                                }
                            ]
                        }
                    ]
                }
            ]
        }

        roadmap_path = f"{test_output_dir}/roadmap-{test_run_id}.json"
        with open(roadmap_path, "w", encoding="utf-8") as file:
            json.dump(test_roadmap, file, indent=2)

        print(f"✓ Created test roadmap file: {roadmap_path}")

        # Import here, before the mock classes below that reference these names.
        import requests
        from chains.can_do_steps.fetch_audio import fetch_audio, XENO_CANTO_API_URL

        mock_recording_url = "https://www.xeno-canto.org/12345/download.mp3"

        class MockResponse:
            def __init__(self, *, json_data=None, content=None, status_code=200):
                self._json_data = json_data
                self.content = content or b""
                self.status_code = status_code

            def raise_for_status(self):
                if self.status_code >= 400:
                    raise requests.HTTPError(f"Status: {self.status_code}")

            def json(self):
                if self._json_data is None:
                    raise ValueError("No JSON data available")
                return self._json_data

        class MockSession:
            def __init__(self):
                self.headers = {}
                self.calls: List[str] = []

            def get(self, url, params=None, timeout=None):
                if XENO_CANTO_API_URL in url:
                    self.calls.append("api")
                    return MockResponse(
                        json_data={
                            "recordings": [
                                {
                                    "id": "999",
                                    "q": "A",
                                    "file": mock_recording_url,
                                    "lic": "CC-BY",
                                    "length": "0:25"
                                }
                            ]
                        }
                    )
                if mock_recording_url in url:
                    self.calls.append("audio")
                    return MockResponse(content=b"fake audio bytes", status_code=200)
                return MockResponse(status_code=404)

        monkeypatch.setenv("XENO_CANTO_API_KEY", "test-key")
        monkeypatch.setattr(requests, "Session", lambda: MockSession())

        result = fetch_audio(run_id=test_run_id, overwrite=True)

        audio_dir = os.path.join(test_output_dir, "audio")
        expected_path = os.path.join(audio_dir, "test-step.mp3")

        assert os.path.exists(expected_path)
        assert result["downloaded"] == 1
        assert result["skipped"] == 0
        assert result["errors"] == 0
        assert result["results"][0]["status"] == "downloaded"
        assert result["roadmap_updated"] is True
        assert "test-step" in result["updated_step_ids"]
        assert os.path.exists(result["roadmap_path"])
        assert result["backup_roadmap_path"] is not None
        assert os.path.exists(result["backup_roadmap_path"])

        with open(result["roadmap_path"], "r", encoding="utf-8") as updated_file:
            updated_roadmap = json.load(updated_file)

        audio_field = updated_roadmap["tracks"][0]["paths"][0]["steps"][0].get("audio")
        assert audio_field == "test-step.mp3"

        print("✓ fetch_audio downloaded expected file")

        if os.path.exists(test_output_dir):
            shutil.rmtree(test_output_dir)

        print("✓ Test cleanup completed")

    except Exception as exc:
        print(f"❌ Fetch audio test failed: {exc}")
        traceback.print_exc()
        return False

    print("✓ Fetch audio test passed\n")
    return True
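

def _run_fetch_audio_test():
    """Drive test_fetch_audio outside pytest by supplying the local monkeypatch stand-in."""
    patcher = _LocalMonkeypatch()
    try:
        return test_fetch_audio(patcher)
    finally:
        patcher.undo()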


def test_describe_images():
    """Test the describe_images functionality with mocked dependencies."""
    print("Testing describe_images functionality...")

    test_run_id = "test-describe-images"

    try:
        # Create test roadmap file without existing illustration descriptions
        test_roadmap = {
            "id": test_run_id,
            "title": "Test Describe Images",
            "description": "Test roadmap for describe images functionality",
            "tracks": [
                {
                    "id": "test-track",
                    "slug": "test-track",
                    "title": "Test Track",
                    "description": "Test track for describe images",
                    "order": 1,
                    "paths": [
                        {
                            "id": "test-path",
                            "slug": "test-path",
                            "title": "Test Path",
                            "description": "Test path for describe images",
                            "order": 1,
                            "steps": [
                                {
                                    "id": "test-step",
                                    "slug": "test-step",
                                    "title": "I can test describe images",
                                    "description": "Test step for describe images",
                                    "level": "beginner",
                                    "order": 1
                                }
                            ]
                        }
                    ]
                }
            ]
        }

        # Ensure test directory exists
        test_output_dir = f"output/can-do-steps/{test_run_id}"
        os.makedirs(test_output_dir, exist_ok=True)

        # Save test roadmap file
        roadmap_path = f"{test_output_dir}/roadmap-{test_run_id}.json"
        with open(roadmap_path, 'w', encoding='utf-8') as f:
            json.dump(test_roadmap, f, indent=2)

        print(f"✓ Created test roadmap file: {roadmap_path}")

        # Test the describe_images functions
        from chains.can_do_steps.describe_images import (
            describe_images,
            load_roadmap_file,
            extract_content_for_prompts,
            parse_media_output
        )

        # Test that the describe_images function can load the roadmap
        loaded_roadmap = load_roadmap_file(test_run_id)
        assert loaded_roadmap is not None
        assert loaded_roadmap["id"] == test_run_id

        print("✓ Roadmap loading works correctly")

        # Test content extraction
        content = extract_content_for_prompts(loaded_roadmap, "all")
        assert "TRACKS" in content
        assert "test-track" in content
        assert "PATHS" in content
        assert "test-path" in content
        assert "STEPS" in content
        assert "test-step" in content

        print("✓ Content extraction works correctly")

        # Test the parsing function with mock output
        mock_llm_output = """
        {
            "illustrations": [
                {
                    "type": "track",
                    "id": "test-track",
                    "title": "Test Track",
                    "description": "Test track for describe images",
                    "illustration": "A clean educational interface showing test concepts with interactive elements",
                    "priority": "high",
                    "style_notes": "Child-friendly cartoon style"
                }
            ],
            "generation_summary": {
                "total_items": 1,
                "illustrations_generated": 1,
                "skipped_items": [],
                "style_theme": "Clean educational design"
            }
        }
        """

        # Test parsing function
        parsed = parse_media_output(mock_llm_output)
        assert "illustrations" in parsed
        assert "generation_summary" in parsed
        assert len(parsed["illustrations"]) == 1
        assert parsed["illustrations"][0]["type"] == "track"

        print("✓ Description output parsing works correctly")

        # Test that the function would create proper backup and update paths
        # (We're not actually calling describe_images with LLM here, just testing the structure)

        print("✓ Describe images structure validation completed")

        # Cleanup test files
        if os.path.exists(test_output_dir):
            shutil.rmtree(test_output_dir)

        print("✓ Test cleanup completed")

    except Exception as e:
        print(f"❌ Describe images test failed: {e}")
        traceback.print_exc()
        return False

    print("✓ Describe images test passed\n")
    return True

def run_comprehensive_tests():
    """Run all tests and return overall success."""
    print("=== Can-Do-Steps Pipeline Test Suite ===\n")

    tests = [
        ("Sample Statements", test_sample_statements),
        ("Pipeline Imports", test_pipeline_imports),
        ("Prompt Files", test_prompt_files),
        ("JSON Data Format", test_json_export_format),
        ("Validation Functions", test_validation_functions),
        ("Describe Images", test_describe_images),
        ("Generate Media", test_generate_media),
        # test_fetch_audio needs a monkeypatch; the wrapper supplies a local stand-in when not run under pytest.
        ("Fetch Audio", _run_fetch_audio_test),
        ("Basic Pipeline Structure", run_basic_pipeline_test),
    ]

    passed = 0
    failed = 0

    for test_name, test_function in tests:
        print(f"--- Running {test_name} Test ---")
        try:
            if test_function():
                passed += 1
                print(f"✓ {test_name} test PASSED\n")
            else:
                failed += 1
                print(f"❌ {test_name} test FAILED\n")
        except Exception as e:
            failed += 1
            print(f"❌ {test_name} test FAILED with exception: {e}")
            traceback.print_exc()
            print()

    print("=== Test Results ===")
    print(f"Total tests: {len(tests)}")
    print(f"Passed: {passed}")
    print(f"Failed: {failed}")

    if failed == 0:
        print("🎉 All tests PASSED! Pipeline is ready to use.")
        return True
    else:
        print("⚠ Some tests FAILED. Please review the errors above.")
        return False

def print_usage_instructions():
    """Print usage instructions for the pipeline."""
    print("\n" + "="*60)
    print("CAN-DO-STEPS PIPELINE USAGE INSTRUCTIONS")
    print("="*60)

    print("""
BASIC USAGE:

1. Run the complete pipeline:
   ```python
   from chains.can_do_steps.complete_pipeline import generate_complete_can_do_hierarchy

   result = generate_complete_can_do_hierarchy(
       run_id="my_run_001",
       export_json=True,
       output_dir="./my_output/"
   )
   ```

2. Run individual phases:
   ```python
   from chains.can_do_steps.expand_and_create_hierarchy import expand_and_create_hierarchy
   from chains.can_do_steps.split_hierarchy import split_hierarchy
   # ... etc

   hierarchy = expand_and_create_hierarchy("my_run_001")
   split_data = split_hierarchy("my_run_001")
   ```

3. Check pipeline status:
   ```python
   from chains.can_do_steps.complete_pipeline import get_pipeline_status

   status = get_pipeline_status("my_run_001")
   print(status)
   ```

PIPELINE PHASES (UNIFIED ARCHITECTURE):
1. expand_and_create_hierarchy: Expand 43 → 80-100 statements and create complete hierarchy
2. split_hierarchy: Split nested hierarchy into individual JSON files
3. organize_bits: Generate 1-3 learning bits per step
4. describe_images: Generate illustration descriptions for tracks, paths, and steps
5. generate_media: Generate actual images from existing illustration descriptions
6. export_json: Export all data to JSON files

OUTPUT FILES:
- tracks.json: Track definitions with descriptions
- paths.json: Path definitions with trackId references
- steps.json: Step definitions with pathId and level
- bits.json: Learning bit definitions with stepId references
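
Loading exported data (layout is illustrative; the directory matches whatever output_dir you passed, e.g. "./my_output/"):
   ```python
   import json

   with open("./my_output/tracks.json", encoding="utf-8") as f:
       tracks = json.load(f)
   print(tracks[0]["title"])
   ```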

FORCE TEXT MODE:
Use force_text=True to skip LLM calls and use existing outputs:
   ```python
   result = generate_complete_can_do_hierarchy(
       run_id="my_run_001",
       force_text=True
   )
   ```
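
MEDIA PHASES (signatures as exercised by this test suite; treat as illustrative):
   ```python
   from chains.can_do_steps.generate_media import generate_media
   from chains.can_do_steps.fetch_audio import fetch_audio

   media = generate_media(
       run_id="my_run_001",
       overwrite=False,
       target_level="all",      # or "track" / "path" / "step"
       generate_images=True,
   )
   audio = fetch_audio(run_id="my_run_001", overwrite=True)
   ```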

For detailed architecture information, see:
chains/can-do-steps/ARCHITECTURE.md
""")

if __name__ == "__main__":
    # Run the test suite when called directly.
    success = run_comprehensive_tests()

    if success:
        print_usage_instructions()
        sys.exit(0)
    else:
        print("\nTests failed. Please fix issues before using the pipeline.")
        sys.exit(1)
