"""Parser for BUILD.bazel files to extract resmoke_suite_test configuration.

This module parses BUILD.bazel files without invoking bazel, supporting a simplified
subset of Bazel syntax:
- Simple lists of targets (no select() expressions)
- Direct file targets (e.g., "//jstests/foo:bar.js")
- all_javascript_files targets (globs *.js in directory)
- all_subpackage_javascript_files targets (recursively includes all JS from subpackages)
"""

import functools
import os
import re
from typing import Dict, List


class BazelParseError(Exception):
    """Signal that a BUILD.bazel file or a Bazel target label could not be parsed.

    Carries no extra state; the human-readable reason is the exception message.
    """


# Tokens that matter when balancing a rule's parentheses: a comment running to
# end of line, a single-line quoted string (either quote style), or a paren.
# Comments and strings are matched only so that parens inside them are skipped.
# Escape sequences inside strings are not interpreted.
_RULE_TOKEN_RE = re.compile(r"""#[^\n]*|"[^"\n]*"|'[^'\n]*'|[()]""")


@functools.cache
def parse_resmoke_suite_test(target_label: str) -> Dict[str, List[str]]:
    """Parse a resmoke_suite_test target from BUILD.bazel.

    The BUILD.bazel file is located by joining the label's package path with
    "BUILD.bazel", so the path is resolved relative to the current working
    directory. Results are memoized per label by ``functools.cache``; the same
    dictionary object is returned on repeated calls, so callers should treat
    it as read-only.

    Args:
        target_label: Bazel target label like "//buildscripts/resmokeconfig:core"
    Returns:
        Dictionary with extracted attributes:
            - srcs: List of test file labels
            - exclude_files: List of test file labels to exclude
            - exclude_with_any_tags: List of tag strings
            - include_with_any_tags: List of tag strings
    Raises:
        BazelParseError: If the label is malformed, the BUILD.bazel file is not
            found, the target is not found, or the rule's parentheses are
            unbalanced
    """
    package, target_name = _parse_label(target_label)
    build_file = os.path.join(package, "BUILD.bazel")

    if not os.path.exists(build_file):
        raise BazelParseError(
            f"BUILD.bazel file not found at '{build_file}' for target '{target_label}'"
        )
    # Read as UTF-8 explicitly rather than relying on the locale's default
    # encoding, which varies between machines.
    with open(build_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Find the resmoke_suite_test block
    # Pattern matches: resmoke_suite_test(name = "target_name", ...)
    # Note that `name` must be the first attribute of the rule for this to match.
    pattern = r'resmoke_suite_test\s*\(\s*name\s*=\s*["\']' + re.escape(target_name) + r'["\']'
    match = re.search(pattern, content)
    if not match:
        raise BazelParseError(
            f"Target '{target_name}' not found in '{build_file}'. "
            f'Expected a resmoke_suite_test rule with name = "{target_name}"'
        )

    # Extract the rule block by finding the parenthesis that closes the rule.
    # Scanning token by token (instead of character by character) keeps
    # parentheses that appear inside string literals or comments from
    # disturbing the nesting depth.
    rule_start = match.start()
    paren_start = content.index("(", rule_start)
    depth = 0
    rule_end = -1
    for token in _RULE_TOKEN_RE.finditer(content, paren_start):
        text = token.group()
        if text == "(":
            depth += 1
        elif text == ")":
            depth -= 1
            if depth == 0:
                rule_end = token.end()
                break

    if rule_end < 0:
        raise BazelParseError(
            f"Unbalanced parentheses in resmoke_suite_test definition for '{target_label}'"
        )
    rule_block = content[rule_start:rule_end]

    return {
        "srcs": _extract_attribute(rule_block, "srcs"),
        "exclude_files": _extract_attribute(rule_block, "exclude_files"),
        "exclude_with_any_tags": _extract_attribute(rule_block, "exclude_with_any_tags"),
        "include_with_any_tags": _extract_attribute(rule_block, "include_with_any_tags"),
    }


def _parse_label(target_label: str) -> tuple[str, str]:
    """Split a Bazel target label into its package path and target name.

    Only absolute labels of the form "//package/path:target_name" are
    accepted. The split happens at the first ":" after the leading "//", so
    any further ":" characters stay in the target name.

    Args:
        target_label: A Bazel target label like "//package/path:target_name"
    Returns:
        Tuple of (package_path, target_name)
    Raises:
        BazelParseError: If the label does not start with "//" or has no ":"
    """
    if not target_label.startswith("//"):
        raise BazelParseError(
            f"Unsupported Bazel target label '{target_label}': must start with '//'"
        )

    # partition() yields an empty separator when no ":" is present.
    package, separator, target_name = target_label.removeprefix("//").partition(":")
    if not separator:
        raise BazelParseError(
            f"Unsupported Bazel target label '{target_label}': must contain ':' separator"
        )

    return package, target_name


# Tokens that matter inside a list literal: a comment running to end of line,
# a single-line quoted string (double- or single-quoted), or the closing
# bracket. Group 1 / group 2 capture the string contents. Escape sequences
# inside strings are not interpreted.
_LIST_TOKEN_RE = re.compile(r"""#[^\n]*|"([^"\n]*)"|'([^'\n]*)'|\]""")


def _extract_attribute(block: str, attribute_name: str) -> List[str]:
    """Extract a list-of-strings attribute from a BUILD.bazel rule block.

    Finds the first ``attribute_name = [`` in the block and collects the
    quoted strings up to the first closing bracket that is outside any string
    or comment. The list may span multiple lines. "#" comments are ignored,
    while "#" and "]" characters inside a quoted string are kept as part of
    that string.

    Args:
        block: The text content of a BUILD.bazel rule block
        attribute_name: The name of the attribute to extract (e.g., "srcs")
    Returns:
        List of non-empty string values from the attribute, in source order.
        Returns empty list if the attribute is not found or its list is never
        closed.
    """
    # The name is escaped and must not be preceded by a word character, so
    # that "srcs" does not match inside a longer name such as "extra_srcs".
    header = re.search(rf"(?<!\w){re.escape(attribute_name)}\s*=\s*\[", block)
    if header is None:
        return []

    items: List[str] = []
    for token in _LIST_TOKEN_RE.finditer(block, header.end()):
        text = token.group()
        if text == "]":
            # First bracket outside strings/comments terminates the list.
            return items
        if text.startswith("#"):
            # Comment: skip it entirely, including any quotes or brackets in it.
            continue
        value = token.group(1) if token.group(1) is not None else token.group(2)
        if value:
            items.append(value)

    # Ran off the end of the block without seeing the closing bracket.
    return []


def resolve_target_to_files(target_label: str) -> str:
    """Translate a Bazel target label into a file path or glob pattern.

    Supported target types:
    - Direct file: "//jstests/foo:bar.js" → "jstests/foo/bar.js"
    - all_javascript_files: returns glob pattern "package/*.js"
    - all_subpackage_javascript_files: returns glob pattern "package/**/*.js"

    Args:
        target_label: Bazel target label to resolve
    Returns:
        File path or glob pattern, built by joining the label's package path
        with the file name or glob suffix
    Raises:
        BazelParseError: If the label is malformed or the target type is
            unsupported
    """
    package, target_name = _parse_label(target_label)

    # A target naming a .js file maps straight to that file inside the package.
    if target_name.endswith(".js"):
        return os.path.join(package, target_name)

    # Well-known aggregate targets map to a glob rooted at the package.
    glob_suffix_by_target = {
        "all_javascript_files": "*.js",
        "all_subpackage_javascript_files": "**/*.js",
    }
    glob_suffix = glob_suffix_by_target.get(target_name)
    if glob_suffix is None:
        raise BazelParseError(
            f"Unsupported target type '{target_label}'. "
            "Supported types: direct .js files, all_javascript_files, all_subpackage_javascript_files"
        )

    return os.path.join(package, glob_suffix)