<?php
declare(strict_types=1);

namespace ZiziCache;

/**
 * HTML Parser and Manipulation
 * 
 * This class provides functionality for parsing and manipulating HTML elements.
 * It allows for easy access and modification of HTML attributes and content.
 * 
 * Key features:
 * - Parse HTML elements and their attributes
 * - Get and set element attributes dynamically
 * - Support for self-closing tags
 * - CSS selector-like functionality for finding elements
 * - Safe HTML manipulation
 * 
 * @package ZiziCache
 */
class HTML
{
    /**
     * The original HTML tag as provided to the constructor (never modified).
     * @var string
     */
    public string $original_tag;

    /**
     * The current (possibly modified) markup.
     * @var string
     */
    private string $tag;

    /**
     * Whether the markup given to the constructor ends with "/>".
     * @var bool
     */
    private bool $self_closing;

    /**
     * Constructor for the HTML tag
     *
     * @param string $tag The HTML tag to parse and manipulate
     */
    public function __construct(string $tag)
    {
        $this->original_tag = $tag;
        $this->tag = $tag;
        $this->self_closing = preg_match('/\/>$/', $tag) === 1;
    }

    /**
     * Get the inner content of the HTML tag
     *
     * The content is everything between the end of the opening tag and the
     * last closing tag of the same name, with surrounding whitespace trimmed.
     *
     * @return string The inner content, or an empty string for self-closing
     *                tags and for markup whose bounds cannot be determined
     */
    public function getContent(): string
    {
        $bounds = $this->getContentBounds();
        if ($bounds === null) {
            return '';
        }

        [$start, $length] = $bounds;

        return trim(substr($this->tag, $start, $length));
    }

    /**
     * Set the inner content of the HTML tag
     *
     * @param string $content The content to set inside the tag
     * @return bool True on success, false for self-closing tags or when the
     *              content bounds cannot be determined (malformed markup)
     */
    public function setContent(string $content): bool
    {
        $bounds = $this->getContentBounds();
        if ($bounds === null) {
            return false;
        }

        [$start, $length] = $bounds;
        $this->tag = substr_replace($this->tag, $content, $start, $length);

        return true;
    }

    /**
     * Magic getter for HTML attributes
     *
     * Only the opening tag is inspected, so text inside the element (for
     * example an inline script body) is never mistaken for an attribute.
     *
     * @param string $attribute The attribute name to get
     * @return string|bool|null The quoted attribute value, true for boolean
     *                          attributes, or null if not found
     */
    public function __get(string $attribute)
    {
        [$opening] = $this->splitOpeningTag();
        $name = preg_quote($attribute, '/');

        if (preg_match('/\s' . $name . '=(["\'])(.*?)\1/s', $opening, $found) === 1) {
            return $found[2];
        }

        // Boolean attribute: the name must be followed by whitespace, ">" or "/>".
        if (preg_match('/\s' . $name . '(?=\s|\/?>)/s', $opening) === 1) {
            return true;
        }

        return null;
    }

    /**
     * Magic setter for HTML attributes
     *
     * Replaces the attribute if the opening tag already has it, otherwise
     * inserts it just before the ">" (or "/>") that ends the opening tag.
     *
     * @param string $attribute The attribute name to set
     * @param mixed $value The value to set; null, true or the string 'true'
     *                     produce a boolean (value-less) attribute
     * @return void
     */
    public function __set(string $attribute, $value = null): void
    {
        if ($value === null || $value === true || $value === 'true') {
            $attribute_string = $attribute;
        } else {
            $text = (string) $value;
            if (!str_contains($text, '"')) {
                $attribute_string = $attribute . '="' . $text . '"';
            } elseif (!str_contains($text, "'")) {
                $attribute_string = $attribute . "='" . $text . "'";
            } else {
                // Both quote styles are present: encode the double quotes so
                // the value cannot terminate the attribute early.
                $attribute_string = $attribute . '="' . str_replace('"', '&quot;', $text) . '"';
            }
        }

        [$opening, $rest] = $this->splitOpeningTag();

        // The new text is spliced in with substr_replace() rather than passed
        // as a preg_replace() replacement, so "$1" or "\2" inside a value is
        // kept literally instead of being expanded as a backreference.
        if (preg_match($this->attributePattern($attribute), $opening, $found, PREG_OFFSET_CAPTURE) === 1) {
            [$matched, $position] = $found[0];
            $opening = substr_replace($opening, ' ' . $attribute_string, $position, strlen($matched));
        } elseif (str_ends_with($opening, '/>')) {
            $opening = substr($opening, 0, -2) . ' ' . $attribute_string . '/>';
        } elseif (str_ends_with($opening, '>')) {
            $opening = substr($opening, 0, -1) . ' ' . $attribute_string . '>';
        } else {
            // No tag terminator found: nowhere safe to insert the attribute.
            return;
        }

        $this->tag = $opening . $rest;
    }

    /**
     * Magic unsetter for HTML attributes
     *
     * Removes every occurrence of the attribute from the opening tag.
     *
     * @param string $attribute The attribute name to remove
     * @return void
     */
    public function __unset(string $attribute): void
    {
        [$opening, $rest] = $this->splitOpeningTag();
        $stripped = preg_replace($this->attributePattern($attribute), '', $opening);

        // preg_replace() returns null on a PCRE error; keep the markup intact then.
        if ($stripped !== null) {
            $this->tag = $stripped . $rest;
        }
    }

    /**
     * Get the tag name of the HTML element
     *
     * @return string The name of the first tag found, with its case left as
     *                written in the markup, or an empty string if not found
     */
    public function getTagName(): string
    {
        // The name must be followed by whitespace, ">" or "/" (e.g. "<br/>").
        if (preg_match('/<([a-zA-Z0-9_:-]+)(?=[\s>\/])/', $this->tag, $found) === 1) {
            return $found[1];
        }

        return '';
    }

    /**
     * Find opening tags matching a simple selector
     *
     * Supported selectors: ".class", "#id", "[attr]", "[attr=value]",
     * "[attr="value"]" and a bare tag name.
     *
     * @param string $selector The selector to look for
     * @return array<int, array{0: string, 1: int}> Matched opening tags with their byte offsets
     */
    public function getTagsBySelector(string $selector): array
    {
        if ($selector === '') {
            return [];
        }

        if ($selector[0] === '.') {
            $class = substr($selector, 1);
            if (preg_match('/^[a-zA-Z0-9_-]+$/', $class) !== 1) {
                return [];
            }

            // "-" counts as part of a class name, so ".btn" must not match
            // "btn-primary"; "class=" must not be the tail of "data-class=".
            return $this->matchTags(
                '/(<[^>]*(?<![\w-])class=["\'][^"\']*(?<![\w-])' . $class . '(?![\w-])[^"\']*["\'][^>]*>)/s'
            );
        }

        if ($selector[0] === '#') {
            $id = substr($selector, 1);
            if (preg_match('/^[a-zA-Z0-9_-]+$/', $id) !== 1) {
                return [];
            }

            return $this->matchTags('/(<[^>]*(?<![\w-])id=["\']' . $id . '["\'][^>]*>)/s');
        }

        if ($selector[0] === '[' && str_ends_with($selector, ']')) {
            $parsed = preg_match(
                '/^([a-zA-Z0-9_:-]+)(?:\s*=\s*(?:"(.*?)"|([^\s>]+)))?$/',
                substr($selector, 1, -1),
                $parts,
                PREG_UNMATCHED_AS_NULL
            );
            if ($parsed !== 1) {
                return [];
            }

            $name = preg_quote($parts[1], '/');
            $value = $parts[2] ?? $parts[3] ?? null;

            if ($value === null) {
                // Presence selector: the name may be followed by "=", whitespace, ">" or "/".
                return $this->matchTags('/(<[^>]*\s' . $name . '(?=[\s=>\/])[^>]*>)/s');
            }

            // An unquoted selector value may itself be wrapped in single quotes.
            if ($parts[2] === null && strlen($value) >= 2 && $value[0] === "'" && str_ends_with($value, "'")) {
                $value = substr($value, 1, -1);
            }
            $value = preg_quote($value, '/');

            // Accept the value in double quotes, single quotes, or unquoted.
            return $this->matchTags(
                '/(<[^>]*\s' . $name . '\s*=\s*(?:"' . $value . '"|\'' . $value . '\'|' . $value
                . '(?=\s|\/?>))[^>]*>)/s'
            );
        }

        if (preg_match('/^[a-zA-Z0-9_:-]+$/', $selector) !== 1) {
            return [];
        }

        // The lookahead keeps the match on the opening tag only, so nested
        // attribute-less tags of the same name are each found.
        return $this->matchTags('/(<' . $selector . '(?=[\s>\/])[^>]*>)/s');
    }

    /**
     * Find complete elements (opening tag through matching closing tag)
     * for a selector
     *
     * Void elements, comments, doctype declarations and self-closing tags
     * are returned as the tag alone. The bodies of script and style elements
     * are not scanned for nested tags.
     *
     * @param string $selector A selector accepted by getTagsBySelector()
     * @return string[] The markup of each matched element
     */
    public function getElementsBySelector(string $selector): array
    {
        $tags = $this->getTagsBySelector($selector);
        if ($tags === []) {
            return [];
        }

        if (!preg_match_all('/<[^>]*>/', $this->tag, $all_tags, PREG_OFFSET_CAPTURE)) {
            return [];
        }

        $elements = [];

        foreach ($tags as [, $tag_offset]) {
            $depth = 0;
            $raw_text_element = null; // "script" or "style" while inside one

            foreach ($all_tags[0] as [$markup, $offset]) {
                if ($offset < $tag_offset) {
                    continue;
                }

                if ($raw_text_element !== null) {
                    // Inside script/style everything is text until the closer.
                    // The closer may be buried in a longer match such as
                    // "< b) {}</script>", hence the substring search.
                    if (stripos($markup, '</' . $raw_text_element) === false) {
                        continue;
                    }
                    $raw_text_element = null;
                    $depth--;
                } elseif ($this->isStandaloneTag($markup)) {
                    if ($offset !== $tag_offset) {
                        continue;
                    }
                    // The selected tag has no closing counterpart: it is the element.
                    $elements[] = $markup;
                    break;
                } elseif (str_contains($markup, '</')) {
                    $depth = max(0, $depth - 1);
                } else {
                    $depth++;
                    if (preg_match('/^<(script|style)(?=[\s>])/i', $markup, $raw) === 1) {
                        $raw_text_element = strtolower($raw[1]);
                    }
                    continue;
                }

                if ($depth === 0) {
                    // This closing tag balances the selected opening tag.
                    $elements[] = substr($this->tag, $tag_offset, $offset + strlen($markup) - $tag_offset);
                    break;
                }
            }
        }

        return $elements;
    }

    /**
     * Determine if a tag should be skipped during nesting analysis
     *
     * True for HTML void elements, comments and doctype declarations.
     * Element names must end at whitespace, "/" or ">", so "<colgroup>" is
     * not mistaken for "<col>".
     *
     * @param string $tag The HTML tag to check
     * @return bool True if the tag should be skipped, false otherwise
     */
    private function shouldSkip(string $tag): bool
    {
        // https://developer.mozilla.org/en-US/docs/Glossary/Void_element
        $pattern = '/^<(?:(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)(?=[\s\/>])'
            . '|!--|!DOCTYPE)/i';

        return preg_match($pattern, $tag) === 1;
    }

    /**
     * Whether a tag never has a separate closing tag: void elements,
     * comments, doctype declarations, and explicit self-closing tags.
     *
     * @param string $tag The HTML tag to check
     * @return bool
     */
    private function isStandaloneTag(string $tag): bool
    {
        if ($this->shouldSkip($tag)) {
            return true;
        }

        // "/>" counts as self-closing only directly after the tag name, a
        // quote or whitespace; in "<a href=http://x/>" the slash belongs to
        // the unquoted attribute value.
        return preg_match('/(?:^<[a-zA-Z0-9_:-]+|["\'\s])\/>$/', $tag) === 1;
    }

    /**
     * Run a selector pattern whose first capture group is the opening tag.
     *
     * @param string $pattern A complete PCRE pattern
     * @return array<int, array{0: string, 1: int}>
     */
    private function matchTags(string $pattern): array
    {
        if (!preg_match_all($pattern, $this->tag, $found, PREG_OFFSET_CAPTURE)) {
            return [];
        }

        return $found[1] ?? [];
    }

    /**
     * Build the pattern matching one attribute (with an optional quoted
     * value) together with its leading whitespace.
     *
     * @param string $attribute The raw attribute name (escaped here)
     * @return string
     */
    private function attributePattern(string $attribute): string
    {
        return '/\s' . preg_quote($attribute, '/') . '(=(["\']).*?\2)?(?=\s|>|\/)/s';
    }

    /**
     * Byte offset just past the ">" that ends the opening tag.
     *
     * A ">" inside a quoted attribute value does not end the tag. When the
     * quote-aware scan fails (e.g. unbalanced quotes), the first ">" is used.
     *
     * @return int|null The offset, or null if the markup contains no ">"
     */
    private function findOpeningTagEnd(): ?int
    {
        if (preg_match('/^\s*<(?:[^>"\']++|"[^"]*+"|\'[^\']*+\')*+>/s', $this->tag, $found) === 1) {
            return strlen($found[0]);
        }

        $position = strpos($this->tag, '>');

        return $position === false ? null : $position + 1;
    }

    /**
     * Split the markup into the opening tag and everything after it.
     *
     * @return array{0: string, 1: string} The whole markup and '' when no opening tag end is found
     */
    private function splitOpeningTag(): array
    {
        $end = $this->findOpeningTagEnd();
        if ($end === null) {
            return [$this->tag, ''];
        }

        return [substr($this->tag, 0, $end), substr($this->tag, $end)];
    }

    /**
     * Locate the inner content of the element.
     *
     * @return array{0: int, 1: int}|null Start offset and length, or null for
     *                                    self-closing or malformed markup
     */
    private function getContentBounds(): ?array
    {
        if ($this->self_closing) {
            return null;
        }

        $tag_name = $this->getTagName();
        if ($tag_name === '') {
            return null;
        }

        $start = $this->findOpeningTagEnd();
        if ($start === null) {
            return null;
        }

        // HTML tag names are case-insensitive, so "<DIV>...</div>" is valid.
        $end = strripos($this->tag, "</{$tag_name}>");
        if ($end === false || $end < $start) {
            return null;
        }

        return [$start, $end - $start];
    }

    /**
     * Convert the HTML object back to a string
     *
     * @return string The current markup
     */
    public function __toString(): string
    {
        // Repair path for text found after the closing tag of an empty element.
        // NOTE(review): the outer pattern requires the markup to end with an
        // empty "<name ...></name>" pair, so the only trailing text that can
        // reach the inner branch appears to be a final newline (which "$"
        // tolerates) - confirm whether a broader repair was intended.
        if (preg_match('/<([a-zA-Z0-9_:-]+)[^>]*?><\/\1>$/s', $this->tag)) {
            if (preg_match('/<\/[^>]+>[^<>]+$/s', $this->tag)) {
                preg_match('/<([a-zA-Z0-9_:-]+)([^>]*)>.*?<\/\1>/s', $this->tag, $matches);
                if (!empty($matches)) {
                    $tagName = $matches[1];
                    $attributes = trim($matches[2]);
                    $content = $this->getContent();

                    // Fold whatever follows the closing tag back into the attribute list.
                    preg_match('/<\/[^>]+>(.+)$/s', $this->tag, $extraAttrs);
                    if (!empty($extraAttrs[1])) {
                        $attributes .= ' ' . trim($extraAttrs[1]);
                    }

                    return "<{$tagName} {$attributes}>{$content}</{$tagName}>";
                }
            }
        }

        return $this->tag;
    }
}
