<?php
/**
 * HTML Minifier Service
 *
 * Provides HTML, CSS, and JavaScript minification with configurable modes.
 *
 * @package Mamba\Modules\Bloat\Services
 * @since   1.0.0
 */

namespace Mamba\Modules\Bloat\Services;

/**
 * Class HtmlMinifier
 *
 * HTML/CSS/JS minifier with two modes:
 * - conservative: HTML-only, removes regular comments (keeps conditional), collapses inter-tag whitespace, trims outer whitespace
 * - aggressive: applies legacy MU logic to collapse whitespace in HTML, inline CSS and JS, and remove comments (keeps conditional)
 *
 * @since 1.0.0
 */
final class HtmlMinifier {
    /**
     * Minify a complete HTML document.
     *
     * Input that is empty or does not contain an `<html` tag (case-insensitive)
     * is returned untouched, so fragments and non-HTML payloads pass straight through.
     *
     * @param string $html Markup to minify.
     * @param string $mode 'aggressive' selects the tokenizer-based minifier; any
     *                     other value falls back to 'conservative'.
     * @return string Minified markup, or the input unchanged when it is not a
     *                full document or when PCRE fails part-way through.
     */
    public static function minify(string $html, string $mode = 'conservative'): string {
        // Quick guards
        if ($html === '' || stripos($html, '<html') === false) {
            return $html;
        }
        return $mode === 'aggressive' ? self::aggressive($html) : self::conservative($html);
    }

    /**
     * HTML-only minification: strips regular comments, collapses whitespace and
     * leaves script/style/pre/textarea/code blocks byte-for-byte intact.
     *
     * @param string $html Full document markup.
     * @return string Minified markup; the original markup if a PCRE call fails.
     */
    private static function conservative(string $html): string {
        $original = $html;

        // Swap whitespace-sensitive blocks for opaque tokens so the rules below
        // cannot touch their contents.
        $placeholders = [];
        $patterns = [
            'script'   => '~<script\b[^>]*>.*?</script>~is',
            'style'    => '~<style\b[^>]*>.*?</style>~is',
            'pre'      => '~<pre\b[^>]*>.*?</pre>~is',
            'textarea' => '~<textarea\b[^>]*>.*?</textarea>~is',
            'code'     => '~<code\b[^>]*>.*?</code>~is',
        ];
        foreach ($patterns as $type => $regex) {
            $html = preg_replace_callback($regex, static function (array $m) use (&$placeholders, $type): string {
                $k = '___MAMBA_HOLD_' . strtoupper($type) . '_' . count($placeholders) . '___';
                $placeholders[$k] = $m[0];
                return $k;
            }, $html);
            if ($html === null) {
                // PCRE error (e.g. backtrack limit on a huge block): serve the page unminified.
                return $original;
            }
        }

        // Remove regular comments but keep conditional comments
        $html = preg_replace_callback('/<!--(.*?)-->/s', static function (array $m): string {
            return self::isConditionalComment($m[0]) ? $m[0] : '';
        }, $html);
        if ($html === null) {
            return $original;
        }

        // Replace newlines and tabs with spaces
        $html = str_replace(["\r\n", "\r", "\n", "\t"], ' ', $html);

        // Applied in order: collapse whitespace runs, drop whitespace between
        // tags, then drop whitespace hugging the inside of angle brackets.
        $rules = [
            '/\s{2,}/' => ' ',
            '/>\s+</'  => '><',
            '/\s+>/'   => '>',
            '/<\s+/'   => '<',
        ];
        foreach ($rules as $regex => $replacement) {
            $html = preg_replace($regex, $replacement, $html);
            if ($html === null) {
                return $original;
            }
        }

        $html = trim($html);

        // Restore placeholders newest-first. A held block may itself contain a
        // token created earlier (e.g. a <script> inside a <pre>); a single-pass
        // strtr() would leave that inner token in the output.
        if ($placeholders) {
            $html = str_replace(
                array_reverse(array_keys($placeholders)),
                array_reverse(array_values($placeholders)),
                $html
            );
        }
        return $html;
    }

    /**
     * Tokenizer-based minification: collapses whitespace in markup, inline CSS
     * and inline JS, and removes non-conditional comments. Content between
     * <pre>/<textarea> tags is left untouched.
     *
     * NOTE: inline scripts have their newlines replaced by spaces, so code that
     * follows a `//` line comment ends up on the comment's line. This is the
     * documented trade-off of the aggressive mode.
     *
     * @param string $html Full document markup.
     * @return string Minified markup; the input unminified if tokenizing fails.
     */
    private static function aggressive(string $html): string {
        // Keep a leading doctype verbatim; the tokenizer has no rule for it.
        $doctype = '';
        if (preg_match('/^<!doctype.*?>/i', $html, $m)) {
            $doctype = $m[0];
            $html = (string) substr($html, strlen($doctype));
        }

        $pattern = '/
            <(?<script>script).*?<\/script\s*>     # script blocks
            | <(?<style>style).*?<\/style\s*>     # style blocks
            | <!(?<comment>--) .*? --\s*>         # comments
            | <(?<tag>[\/\w.:-]+)(?:".*?"|\'.*?\'|[^\'">]+)*> # tags
            | (?<text>[^<]+|<)                    # text, or a lone < that opens no token
        /isx';

        if (preg_match_all($pattern, $html, $tokens, PREG_SET_ORDER) === false) {
            // PCRE error: returning only the matched tokens would drop the page body.
            return $doctype . $html;
        }

        $output = '';
        $rawTag = ''; // 'pre' or 'textarea' while inside one, '' otherwise

        foreach ($tokens as $token) {
            $content = $token[0];

            // Groups that did not participate but precede the matched one are
            // reported as '' (trailing ones are omitted), so isset() cannot be
            // used to tell which alternative matched.
            if (($token['script'] ?? '') !== '' || ($token['style'] ?? '') !== '') {
                $content = self::collapseWhitespace($content);
            } elseif (($token['comment'] ?? '') !== '') {
                if (!self::isConditionalComment($content)) {
                    $content = '';
                }
            } elseif (($token['tag'] ?? '') !== '') {
                $name = strtolower($token['tag']);
                if ($rawTag === '' && ($name === 'pre' || $name === 'textarea')) {
                    $rawTag = $name;
                } elseif ($rawTag !== '' && $name === '/' . $rawTag) {
                    // Only the matching closer ends raw mode.
                    $rawTag = '';
                }
                if ($rawTag === '') {
                    $collapsed = preg_replace('/\s+/', ' ', $content);
                    if ($collapsed !== null) {
                        $content = str_replace(' />', '/>', $collapsed);
                    }
                }
            } elseif ($rawTag === '') {
                $content = self::collapseWhitespace($content);
            }

            $output .= $content;
        }

        // Optional stats comment from original code is omitted to avoid adding bytes
        return $doctype . $output;
    }

    /**
     * Tell whether an HTML comment is an IE-style conditional comment that must
     * survive minification.
     *
     * Recognised forms: anything opening with `<!--[if` (downlevel-hidden blocks
     * and downlevel-revealed openers) and the `<!--<![endif]-->` closer.
     *
     * @param string $comment A single comment, starting at `<!--`.
     * @return bool True when the comment should be kept.
     */
    private static function isConditionalComment(string $comment): bool {
        return preg_match('/^<!--\s*(?:\[if\b|<!\[endif\])/i', $comment) === 1;
    }

    /**
     * Turn CR, LF and TAB into spaces, then squeeze runs of spaces into one.
     *
     * @param string $str Text to collapse.
     * @return string Collapsed text; the space-substituted text if PCRE fails.
     */
    private static function collapseWhitespace(string $str): string {
        $str = str_replace(["\r", "\n", "\t"], ' ', $str);
        $collapsed = preg_replace('/ {2,}/', ' ', $str);
        return $collapsed === null ? $str : $collapsed;
    }
}
