<?php
/**
 * Cache Paths Service
 *
 * Generates secure, normalized cache file paths for the current request.
 * Includes host validation, URI sanitization, and cache poisoning prevention.
 *
 * @package Mamba\Modules\Caching\Services
 * @since   1.0.0
 */

namespace Mamba\Modules\Caching\Services;

// Stop immediately unless the ABSPATH constant is defined, so the rest of this
// file is never evaluated when it is loaded without that constant in place
// (presumably a direct request that bypassed the WordPress bootstrap).
if ( ! defined( 'ABSPATH' ) ) {
    exit;
}

/**
 * Class Paths
 *
 * Generates cache file paths with security measures including host validation,
 * URI sanitization, query normalization, and tracking parameter removal.
 *
 * @since 1.0.0
 */
final class Paths {
    /**
     * Query parameters that take part in the cache key before the
     * `mamba_cacheable_query_params` filter is applied.
     *
     * 'v' (WooCommerce geolocation hash) is intentionally absent: it is
     * represented by the `woo_geo` piece of the variant suffix instead, which
     * avoids keying the same variation twice.
     */
    private const DEFAULT_CACHEABLE_PARAMS = [
        'orderby', 'order', 'per_page', 'page', 'paged',
        'min_price', 'max_price', 'rating', 'rating_filter', 'on_sale',
        'stock_status',
        'in_stock',
        'product_cat', 'product_tag',
    ];

    /** Prefixes of dynamic filter/attribute parameters that are always cacheable. */
    private const DYNAMIC_PARAM_PATTERN = '/^(filter_|attribute_)/';

    /** Accepted shape of a WooCommerce geolocation hash (`v` parameter / cookie). */
    private const GEO_HASH_PATTERN = '/^[a-zA-Z0-9]{8,32}$/';

    /** Two-letter upper-case country code. */
    private const COUNTRY_PATTERN = '/^[A-Z]{2}$/';

    /** Maximum byte length of the URI part of a cache path. */
    private const MAX_URI_LENGTH = 200;

    /**
     * Build the cache paths for the request currently being served.
     *
     * Reads REQUEST_URI, the whitelisted $_GET parameters and the request's
     * cookies/headers (through the variant suffix).
     *
     * @return array|null Array with the keys 'dir', 'file', 'meta' and 'lock',
     *                    or null when the Host header is not canonical. In that
     *                    case MAMBA_CACHE_BYPASS is defined as true.
     */
    public static function forCurrentRequest(): ?array {
        // SECURITY: Validate canonical host to prevent host header poisoning
        if (!self::validateCanonicalHost()) {
            // Guarded so a second call in the same request does not raise
            // "constant already defined".
            if (!defined('MAMBA_CACHE_BYPASS')) {
                define('MAMBA_CACHE_BYPASS', true);
            }
            return null;
        }

        // Use the canonical host (never the Host header) for the cache path.
        $host = self::normalizeHost((string) parse_url(home_url(), PHP_URL_HOST));

        // phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized -- Sanitized via sanitizeUri below
        $rawUri = isset($_SERVER['REQUEST_URI']) ? (string) wp_unslash($_SERVER['REQUEST_URI']) : '/';

        // Everything before the first '?' is the path. strtok() is avoided
        // because it skips leading delimiters ("?a=b" would yield "a=b").
        $queryStart = strpos($rawUri, '?');
        $path = $queryStart === false ? $rawUri : (string) substr($rawUri, 0, $queryStart);
        $uri = self::sanitizeUri($path === '' ? '/' : $path);

        // Only extract required query parameters from $_GET (avoid processing full superglobal)
        $cacheable = self::cacheableParams();
        $query = [];
        foreach ($cacheable as $param) {
            if (isset($_GET[$param])) {
                // Values are sanitized later, when the cache key is assembled.
                $query[$param] = wp_unslash($_GET[$param]);
            }
        }

        // Extract dynamic WooCommerce filter/attribute params from query string
        $rawQueryString = wp_parse_url($rawUri, PHP_URL_QUERY);
        if ($rawQueryString) {
            $dynamicParts = [];
            wp_parse_str($rawQueryString, $dynamicParts);
            foreach ($dynamicParts as $k => $v) {
                if (preg_match(self::DYNAMIC_PARAM_PATTERN, (string) $k) && !isset($query[$k])) {
                    $query[$k] = is_array($v) ? array_map('sanitize_text_field', $v) : sanitize_text_field($v);
                }
            }
        }

        $dir = self::buildCacheDir($host, $uri, $query, $cacheable);
        return self::pathSet($dir, self::variantSuffix());
    }

    /**
     * Build the cache paths for an arbitrary absolute URL.
     *
     * @param string $url    Absolute URL; must contain a host.
     * @param bool   $mobile Whether to address the mobile variant.
     * @return array|null Same shape as forCurrentRequest(), or null when the
     *                    URL cannot be parsed or has no host.
     */
    public static function forUrl(string $url, bool $mobile=false): ?array {
        $parts = wp_parse_url($url);
        if (!$parts || empty($parts['host'])) return null;

        $uri = $parts['path'] ?? '/';

        // Trim tracking parameters from URL query string
        if (!empty($parts['query'])) {
            $query = [];
            parse_str($parts['query'], $query);
            $uri .= '?' . http_build_query(self::trimTrackingParams($query));
        }

        // Use pure function instead of mutating $_SERVER
        return self::forUrlPure($parts['host'], $uri, $mobile);
    }

    /**
     * Build the cache paths from a host and a URI (path plus optional query).
     *
     * Does not read REQUEST_URI. The variant suffix still consults cookies,
     * headers and options, see variantSuffixPure().
     *
     * @param string $host   Host name; normalized before use.
     * @param string $uri    Path, optionally followed by '?' and a query string.
     * @param bool   $mobile Whether to address the mobile variant.
     * @return array|null Array with the keys 'dir', 'file', 'meta' and 'lock'.
     */
    public static function forUrlPure(string $host, string $uri, bool $mobile=false): ?array {
        // Normalize host for filesystem safety
        $host = self::normalizeHost($host);

        // Split once into path and query; only the path is a filesystem concern.
        $query = [];
        $queryStart = strpos($uri, '?');
        if ($queryStart === false) {
            $path = $uri;
        } else {
            $path = (string) substr($uri, 0, $queryStart);
            parse_str((string) substr($uri, $queryStart + 1), $query);
        }
        $path = self::sanitizeUri($path);

        $dir = self::buildCacheDir($host, $path, $query, self::cacheableParams());

        // Handle mobile variant
        return self::pathSet($dir, self::variantSuffixPure($mobile, $query));
    }

    /**
     * Return the list of query parameter names that take part in the cache
     * key, after the `mamba_cacheable_query_params` filter.
     */
    private static function cacheableParams(): array {
        return (array) apply_filters('mamba_cacheable_query_params', self::DEFAULT_CACHEABLE_PARAMS);
    }

    /**
     * Turn an already normalized host, a sanitized URI and a query array into
     * the cache directory (with trailing slash).
     *
     * Cacheable parameters are reduced to a sorted "key=value" list whose MD5
     * becomes a `_q_<hash>` sub-directory; all other parameters are ignored.
     *
     * @param string $host      Output of normalizeHost().
     * @param string $uri       Output of sanitizeUri().
     * @param array  $query     Raw query parameters.
     * @param array  $cacheable Parameter names allowed into the key.
     */
    private static function buildCacheDir(string $host, string $uri, array $query, array $cacheable): string {
        // SECURITY: Normalize URL and query parameters to prevent duplicate cache keys
        [$normalizedUri, $normalizedQuery] = self::normalizeUrlAndQuery($uri, $query);

        // Trim tracking parameters for extra safety
        $normalizedQuery = self::trimTrackingParams($normalizedQuery);

        $pairs = [];
        foreach ($normalizedQuery as $k => $v) {
            // parse_str() turns numeric keys into integers; compare as strings
            // and strictly so an integer key can never loosely match a name.
            $key = (string) $k;
            if (!in_array($key, $cacheable, true) && !preg_match(self::DYNAMIC_PARAM_PATTERN, $key)) {
                continue;
            }
            if (is_array($v)) {
                // Order of multi-value parameters must not influence the key.
                sort($v);
                $v = implode(',', array_map('sanitize_text_field', $v));
            } else {
                $v = sanitize_text_field($v);
            }
            $pairs[] = $key . '=' . $v;
        }
        sort($pairs);

        $base = WP_CONTENT_DIR . '/cache/mamba/' . $host;
        if ($normalizedUri !== '/') $base .= $normalizedUri;
        if ($pairs) $base .= '/_q_' . md5(implode('&', $pairs));

        return trailingslashit($base);
    }

    /**
     * Assemble the result array from a cache directory and a variant suffix.
     *
     * @param string $dir    Cache directory with trailing slash.
     * @param string $suffix Variant suffix, e.g. output of variantSuffix().
     * @return array Keys 'dir', 'file', 'meta', 'lock'.
     */
    private static function pathSet(string $dir, string $suffix): array {
        $stem = $dir . 'index' . $suffix;
        return [
            'dir'  => $dir,
            'file' => $stem . '.html',
            'meta' => $stem . '.meta.json',
            'lock' => $stem . '.lock',
        ];
    }

    /**
     * Validate canonical host to prevent host header poisoning attacks.
     *
     * Accepts the host of home_url() and its www/apex counterpart (compared
     * case-insensitively, port ignored); everything else is rejected.
     */
    private static function validateCanonicalHost(): bool {
        // parse_url() yields null for a URL without host; cast so the
        // comparisons below cannot raise a TypeError.
        $canonicalHost = strtolower((string) parse_url(home_url(), PHP_URL_HOST));
        // phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized -- Sanitized via sanitize_text_field
        $requestHost = isset($_SERVER['HTTP_HOST']) ? strtolower(sanitize_text_field(wp_unslash($_SERVER['HTTP_HOST']))) : '';

        // Remove port if present
        $requestHost = (string) preg_replace('/:\d+$/', '', $requestHost);

        if ($canonicalHost === '' || $requestHost === '') {
            return false;
        }

        // Exact match
        if ($canonicalHost === $requestHost) {
            return true;
        }

        // www vs apex normalization; anything else is non-canonical
        return $canonicalHost === 'www.' . $requestHost
            || $requestHost === 'www.' . $canonicalHost;
    }

    /**
     * Normalize URL and query parameters to prevent duplicate cache keys.
     *
     * Strips trailing slashes and removes parameters that carry their default
     * value (first page, menu_order sorting, 12 items per page, empty search).
     *
     * @return array Two elements: normalized URI and normalized query array.
     */
    private static function normalizeUrlAndQuery(string $uri, array $query): array {
        // Normalize trailing slashes
        $uri = rtrim($uri, '/');
        if ($uri === '') $uri = '/';

        // Normalize pagination parameters (loose comparison on purpose so
        // numeric spellings such as "01" are treated like "1")
        if (isset($query['paged']) && $query['paged'] == '1') {
            unset($query['paged']);
        }
        if (isset($query['page']) && $query['page'] == '1') {
            unset($query['page']);
        }

        // Normalize default orderby (menu_order is WooCommerce default)
        if (isset($query['orderby']) && $query['orderby'] === 'menu_order') {
            unset($query['orderby']);
        }

        // Normalize empty search. Array values (s[]=x) are left alone instead
        // of being passed to trim(), and "0" counts as a real search term.
        if (isset($query['s']) && is_scalar($query['s']) && trim((string) $query['s']) === '') {
            unset($query['s']);
        }

        // Normalize default per_page (WooCommerce default is 12)
        if (isset($query['per_page']) && $query['per_page'] == '12') {
            unset($query['per_page']);
        }

        return [$uri, $query];
    }

    /**
     * Normalize host for filesystem safety.
     *
     * Lower-cases the host, replaces every character outside a-z, 0-9, dot and
     * dash with an underscore and strips leading/trailing dots, so the result
     * is always a single, non-special path segment.
     */
    private static function normalizeHost(string $host): string {
        $host = strtolower($host);

        // Keep only: a-z, 0-9, dots, dashes
        $host = (string) preg_replace('/[^a-z0-9\.\-]/', '_', $host);

        // A host of "." or ".." would be interpreted as a directory reference;
        // edge dots also carry no meaning ("example.com." === "example.com").
        $host = trim($host, '.');

        // Ensure host is not empty
        return $host ?: 'localhost';
    }

    /**
     * Sanitize URI for filesystem safety.
     *
     * Normalizes separators, removes empty, "." and ".." segments, guarantees
     * a leading slash and limits the length to MAX_URI_LENGTH bytes (cut back
     * to the previous segment boundary). A trailing slash is preserved.
     */
    private static function sanitizeUri(string $uri): string {
        // Normalize path using WordPress function
        $uri = wp_normalize_path($uri);

        // Prevent directory traversal by dropping dot segments. A single
        // regex pass over "../" is not enough: "/..././x" would collapse to
        // "/../x", and a trailing "/.." would not be matched at all.
        $segments = [];
        foreach (explode('/', $uri) as $segment) {
            if ($segment === '' || $segment === '.' || $segment === '..') {
                continue;
            }
            $segments[] = $segment;
        }

        $clean = '/' . implode('/', $segments);
        if ($clean !== '/' && substr($uri, -1) === '/') {
            $clean .= '/';
        }

        // Limit path length (defensive against extreme slugs)
        if (strlen($clean) > self::MAX_URI_LENGTH) {
            $clean = (string) substr($clean, 0, self::MAX_URI_LENGTH);
            // Drop the segment that was cut in half
            $clean = (string) preg_replace('/[^\/]*$/', '', $clean);
            if ($clean === '') $clean = '/';
        }

        return $clean;
    }

    /**
     * Variant suffix for an explicitly described request.
     *
     * @param bool  $mobile Whether the mobile variant is addressed.
     * @param array $query  Parsed query of the URL; its 'v' entry, when it is
     *                      a string, is used as WooCommerce geolocation hash.
     * @return string '-' followed by an MD5 of the variant pieces.
     */
    private static function variantSuffixPure(bool $mobile=false, array $query=[]): string {
        $geoHashParam = (isset($query['v']) && is_string($query['v'])) ? $query['v'] : null;

        return self::buildVariantSuffix($mobile, $geoHashParam, true);
    }

    /**
     * Remove tracking parameters from query array for cache safety
     *
     * @param array $query Query parameters
     * @return array Cleaned query parameters
     */
    public static function trimTrackingParams(array $query): array {
        // Default tracking parameters to always remove
        $trackingParams = [
            // UTM parameters
            'utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content',
            // Social media tracking
            'fbclid', 'gclid', 'msclkid', 'ref', 'ref_src', 'ref_url',
            // Analytics
            '_ga', '_gl', '_gac', '_gid', '_gaexp', '_gcl_au',
            // Other common tracking
            'mc_cid', 'mc_eid', 'mc_tc', 'mc_cc', 'mc_ck', 'mc_rid',
            'yclid', 'zanpid', 'zanpsid', 'zanpuid', 'zanpwid',
            'dclid', 'gbraid', 'wbraid',
            // Generic tracking patterns
            'clickid', 'campaign', 'source', 'medium', 'term', 'content',
            'affiliate', 'partner', 'referrer', 'referral',
        ];

        // Allow filtering of tracking parameters
        $trackingParams = (array) apply_filters('mamba_tracking_params', $trackingParams);

        // Remove tracking parameters
        foreach ($trackingParams as $param) {
            unset($query[$param]);
        }

        // Also remove any parameters matching tracking patterns
        foreach (array_keys($query) as $key) {
            if (preg_match('/^(utm_|fbclid|gclid|_ga|mc_|zanp|dclid|gbraid|wbraid|msclkid|clickid)/', (string) $key)) {
                unset($query[$key]);
            }
        }

        return $query;
    }

    /**
     * Variant suffix for the request currently being served; the device type
     * comes from wp_is_mobile() and the geolocation hash from $_GET['v'].
     *
     * @return string '-' followed by an MD5 of the variant pieces.
     */
    private static function variantSuffix(): string {
        return self::buildVariantSuffix(wp_is_mobile(), null, false);
    }

    /**
     * Shared implementation behind variantSuffix() and variantSuffixPure().
     *
     * Pieces, in order: device, language, currency, geolocation (only when
     * WooCommerce uses 'geolocation_ajax'), role, tax display mode and,
     * optionally, role-based pricing dimensions.
     *
     * @param bool        $mobile          Mobile (true) or desktop (false) variant.
     * @param string|null $geoHashParam    Explicit WooCommerce 'v' value; null
     *                                     lets the detection read $_GET['v'].
     * @param bool        $withRolePricing Append Invalidation::detectRoleBasedPricing()
     *                                     dimensions.
     */
    private static function buildVariantSuffix(bool $mobile, ?string $geoHashParam, bool $withRolePricing): string {
        $pieces = ['d=' . ($mobile ? 'm' : 'd')];

        // Language: prefer Polylang, then WPML constant; allow override via filter
        $detectedLang = function_exists('pll_current_language') ? pll_current_language() : (defined('ICL_LANGUAGE_CODE') ? ICL_LANGUAGE_CODE : '');
        $lang = apply_filters('mamba_variant_lang', $detectedLang);
        $lang = $lang ? sanitize_key((string)$lang) : '';
        if ($lang) $pieces[] = 'lang='.$lang;

        // Currency: detect common multi-currency cookies before falling back; allow override via filter
        $detectedCur = '';
        $cookieKeys = [
            'woocommerce_multicurrency_for_woo_currency', // Fox/ViWCM
            'woocs_current_currency',                     // WOOCS
            'wmc_current_currency',                       // Woo Multi-Currency
            'yay_currency',                               // YayCurrency
            'aelia_cs_selected_currency',                 // Aelia
        ];
        foreach ($cookieKeys as $ck) {
            // phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized -- Sanitized via sanitize_text_field
            if (!empty($_COOKIE[$ck])) { $detectedCur = sanitize_text_field(wp_unslash($_COOKIE[$ck])); break; }
        }
        if ($detectedCur === '') { $detectedCur = (string) get_option('woocommerce_currency',''); }
        $currency = apply_filters('mamba_variant_currency', $detectedCur);
        $currency = $currency ? sanitize_key((string)$currency) : '';
        if ($currency) $pieces[] = 'cur='.$currency;

        // Canonicalize geolocation - prefer WooCommerce geo hash if present, else use country
        if (get_option('woocommerce_default_customer_address') === 'geolocation_ajax') {
            $wooGeoHash = self::detectWooCommerceGeoHash($geoHashParam);

            if ($wooGeoHash) {
                // Use WooCommerce geo hash as canonical geolocation
                $pieces[] = 'woo_geo=' . $wooGeoHash;
            } else {
                // Fallback to country-based geolocation
                $geoCountry = apply_filters('mamba_variant_country', self::detectGeoCountry());
                $geoCountry = $geoCountry ? strtoupper(sanitize_key((string)$geoCountry)) : '';
                // Final validation before use (the filter may return anything)
                if ($geoCountry && preg_match(self::COUNTRY_PATTERN, $geoCountry)) $pieces[] = 'country='.$geoCountry;
            }
        }

        $pieces[] = 'role=guest'; // We never cache logged-in users; normalize for invalidation

        // Tax display mode: affects price display (incl/excl)
        $taxDisplay = get_option('woocommerce_tax_display_shop', 'incl');
        $pieces[] = 'tax=' . sanitize_key($taxDisplay);

        // Add role-based pricing plugin dimensions.
        // NOTE(review): only the URL-based suffix has ever included these, so
        // forUrl()/forUrlPure() and forCurrentRequest() disagree whenever this
        // list is non-empty. Kept as found; confirm which side is intended.
        if ($withRolePricing) {
            $roleDimensions = \Mamba\Modules\Caching\Services\Invalidation::detectRoleBasedPricing();
            if (!empty($roleDimensions)) {
                foreach ($roleDimensions as $dimension) {
                    $pieces[] = $dimension;
                }
            }
        }

        return '-' . md5(implode('|', $pieces));
    }

    /**
     * Best-effort country detection from request headers and the WooCommerce
     * geo cookie.
     *
     * Order: MAMBA_GEO header, GEOIP_COUNTRY_CODE / X_GEO_COUNTRY headers,
     * then the 'woocommerce_geo_hash' cookie. Header values are returned in
     * the case they were sent in, cookie values upper-cased; the caller
     * upper-cases and re-validates after filtering.
     *
     * @return string Two-letter country code or '' when nothing matched.
     */
    private static function detectGeoCountry(): string {
        // MAMBA_GEO first (merchant control), then common geolocation headers
        foreach (['HTTP_MAMBA_GEO', 'HTTP_GEOIP_COUNTRY_CODE', 'HTTP_X_GEO_COUNTRY'] as $header) {
            if (empty($_SERVER[$header])) {
                continue;
            }
            // phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized -- Sanitized via sanitize_text_field
            $candidate = sanitize_text_field(wp_unslash($_SERVER[$header]));
            if (preg_match(self::COUNTRY_PATTERN, strtoupper($candidate))) {
                return $candidate;
            }
        }

        // Woo geo cookie (best effort)
        if (empty($_COOKIE['woocommerce_geo_hash'])) {
            return '';
        }
        // phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized -- Sanitized via sanitize_text_field
        $val = sanitize_text_field(wp_unslash($_COOKIE['woocommerce_geo_hash']));

        // Try simple splits first: the country is expected in the first field
        foreach ([':', '|', ';', ','] as $sep) {
            $first = strtoupper((string) explode($sep, $val)[0]);
            if (preg_match(self::COUNTRY_PATTERN, $first)) {
                return $first;
            }
        }

        // Fallback: first pair of letters anywhere in the value
        if (preg_match('/([A-Z]{2})/', strtoupper($val), $m)) {
            return $m[1];
        }

        return '';
    }

    /**
     * Detect WooCommerce geolocation hash parameter.
     *
     * @param string|null $vParam Explicit value of the 'v' parameter; when
     *                            null, $_GET['v'] is consulted instead.
     * @return string|null The hash from the parameter or, failing that, from
     *                     the 'woocommerce_geo_hash' cookie, provided it is
     *                     8-32 alphanumeric characters; otherwise null.
     */
    private static function detectWooCommerceGeoHash(?string $vParam = null): ?string {
        // Only read the specific 'v' parameter needed (avoid processing full $_GET)
        $v = $vParam ?? (isset($_GET['v']) ? sanitize_text_field(wp_unslash($_GET['v'])) : null);

        // Check for WooCommerce's v parameter
        if (!empty($v) && is_string($v)) {
            $hash = sanitize_text_field($v);
            if (preg_match(self::GEO_HASH_PATTERN, $hash)) {
                return $hash;
            }
        }

        // Check for WooCommerce geo cookie as fallback
        if (!empty($_COOKIE['woocommerce_geo_hash'])) {
            // phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized -- Sanitized via sanitize_text_field
            $cookieHash = sanitize_text_field(wp_unslash($_COOKIE['woocommerce_geo_hash']));
            if (preg_match(self::GEO_HASH_PATTERN, $cookieHash)) {
                return $cookieHash;
            }
        }

        return null;
    }
}
