<?php
/**
 * Badger Intelligence Engine
 * 
 * Collects, normalizes, and transmits AI agent intelligence to the central
 * Badger Intelligence Network (BIN). This is the core data collection layer
 * that makes the honeypot valuable at scale.
 * 
 * Each WordPress site becomes a sensor node in a distributed intelligence network.
 */

declare(strict_types=1);

// Bail out when the file is requested directly rather than loaded by WordPress.
defined('ABSPATH') || exit;

class Badger_Intelligence
{
    /** @var self|null Shared instance handed out by get_instance(); null until first use. */
    private static ?self $instance = null;

    /** @var string Base URL that the '/intelligence/ingest' and '/intelligence/urgent' paths are appended to. */
    private const API_BASE = 'https://api.badgerwp.com/v1';

    /** @var int Maximum number of queued records sent per sync_to_central() call. */
    private const BATCH_SIZE = 50;

    /** @var int Minimum seconds between successful syncs, checked in sync_to_central(). */
    private const SYNC_INTERVAL = 300; // 5 minutes

    /**
     * Return the shared instance, constructing it on the first call.
     *
     * @return self The single shared collector.
     */
    public static function get_instance(): self
    {
        return self::$instance ??= new self();
    }

    /**
     * Wire the collector into WordPress: detection hooks, the sync hook, the
     * custom cron interval, and the recurring sync event.
     */
    public function __construct()
    {
        // Hook into detection events
        add_action('badger_agent_detected', [$this, 'collect_intelligence'], 10, 3);
        add_action('badger_honeypot_triggered', [$this, 'collect_honeypot_intelligence'], 10, 4);

        // Scheduled sync to central
        add_action('badger_intelligence_sync', [$this, 'sync_to_central']);

        // Register the custom interval BEFORE scheduling: wp_schedule_event()
        // rejects a recurrence name that is not present in the schedule list
        // at call time, so the filter has to be in place first.
        add_filter('cron_schedules', [$this, 'add_cron_schedules']);

        if (!wp_next_scheduled('badger_intelligence_sync')) {
            wp_schedule_event(time(), 'five_minutes', 'badger_intelligence_sync');
        }
    }

    /**
     * Build an intelligence record for a detected agent, queue it for batch
     * sync, store it locally, and transmit it immediately when it is judged
     * high-value.
     *
     * @param string $agent_id   Identifier of the detected agent.
     * @param string $agent_name Human-readable agent name.
     * @param array  $context    Detection context. Keys read here: user_agent,
     *                           request_uri, behavioral_score, honeypot_depth,
     *                           trap_id. All are optional.
     */
    public function collect_intelligence(
        string $agent_id,
        string $agent_name,
        array $context
    ): void {
        // Resolve once; the address feeds five fields below.
        $client_ip = $this->get_client_ip();

        // Guard the type: normalize_user_agent() only accepts strings and this
        // file runs under strict_types.
        $user_agent = $context['user_agent'] ?? '';
        if (!is_string($user_agent)) {
            $user_agent = '';
        }

        $intelligence = [
            // Core identification
            'agent_id' => $agent_id,
            'agent_name' => $agent_name,
            'user_agent_raw' => $user_agent,
            'user_agent_normalized' => $this->normalize_user_agent($user_agent),

            // Network fingerprint
            'ip_address' => $client_ip,
            'ip_version' => $this->get_ip_version($client_ip),
            'asn' => $this->get_asn($client_ip),
            'country' => $this->get_country_code(),
            'datacenter' => $this->is_datacenter_ip($client_ip),
            'vpn' => $this->is_vpn_ip($client_ip),

            // Request fingerprint
            'request_uri' => $context['request_uri'] ?? '',
            'request_method' => $_SERVER['REQUEST_METHOD'] ?? 'GET',
            'request_time' => microtime(true),
            'http_version' => $_SERVER['SERVER_PROTOCOL'] ?? '',

            // Behavioral signals
            'accept_headers' => $this->get_accept_headers(),
            'encoding_headers' => $this->get_encoding_headers(),
            'language_headers' => $this->get_language_headers(),
            'referer' => $_SERVER['HTTP_REFERER'] ?? '',

            // Timing patterns (crucial for bot detection)
            'timestamp_utc' => gmdate('Y-m-d H:i:s'),
            'day_of_week' => gmdate('N'),
            'hour_of_day' => gmdate('G'),

            // Site context (anonymized)
            'site_type' => $this->get_site_type(),
            'content_management_system' => 'wordpress',
            'site_category' => $this->get_site_category(),

            // Session tracking
            'session_id' => $this->get_session_id(),
            'request_sequence' => $this->increment_request_sequence(),

            // Honeypot interaction depth
            'honeypot_depth' => $context['honeypot_depth'] ?? 0,
            'trap_id' => $context['trap_id'] ?? null,

            // Carry the score through: is_high_value_intelligence() and
            // store_local() both read this key from the record.
            'behavioral_score' => $context['behavioral_score'] ?? 0.0,
        ];

        // Store locally first (queue for batch transmission)
        $this->queue_intelligence($intelligence);

        // Also store in local database for site owner's view
        $this->store_local($intelligence);

        // Real-time transmission for high-value targets
        if ($this->is_high_value_intelligence($intelligence)) {
            $this->transmit_immediately($intelligence);
        }
    }

    /**
     * Handle a honeypot trigger: derive behavioral signals from the request
     * and re-dispatch them through the 'badger_agent_detected' action.
     *
     * @param string     $trap_id         Identifier of the trap that fired.
     * @param int        $depth           Trap depth reported with the trigger.
     * @param array      $request_context Request details. Keys read here:
     *                                    links_followed, js_executed, cookies,
     *                                    honeypot_depth (all optional).
     * @param array|null $detected_agent  Agent info with optional 'id' and
     *                                    'name' keys, or null when unidentified.
     */
    public function collect_honeypot_intelligence(
        string $trap_id,
        int $depth,
        array $request_context,
        ?array $detected_agent
    ): void {
        // The scoring helpers and the downstream record read 'honeypot_depth'.
        // Keep a caller-supplied value, otherwise fall back to $depth so the
        // depth argument is not lost.
        $scoring_context = $request_context;
        $scoring_context['honeypot_depth'] ??= $depth;

        // Score first so the confidence calculation can see the result.
        $behavioral_score = $this->calculate_behavioral_score($scoring_context);
        $scoring_context['behavioral_score'] = $behavioral_score;

        $intelligence = [
            'event_type' => 'honeypot_triggered',
            'trap_id' => $trap_id,
            'trap_depth' => $depth,
            'honeypot_depth' => $scoring_context['honeypot_depth'],
            'time_to_trigger_ms' => $this->calculate_time_to_trigger(),
            'links_followed' => $request_context['links_followed'] ?? [],
            'js_executed' => $request_context['js_executed'] ?? false,
            'cookies_accepted' => $request_context['cookies'] ?? [],

            // Mouse/behavior simulation detection
            'behavioral_score' => $behavioral_score,
            'fingerprint_confidence' => $this->calculate_fingerprint_confidence($scoring_context),

            // Agent identification (may differ from initial detection)
            'detected_agent' => $detected_agent,
            'agent_override' => $this->detect_agent_override($request_context),
        ];

        do_action(
            'badger_agent_detected',
            $detected_agent['id'] ?? 'unknown',
            $detected_agent['name'] ?? 'Unknown Agent',
            array_merge($request_context, $intelligence)
        );
    }

    /**
     * Normalize a user agent string for comparison.
     *
     * Lower-cases the string, removes "/<version>" suffixes with any number of
     * dotted components, removes runs of eight or more hex characters, and
     * collapses whitespace.
     *
     * @param string $ua Raw user agent.
     * @return string Normalized user agent.
     */
    private function normalize_user_agent(string $ua): string
    {
        $rules = [
            // Version numbers: "/5", "/5.0", "/120.0.0.0"
            '/\/\d+(?:\.\d+)*/' => '',
            // Random tokens (common in spoofed UAs)
            '/[a-f0-9]{8,}/' => '',
            // Whitespace runs
            '/\s+/' => ' ',
        ];

        $normalized = strtolower($ua);

        foreach ($rules as $pattern => $replacement) {
            $result = preg_replace($pattern, $replacement, $normalized);

            // preg_replace() yields null on a PCRE failure; keep the previous
            // value rather than handing null to trim().
            if ($result !== null) {
                $normalized = $result;
            }
        }

        return trim($normalized);
    }

    /**
     * Snapshot the four Accept-* request headers.
     *
     * @return array<string, string> Header values keyed by accept,
     *                               accept_charset, accept_encoding and
     *                               accept_language; '' when a header is absent.
     */
    private function get_accept_headers(): array
    {
        $server_keys = [
            'accept' => 'HTTP_ACCEPT',
            'accept_charset' => 'HTTP_ACCEPT_CHARSET',
            'accept_encoding' => 'HTTP_ACCEPT_ENCODING',
            'accept_language' => 'HTTP_ACCEPT_LANGUAGE',
        ];

        $fingerprint = [];
        foreach ($server_keys as $field => $server_key) {
            $fingerprint[$field] = $_SERVER[$server_key] ?? '';
        }

        return $fingerprint;
    }

    /**
     * Report which content codings appear in the Accept-Encoding header.
     *
     * Detection is a plain substring test on the raw header value.
     *
     * @return array<string, bool> Flags keyed by gzip, deflate, br, identity.
     */
    private function get_encoding_headers(): array
    {
        $header = $_SERVER['HTTP_ACCEPT_ENCODING'] ?? '';

        $flags = [];
        foreach (['gzip', 'deflate', 'br', 'identity'] as $coding) {
            $flags[$coding] = str_contains($header, $coding);
        }

        return $flags;
    }

    /**
     * Parse the Accept-Language header into a language => quality map.
     *
     * Each comma-separated entry is parsed on its own. An entry without a
     * q parameter gets 1.0; an explicit "q=0" stays 0.0. Entries whose
     * language tag is not well-formed (including "*") are skipped.
     *
     * @return array<string, float> Up to five languages, highest quality first.
     */
    private function get_language_headers(): array
    {
        $header = $_SERVER['HTTP_ACCEPT_LANGUAGE'] ?? '';

        $languages = [];
        foreach (explode(',', $header) as $entry) {
            $params = explode(';', $entry);
            $code = trim((string) array_shift($params));

            if (preg_match('/^[a-z]{1,8}(?:-[a-z0-9]{1,8})*$/i', $code) !== 1) {
                continue;
            }

            // Default weight when no q parameter is present.
            $quality = 1.0;
            foreach ($params as $param) {
                if (preg_match('/^\s*q\s*=\s*([0-9.]+)\s*$/i', $param, $match) === 1) {
                    // Cast directly: a truthiness test would mistake "0" for "missing".
                    $quality = (float) $match[1];
                    break;
                }
            }

            $languages[$code] = $quality;
        }

        arsort($languages);
        return array_slice($languages, 0, 5, true);
    }

    /**
     * Score how bot-like a request looks; higher means more bot-like.
     *
     * Adds 0.1 for a missing Referer, 0.3 for a missing Accept header, 0.2 for
     * a request interval under one second, and 0.4 for a honeypot depth above
     * one. The total is capped at 1.0.
     *
     * @param array $context Optional keys: request_interval (seconds),
     *                       honeypot_depth.
     * @return float Score between 0.0 and 1.0.
     */
    private function calculate_behavioral_score(array $context): float
    {
        $score = 0.0;

        // No referer = suspicious
        if (empty($_SERVER['HTTP_REFERER'])) {
            $score += 0.1;
        }

        // No accept headers = very suspicious
        if (empty($_SERVER['HTTP_ACCEPT'])) {
            $score += 0.3;
        }

        // Rapid-fire requests = bot. Test for "numeric" rather than "not
        // empty" so that an interval of exactly 0 still counts.
        $interval = $context['request_interval'] ?? null;
        if (is_numeric($interval) && (float) $interval < 1.0) {
            $score += 0.2;
        }

        // Multiple requests to honeypot = confirmed bot
        if (!empty($context['honeypot_depth']) && $context['honeypot_depth'] > 1) {
            $score += 0.4;
        }

        return min($score, 1.0);
    }

    /**
     * Estimate confidence in the agent fingerprint.
     *
     * Starts at 0.5 and adds 0.2 for a user-agent match, 0.2 for a behavioral
     * score above 0.5, and 0.1 when IP reputation data is present; the result
     * is capped at 1.0.
     *
     * @param array $context Optional keys: user_agent_match, behavioral_score,
     *                       ip_reputation.
     * @return float Confidence value.
     */
    private function calculate_fingerprint_confidence(array $context): float
    {
        $ua_matched = !empty($context['user_agent_match']);
        $behavior_confirmed = !empty($context['behavioral_score'])
            && $context['behavioral_score'] > 0.5;
        $has_ip_reputation = !empty($context['ip_reputation']);

        // Base confidence before any signal is applied.
        $total = 0.5;

        if ($ua_matched) {
            $total += 0.2;
        }
        if ($behavior_confirmed) {
            $total += 0.2;
        }
        if ($has_ip_reputation) {
            $total += 0.1;
        }

        return min($total, 1.0);
    }

    /**
     * Decide whether a record should be transmitted immediately.
     *
     * A record is high-value when its agent_id is not in the known-agent list,
     * its honeypot depth exceeds one, it is flagged as a datacenter IP, or its
     * behavioral score exceeds 0.7.
     *
     * @param array $intelligence Record built by collect_intelligence().
     * @return bool True when any high-value condition holds.
     */
    private function is_high_value_intelligence(array $intelligence): bool
    {
        // New agent not in database. Strict comparison avoids PHP's loose
        // string/number juggling when matching identifiers.
        if (!in_array($intelligence['agent_id'] ?? null, $this->get_known_agent_ids(), true)) {
            return true;
        }

        // Honeypot depth > 1 (confirmed behavioral bot)
        if (!empty($intelligence['honeypot_depth']) && $intelligence['honeypot_depth'] > 1) {
            return true;
        }

        // Suspicious IP patterns
        if (!empty($intelligence['datacenter'])) {
            return true;
        }

        // High behavioral score
        return !empty($intelligence['behavioral_score']) && $intelligence['behavioral_score'] > 0.7;
    }

    /**
     * Append a record to the transient-backed transmission queue.
     *
     * The queue is capped at the newest 1000 records and is rewritten with a
     * one-hour lifetime on every call.
     *
     * @param array $intelligence Record to enqueue.
     */
    private function queue_intelligence(array $intelligence): void
    {
        $max_items = 1000;

        $pending = get_transient('badger_intelligence_queue');
        if (!$pending) {
            $pending = [];
        }

        $pending[] = $intelligence;

        // Drop the oldest entries once the cap is exceeded.
        if (count($pending) > $max_items) {
            $pending = array_slice($pending, -$max_items);
        }

        set_transient('badger_intelligence_queue', $pending, HOUR_IN_SECONDS);
    }

    /**
     * Insert a record into the site's "{prefix}badger_intelligence" table.
     *
     * The full record is also stored JSON-encoded in the metadata column, and
     * the user agent is truncated to 500 bytes.
     *
     * @param array $intelligence Record built by collect_intelligence().
     */
    private function store_local(array $intelligence): void
    {
        global $wpdb;

        $row = [
            'agent_id' => $intelligence['agent_id'],
            'agent_name' => $intelligence['agent_name'],
            'user_agent' => substr($intelligence['user_agent_raw'], 0, 500),
            'ip_address' => $intelligence['ip_address'],
            'country' => $intelligence['country'],
            'request_uri' => $intelligence['request_uri'],
            'event_type' => 'detection',
            'behavioral_score' => $intelligence['behavioral_score'] ?? 0,
            'honeypot_depth' => $intelligence['honeypot_depth'] ?? 0,
            'metadata' => wp_json_encode($intelligence),
            // Second argument true requests GMT rather than site-local time.
            'created_at' => current_time('mysql', true),
        ];

        // One placeholder per column, in the same order as $row.
        $formats = ['%s', '%s', '%s', '%s', '%s', '%s', '%s', '%f', '%d', '%s', '%s'];

        $wpdb->insert($wpdb->prefix . 'badger_intelligence', $row, $formats);
    }

    /**
     * Send the oldest queued records (up to BATCH_SIZE) to the ingest endpoint.
     *
     * Runs from the 'badger_intelligence_sync' cron hook. On any 2xx response
     * the sent records are removed from the queue and the sync time and
     * contribution stats are updated; on failure the queue is left untouched
     * so the batch is retried on the next run.
     *
     * NOTE(review): the queue is read and rewritten without locking, so
     * records queued by other requests while the HTTP call is in flight may
     * be overwritten.
     */
    public function sync_to_central(): void
    {
        // Stamp the run at its start so network latency does not push the
        // next eligible time past the next cron tick.
        $started_at = time();
        $last_sync = (int) get_option('badger_last_intelligence_sync', 0);

        // The cron interval equals SYNC_INTERVAL, so allow 10% slack; without
        // it, ordinary cron jitter makes every other run skip.
        $min_gap = (int) (self::SYNC_INTERVAL * 0.9);
        if (($started_at - $last_sync) < $min_gap) {
            return;
        }

        $queue = get_transient('badger_intelligence_queue');

        if (!is_array($queue) || $queue === []) {
            return;
        }

        // Get site UUID for attribution (anonymized)
        $site_uuid = $this->get_site_uuid();

        $batch = array_slice($queue, 0, self::BATCH_SIZE);
        $sent = count($batch);

        $payload = [
            'site_uuid' => $site_uuid,
            'wordpress_version' => get_bloginfo('version'),
            'plugin_version' => BADGER_VERSION,
            'intelligence_count' => $sent,
            'intelligence' => $batch,
            'site_metadata' => [
                'timezone' => wp_timezone_string(),
                'language' => get_locale(),
                'is_multisite' => is_multisite(),
                'content_types' => $this->get_content_types(),
            ],
        ];

        $response = wp_remote_post(
            self::API_BASE . '/intelligence/ingest',
            [
                'body' => wp_json_encode($payload),
                'headers' => [
                    'Content-Type' => 'application/json',
                    'X-Badger-Version' => BADGER_VERSION,
                ],
                'timeout' => 30,
                'sslverify' => true,
            ]
        );

        if (is_wp_error($response)) {
            return;
        }

        // Treat every 2xx status as success; a 201/202/204 from the API would
        // otherwise cause the same batch to be re-sent indefinitely.
        $status = (int) wp_remote_retrieve_response_code($response);
        if ($status < 200 || $status >= 300) {
            return;
        }

        // Remove sent items from queue
        $remaining = array_slice($queue, $sent);
        set_transient('badger_intelligence_queue', $remaining, HOUR_IN_SECONDS);
        update_option('badger_last_intelligence_sync', $started_at);

        // Update intelligence network stats
        $this->update_network_stats($sent);
    }

    /**
     * Post a single record to the urgent endpoint without waiting for a reply.
     *
     * @param array $intelligence Record to transmit.
     */
    private function transmit_immediately(array $intelligence): void
    {
        $payload = [
            'site_uuid' => $this->get_site_uuid(),
            'intelligence' => $intelligence,
            'priority' => 'high',
        ];

        $request = [
            'body' => wp_json_encode($payload),
            'headers' => [
                'Content-Type' => 'application/json',
                'X-Badger-Version' => BADGER_VERSION,
            ],
            'timeout' => 10,
            // Non-blocking: the response is never inspected.
            'blocking' => false,
        ];

        wp_remote_post(self::API_BASE . '/intelligence/urgent', $request);
    }

    /**
     * Return this site's UUID, generating and persisting one on first use.
     *
     * The value lives in the 'badger_site_uuid' option.
     *
     * @return string Site UUID.
     */
    private function get_site_uuid(): string
    {
        $stored = get_option('badger_site_uuid');
        if ($stored) {
            return $stored;
        }

        $generated = wp_generate_uuid4();
        update_option('badger_site_uuid', $generated);

        return $generated;
    }

    /**
     * Record that $count more records were contributed to the network.
     *
     * Updates the 'badger_intelligence_stats' option: adds to the running
     * total and stamps the contribution time.
     *
     * @param int $count Number of records just sent.
     */
    private function update_network_stats(int $count): void
    {
        // Used only when the option has never been saved.
        $initial = [
            'total_contributed' => 0,
            'last_contribution' => 0,
            'unique_agents_seen' => [],
        ];

        $stats = get_option('badger_intelligence_stats', $initial);

        $stats['last_contribution'] = time();
        $stats['total_contributed'] += $count;

        update_option('badger_intelligence_stats', $stats);
    }

    /**
     * Gather detection statistics for the dashboard.
     *
     * "Today" means the current calendar day in the site's timezone. Rows are
     * written with a UTC created_at, so the local day is converted to a UTC
     * range before querying.
     *
     * @return array{total_detections:int, unique_agents:int, unique_ips:int, today_detections:int, network_contribution:mixed}
     */
    public function get_local_stats(): array
    {
        global $wpdb;
        $table = $wpdb->prefix . 'badger_intelligence';

        $total = $wpdb->get_var("SELECT COUNT(*) FROM $table");
        $unique_agents = $wpdb->get_var("SELECT COUNT(DISTINCT agent_id) FROM $table");
        $unique_ips = $wpdb->get_var("SELECT COUNT(DISTINCT ip_address) FROM $table");

        // Local midnight today and tomorrow, expressed in UTC to match the
        // stored timestamps. A range comparison also avoids wrapping the
        // column in DATE().
        $utc = new DateTimeZone('UTC');
        $day_start = new DateTimeImmutable('today', wp_timezone());
        $from = $day_start->setTimezone($utc)->format('Y-m-d H:i:s');
        $until = $day_start->modify('+1 day')->setTimezone($utc)->format('Y-m-d H:i:s');

        $today = $wpdb->get_var($wpdb->prepare(
            "SELECT COUNT(*) FROM $table WHERE created_at >= %s AND created_at < %s",
            $from,
            $until
        ));

        return [
            'total_detections' => (int) $total,
            'unique_agents' => (int) $unique_agents,
            'unique_ips' => (int) $unique_ips,
            'today_detections' => (int) $today,
            'network_contribution' => get_option('badger_intelligence_stats', []),
        ];
    }

    /**
     * 'cron_schedules' filter callback: register the 'five_minutes' interval.
     *
     * @param array $schedules Schedules registered so far.
     * @return array The schedules with 'five_minutes' set.
     */
    public function add_cron_schedules(array $schedules): array
    {
        $every_five_minutes = [
            'interval' => 300,
            'display' => __('Every 5 Minutes', 'badger'),
        ];

        $schedules['five_minutes'] = $every_five_minutes;

        return $schedules;
    }

    // Helper methods (stubs for now)
    /**
     * Determine the client IP address for the current request.
     *
     * Checks CF-Connecting-IP, X-Forwarded-For, X-Real-IP and REMOTE_ADDR in
     * that order and returns the first value that is a syntactically valid
     * IP. For a comma-separated list only the first entry is considered.
     *
     * NOTE(review): the three HTTP_* headers are client-controlled unless a
     * trusted proxy overwrites them, so the result can be forged. Validation
     * only guarantees the value is a well-formed address.
     *
     * @return string A valid IPv4/IPv6 address, or '' when none is found.
     */
    private function get_client_ip(): string
    {
        $sources = ['HTTP_CF_CONNECTING_IP', 'HTTP_X_FORWARDED_FOR', 'HTTP_X_REAL_IP', 'REMOTE_ADDR'];

        foreach ($sources as $key) {
            $raw = $_SERVER[$key] ?? '';
            if (!is_string($raw) || $raw === '') {
                continue;
            }

            // Take the first entry of a comma-separated list.
            $candidate = trim(explode(',', $raw, 2)[0]);

            // Skip values that are not real addresses and try the next source.
            if (filter_var($candidate, FILTER_VALIDATE_IP) !== false) {
                return $candidate;
            }
        }

        return '';
    }

    /**
     * Classify an address as IPv6 or IPv4.
     *
     * Anything that is not a valid IPv6 address, including invalid or empty
     * input, is reported as 4.
     *
     * @param string $ip Address to classify.
     * @return int 6 for a valid IPv6 address, otherwise 4.
     */
    private function get_ip_version(string $ip): int
    {
        $is_ipv6 = (bool) filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6);

        if ($is_ipv6) {
            return 6;
        }

        return 4;
    }

    /**
     * Look up the autonomous system number for an IP address.
     *
     * Placeholder: no lookup is performed and null is always returned.
     *
     * @param string $ip Address to look up (currently unused).
     * @return string|null Always null in this implementation.
     */
    private function get_asn(string $ip): ?string
    {
        // Would integrate with IP-to-ASN service
        return null;
    }

    /**
     * Return the country code for the current client.
     *
     * Placeholder: no GeoIP lookup is performed and '' is always returned.
     *
     * @return string Always the empty string in this implementation.
     */
    private function get_country_code(): string
    {
        // Would integrate with GeoIP
        return '';
    }

    /**
     * Tell whether an address belongs to a datacenter range.
     *
     * Placeholder: no range data is consulted and false is always returned.
     *
     * @param string $ip Address to check (currently unused).
     * @return bool Always false in this implementation.
     */
    private function is_datacenter_ip(string $ip): bool
    {
        // Would check against known datacenter ranges
        return false;
    }

    /**
     * Tell whether an address belongs to a VPN or proxy.
     *
     * Placeholder: no list is consulted and false is always returned.
     *
     * @param string $ip Address to check (currently unused).
     * @return bool Always false in this implementation.
     */
    private function is_vpn_ip(string $ip): bool
    {
        // Would check against VPN/proxy lists
        return false;
    }

    /**
     * Classify the site by its most specific detectable trait.
     *
     * Checks run in order and the first hit wins: multisite install,
     * WooCommerce class loaded, 'product' post type registered.
     *
     * @return string One of 'multisite', 'ecommerce', 'business', 'content'.
     */
    private function get_site_type(): string
    {
        return match (true) {
            is_multisite() => 'multisite',
            class_exists('WooCommerce') => 'ecommerce',
            post_type_exists('product') => 'business',
            default => 'content',
        };
    }

    /**
     * Read the site category from the 'badger_site_category' option.
     *
     * @return string The stored category, or 'general' when the option is unset.
     */
    private function get_site_category(): string
    {
        $category = get_option('badger_site_category', 'general');

        return $category;
    }

    /**
     * Derive a session identifier for the current client.
     *
     * The identifier is the MD5 of REMOTE_ADDR, the User-Agent header and the
     * current UTC date-and-hour, so it rotates every hour. MD5 serves only as
     * a bucketing hash here, not as a security measure.
     *
     * @return string 32-character hexadecimal identifier.
     */
    private function get_session_id(): string
    {
        // REMOTE_ADDR is absent under CLI/cron; default it like the User-Agent.
        $remote_addr = $_SERVER['REMOTE_ADDR'] ?? '';
        $user_agent = $_SERVER['HTTP_USER_AGENT'] ?? '';

        // gmdate() keeps the hourly bucket in UTC regardless of the PHP
        // default timezone, matching the other timestamps in this class.
        return md5($remote_addr . $user_agent . gmdate('Y-m-d H'));
    }

    /**
     * Increment and return the per-session request counter.
     *
     * The counter lives in a transient named after the session id and is
     * rewritten with a one-hour lifetime on every call.
     *
     * @return int The counter value after incrementing (1 on first request).
     */
    private function increment_request_sequence(): int
    {
        $transient_key = 'badger_seq_' . $this->get_session_id();

        $counter = get_transient($transient_key);
        if (!$counter) {
            // Missing or expired transient: start from zero.
            $counter = 0;
        }
        $counter++;

        set_transient($transient_key, $counter, HOUR_IN_SECONDS);

        return $counter;
    }

    /**
     * Measure the time between honeypot render and trigger.
     *
     * Placeholder: nothing is measured and null is always returned.
     *
     * @return float|null Always null in this implementation.
     */
    private function calculate_time_to_trigger(): ?float
    {
        // Would calculate from honeypot render to trigger
        return null;
    }

    /**
     * Detect whether the agent changed its signature during the session.
     *
     * Placeholder: no detection is performed and null is always returned.
     *
     * @param array $context Request context (currently unused).
     * @return array|null Always null in this implementation.
     */
    private function detect_agent_override(array $context): ?array
    {
        // Detect if agent changed signature during session
        return null;
    }

    /**
     * List the ids of the built-in crawler definitions.
     *
     * @return list<string> Crawler ids in definition order.
     */
    private function get_known_agent_ids(): array
    {
        return array_map(
            static fn (array $crawler): string => $crawler['id'],
            $this->get_default_crawlers()
        );
    }

    /**
     * Return the built-in list of known AI crawlers.
     *
     * @return list<array{id:string, name:string}> Crawler definitions.
     */
    private function get_default_crawlers(): array
    {
        $names_by_id = [
            'gptbot' => 'GPTBot',
            'claudebot' => 'ClaudeBot',
            'bytespider' => 'Bytespider',
            'perplexitybot' => 'PerplexityBot',
        ];

        $crawlers = [];
        foreach ($names_by_id as $id => $name) {
            $crawlers[] = ['id' => $id, 'name' => $name];
        }

        return $crawlers;
    }

    /**
     * List the content types reported in the sync payload.
     *
     * @return list<string> 'post' and 'page', plus 'product' when the
     *                      WooCommerce class is loaded.
     */
    private function get_content_types(): array
    {
        $base_types = ['post', 'page'];

        if (!class_exists('WooCommerce')) {
            return $base_types;
        }

        return [...$base_types, 'product'];
    }
}
