<?php
/**
 * AI bot detection engine. Identifies crawlers via user-agent and honeypot triggers.
 */

declare(strict_types=1);

// Bail out unless ABSPATH is defined (presumably by the WordPress bootstrap),
// so the class below is never declared when this file is loaded on its own.
if (!defined('ABSPATH')) {
    exit;
}

/**
 * Detects known AI crawlers by user-agent substring and records each hit
 * in the `{prefix}badger_events` table.
 *
 * Obtain the shared instance through get_instance(). The constructor is
 * public, but every construction registers another `init` callback, so
 * creating additional instances causes each request to be checked (and
 * logged) more than once.
 */
class Badger_Detector
{
    /** Lazily created shared instance returned by get_instance(). */
    private static ?self $instance = null;

    /**
     * Crawler definitions, either decoded from data/ai-crawlers.json or taken
     * from get_default_crawlers(). Entries loaded from JSON are NOT validated
     * here; identify_agent() skips malformed ones at match time.
     *
     * @var array<int|string, mixed>
     */
    private array $ai_crawlers = [];

    /**
     * Return the shared detector, creating it on first use.
     */
    public static function get_instance(): self
    {
        if (self::$instance === null) {
            self::$instance = new self();
        }
        return self::$instance;
    }

    /**
     * Load the crawler signatures and hook request inspection onto `init`
     * (priority 5).
     */
    public function __construct()
    {
        $this->load_crawler_list();
        add_action('init', [$this, 'check_request'], 5);
    }

    /**
     * Load curated AI crawler signatures from JSON.
     *
     * Falls back to the built-in list when the file is missing, unreadable,
     * or does not decode to an array.
     */
    private function load_crawler_list(): void
    {
        $path = BADGER_PLUGIN_DIR . 'data/ai-crawlers.json';
        if (!is_file($path) || !is_readable($path)) {
            $this->ai_crawlers = $this->get_default_crawlers();
            return;
        }

        $json = file_get_contents($path);
        if ($json === false) {
            // A failed read must not reach json_decode(): under strict_types
            // passing `false` where a string is expected is a TypeError.
            $this->ai_crawlers = $this->get_default_crawlers();
            return;
        }

        $data = json_decode($json, true);
        $this->ai_crawlers = is_array($data) ? $data : $this->get_default_crawlers();
    }

    /**
     * Default AI crawler list (fallback when the JSON file cannot be used).
     *
     * @return list<array{id: string, name: string, patterns: list<string>}>
     */
    private function get_default_crawlers(): array
    {
        return [
            ['id' => 'gptbot', 'name' => 'GPTBot', 'patterns' => ['GPTBot']],
            ['id' => 'claudebot', 'name' => 'ClaudeBot', 'patterns' => ['ClaudeBot', 'Claude-Web']],
            ['id' => 'bytespider', 'name' => 'Bytespider', 'patterns' => ['Bytespider']],
            ['id' => 'perplexitybot', 'name' => 'PerplexityBot', 'patterns' => ['PerplexityBot']],
            ['id' => 'google-extended', 'name' => 'Google-Extended', 'patterns' => ['Google-Extended']],
            ['id' => 'ccbot', 'name' => 'CCBot', 'patterns' => ['CCBot']],
            ['id' => 'anthropic-ai', 'name' => 'Anthropic-AI', 'patterns' => ['Anthropic-AI']],
            ['id' => 'cohere-ai', 'name' => 'Cohere-AI', 'patterns' => ['Cohere-AI']],
        ];
    }

    /**
     * Check the current request for AI bot signatures and log a `detected`
     * event on a match.
     *
     * Admin and cron requests, and requests without a user agent, are ignored.
     */
    public function check_request(): void
    {
        if (is_admin() || wp_doing_cron()) {
            return;
        }

        $user_agent = $this->read_server_value('HTTP_USER_AGENT');
        if ($user_agent === '') {
            return;
        }

        $detected = $this->identify_agent($user_agent);
        if ($detected !== null) {
            $this->log_event('detected', $detected, $user_agent);
        }
    }

    /**
     * Identify AI agent from user-agent string.
     *
     * Matching is a case-insensitive substring test against each crawler's
     * patterns; the first crawler with a matching pattern wins.
     *
     * Malformed definitions are skipped instead of raising: entries that are
     * not arrays, lack `id`/`name`, or whose `patterns` is not an array, and
     * patterns that are not non-empty strings. In particular an empty pattern
     * is ignored, because every string contains the empty string and it would
     * otherwise flag every visitor.
     *
     * @param string $user_agent Raw user-agent header value.
     * @return array{id: mixed, name: mixed}|null Matched crawler, or null when nothing matches.
     */
    public function identify_agent(string $user_agent): ?array
    {
        $haystack = strtolower($user_agent);

        foreach ($this->ai_crawlers as $crawler) {
            if (!is_array($crawler) || !isset($crawler['id'], $crawler['name'])) {
                continue;
            }

            $patterns = $crawler['patterns'] ?? null;
            if (!is_array($patterns)) {
                continue;
            }

            foreach ($patterns as $pattern) {
                if (!is_string($pattern) || $pattern === '') {
                    continue;
                }
                if (str_contains($haystack, strtolower($pattern))) {
                    return [
                        'id' => $crawler['id'],
                        'name' => $crawler['name'],
                    ];
                }
            }
        }

        return null;
    }

    /**
     * Log detection event to database.
     *
     * Logging is best-effort: the result of the insert is not inspected.
     * Values are stored raw (unslashed, not HTML-escaped); anything that
     * displays them must escape on output.
     *
     * @param string $event_type Event label, e.g. 'detected'.
     * @param array  $agent      Crawler info with `id` and `name` keys, as returned by identify_agent().
     * @param string $user_agent User agent to record; only the first 500 characters are stored.
     */
    private function log_event(string $event_type, array $agent, string $user_agent): void
    {
        global $wpdb;
        $table = $wpdb->prefix . 'badger_events';

        // Keep "header absent" distinguishable from "header empty": absent is stored as NULL.
        $referer = isset($_SERVER['HTTP_REFERER'])
            ? $this->read_server_value('HTTP_REFERER')
            : null;

        $wpdb->insert(
            $table,
            [
                'event_type' => $event_type,
                'agent_id' => $agent['id'],
                'agent_name' => $agent['name'],
                // Character-aware truncation so a multi-byte sequence is never cut in half.
                'user_agent' => mb_substr($user_agent, 0, 500),
                'ip_address' => $this->get_client_ip(),
                'request_uri' => $this->read_server_value('REQUEST_URI'),
                'referer' => $referer,
            ],
            ['%s', '%s', '%s', '%s', '%s', '%s', '%s']
        );
    }

    /**
     * Get client IP address.
     *
     * Checks proxy/CDN headers first, then REMOTE_ADDR, and returns the first
     * value that is a syntactically valid IPv4/IPv6 address. For
     * comma-separated lists only the first entry is considered.
     *
     * NOTE(security): the forwarded headers are supplied by the client unless
     * a trusted proxy overwrites them, so the result can be spoofed and should
     * be treated as informational only.
     *
     * @return string Valid IP address, or '' when none is available.
     */
    private function get_client_ip(): string
    {
        $keys = ['HTTP_CF_CONNECTING_IP', 'HTTP_X_FORWARDED_FOR', 'HTTP_X_REAL_IP', 'REMOTE_ADDR'];

        foreach ($keys as $key) {
            $raw = $this->read_server_value($key);
            if ($raw === '') {
                continue;
            }

            $candidate = trim(explode(',', $raw)[0]);
            if (filter_var($candidate, FILTER_VALIDATE_IP) !== false) {
                return $candidate;
            }
        }

        return '';
    }

    /**
     * Read a string entry from $_SERVER with WordPress' added slashes removed.
     *
     * @param string $key $_SERVER key to read.
     * @return string Unslashed value, or '' when the key is missing or not a string.
     */
    private function read_server_value(string $key): string
    {
        $value = $_SERVER[$key] ?? '';

        return is_string($value) ? wp_unslash($value) : '';
    }

    /**
     * Get list of known AI crawlers.
     *
     * @return array Crawler definitions exactly as loaded (file contents are not validated).
     */
    public function get_crawlers(): array
    {
        return $this->ai_crawlers;
    }
}
