<?php

/**
 * @file
 * Contains \Drupal\ai_404_redirect\Service\AiRedirectAnalyzer.
 */

namespace Drupal\ai_404_redirect\Service;

use Drupal\Core\Config\ConfigFactoryInterface;
use Drupal\Core\Database\Connection;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\State\StateInterface;
use Drupal\path_alias\AliasManagerInterface;
use Drupal\ai\AiProviderPluginManager;
use Drupal\ai\OperationType\Chat\ChatInterface;
use Drupal\ai\OperationType\Chat\ChatOutput;
use Symfony\Component\HttpFoundation\Request;

/**
 * Service to analyze 404 errors and suggest redirects using AI.
 */
class AiRedirectAnalyzer {

  /**
   * The AI provider manager.
   *
   * @var \Drupal\ai\AiProviderPluginManager
   */
  protected $aiProviderManager;

  /**
   * The entity type manager.
   *
   * @var \Drupal\Core\Entity\EntityTypeManagerInterface
   */
  protected $entityTypeManager;

  /**
   * The path alias manager.
   *
   * @var \Drupal\path_alias\AliasManagerInterface
   */
  protected $pathAliasManager;

  /**
   * The config factory.
   *
   * @var \Drupal\Core\Config\ConfigFactoryInterface
   */
  protected $configFactory;

  /**
   * The database connection.
   *
   * @var \Drupal\Core\Database\Connection
   */
  protected $database;

  /**
   * The state service.
   *
   * @var \Drupal\Core\State\StateInterface
   */
  protected $state;

  /**
   * Constructs a new AiRedirectAnalyzer.
   *
   * @param \Drupal\ai\AiProviderPluginManager $ai_provider_manager
   *   The AI provider manager.
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entity_type_manager
   *   The entity type manager.
   * @param \Drupal\path_alias\AliasManagerInterface $path_alias_manager
   *   The path alias manager.
   * @param \Drupal\Core\Config\ConfigFactoryInterface $config_factory
   *   The config factory.
   * @param \Drupal\Core\State\StateInterface|null $state
   *   (optional) The state service. When omitted, the 'state' service is
   *   fetched from the global container.
   * @param \Drupal\Core\Database\Connection|null $database
   *   (optional) The database connection. When omitted, the default
   *   connection is fetched from the global container.
   */
  public function __construct(
    AiProviderPluginManager $ai_provider_manager,
    EntityTypeManagerInterface $entity_type_manager,
    AliasManagerInterface $path_alias_manager,
    ConfigFactoryInterface $config_factory,
    ?StateInterface $state = NULL,
    ?Connection $database = NULL
  ) {
    $this->aiProviderManager = $ai_provider_manager;
    $this->entityTypeManager = $entity_type_manager;
    $this->pathAliasManager = $path_alias_manager;
    $this->configFactory = $config_factory;
    // Both trailing arguments are optional so that callers passing only the
    // first four (or five) arguments keep working; the global container is
    // used as the fallback in that case.
    $this->database = $database ?? \Drupal::database();
    $this->state = $state ?? \Drupal::service('state');
  }

  /**
   * Handles a single 404 hit for a path.
   *
   * Requests classified as bots (or, when 'enable_bot_blocking' is set, as
   * suspicious) are logged and ignored. For every other request:
   * - if a suggestion row already exists for the path, its '404_count' is
   *   incremented; processing stops while the count is below
   *   'minimum_404_count', or when the row is no longer 'pending' and the
   *   count is already past the threshold;
   * - the path is analyzed against the candidate pages and the suggestion is
   *   saved (new path) or updated (known path);
   * - a redirect is created when the analysis recommends one, names a target,
   *   meets 'auto_approve_confidence_threshold' and the hit count meets
   *   'minimum_404_count'.
   *
   * @param string $path
   *   The 404 path.
   * @param \Symfony\Component\HttpFoundation\Request $request
   *   The request object.
   */
  public function analyze404($path, Request $request) {
    $settings = $this->configFactory->get('ai_404_redirect.settings');
    $agent = $request->headers->get('User-Agent', '');
    $ip = $request->getClientIp();

    // The suspicious-activity check is opt-in; the bot check always runs.
    $is_bot = $this->isBot($agent);
    $is_suspicious = $settings->get('enable_bot_blocking')
      ? $this->isSuspiciousActivity($path, $ip, $agent)
      : FALSE;

    if ($is_bot || $is_suspicious) {
      \Drupal::logger('ai_404_redirect')->debug('Skipping 404 analysis for path: @path (bot: @bot, suspicious: @suspicious)', [
        '@path' => $path,
        '@bot' => $is_bot ? 'yes' : 'no',
        '@suspicious' => $is_suspicious ? 'yes' : 'no',
      ]);
      return;
    }

    // Most recent stored suggestion for this path, if there is one.
    $lookup = $this->database->select('ai_404_redirect_suggestions', 's');
    $lookup->fields('s', ['id', 'status', '404_count']);
    $lookup->condition('source_path', $path);
    $lookup->orderBy('created', 'DESC');
    $lookup->range(0, 1);
    $existing = $lookup->execute()->fetchObject();

    $minimum_count = (int) ($settings->get('minimum_404_count') ?? 1);

    // A path seen for the first time counts as one hit.
    $hit_count = 1;

    if ($existing) {
      // The column name starts with a digit, so it needs brace syntax.
      $hit_count = (int) ($existing->{'404_count'} ?? 1) + 1;

      $bump = $this->database->update('ai_404_redirect_suggestions');
      $bump->fields([
        '404_count' => $hit_count,
        'updated' => \Drupal::time()->getRequestTime(),
      ]);
      $bump->condition('id', $existing->id);
      $bump->execute();

      // Not enough hits yet: the counter is recorded, nothing else happens.
      if ($hit_count < $minimum_count) {
        \Drupal::logger('ai_404_redirect')->debug('404 count (@count) below threshold (@threshold) for path: @path', [
          '@count' => $hit_count,
          '@threshold' => $minimum_count,
          '@path' => $path,
        ]);
        return;
      }

      // Already-handled rows are only re-analyzed on the exact hit that
      // reaches the threshold.
      $already_handled = $existing->status !== 'pending';
      if ($already_handled && $hit_count > $minimum_count) {
        return;
      }
    }

    // Candidate lookup is pre-filtered by the 404 path itself.
    $analysis = $this->analyzeWithAI($path, $this->getCandidatePages($path), $agent, $is_bot);

    if (!$existing) {
      $this->saveSuggestion($path, $analysis, $agent, $is_bot, $ip);
    }
    else {
      $this->updateSuggestion($existing->id, $analysis, $agent, $is_bot, $ip);
    }

    // Auto-approve only confident suggestions on paths with enough hits.
    $confidence_threshold = $settings->get('auto_approve_confidence_threshold') ?? 80;
    $auto_approve = $analysis['should_redirect']
      && $analysis['confidence_score'] >= $confidence_threshold
      && $hit_count >= $minimum_count
      && $analysis['suggested_path'];

    if ($auto_approve) {
      $this->createRedirect($path, $analysis);
    }
  }

  /**
   * Classifies a user agent string as automated traffic.
   *
   * An empty user agent is treated as a bot. Otherwise the lower-cased string
   * is searched for a fixed list of crawler, scraper and HTTP-client
   * substrings; a single hit is enough.
   *
   * @param string $user_agent
   *   The user agent string.
   *
   * @return bool
   *   TRUE if bot, FALSE otherwise.
   */
  protected function isBot($user_agent) {
    // Requests that send no user agent at all are treated as automated.
    if (empty($user_agent)) {
      return TRUE;
    }

    $haystack = strtolower($user_agent);

    $signatures = [
      'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget',
      'googlebot', 'bingbot', 'slurp', 'duckduckbot', 'baiduspider',
      'yandexbot', 'sogou', 'exabot', 'facebot', 'ia_archiver',
      'python-requests', 'java/', 'go-http-client', 'okhttp',
      'scrapy', 'headless', 'phantomjs', 'selenium',
    ];

    // Keep only the signatures that occur somewhere in the user agent.
    $hits = array_filter($signatures, static function ($signature) use ($haystack) {
      return strpos($haystack, $signature) !== FALSE;
    });

    return !empty($hits);
  }

  /**
   * Detects suspicious activity that might be trolling/exploitation attempts.
   *
   * Two checks can flag a request:
   * - the client IP has at least 'suspicious_ip_threshold' (default 10)
   *   distinct source paths stored in 'ai_404_redirect_suggestions' within the
   *   last hour;
   * - the path contains a known probe substring, a '..' path segment, or a
   *   'union ... select' sequence.
   *
   * Paths that merely look random are logged at debug level but not flagged.
   *
   * @param string $path
   *   The 404 path.
   * @param string $client_ip
   *   The client IP address.
   * @param string $user_agent
   *   The user agent string. Not read by this method; kept so the signature
   *   stays unchanged for callers.
   *
   * @return bool
   *   TRUE if activity is suspicious, FALSE otherwise.
   */
  protected function isSuspiciousActivity($path, $client_ip, $user_agent) {
    $config = $this->configFactory->get('ai_404_redirect.settings');
    $suspicious_threshold = (int) ($config->get('suspicious_ip_threshold') ?? 10);

    // Check if this IP has hit too many different 404s recently (trolling
    // pattern). The column is selected explicitly so that DISTINCT has
    // something to de-duplicate before the rows are counted.
    $recent_time = \Drupal::time()->getRequestTime() - 3600;
    $unique_404s = (int) $this->database->select('ai_404_redirect_suggestions', 's')
      ->fields('s', ['source_path'])
      ->condition('client_ip', $client_ip)
      ->condition('created', $recent_time, '>=')
      ->distinct()
      ->countQuery()
      ->execute()
      ->fetchField();

    if ($unique_404s >= $suspicious_threshold) {
      \Drupal::logger('ai_404_redirect')->warning('Suspicious activity detected: IP @ip has hit @count unique 404s in the last hour. Blocking.', [
        '@ip' => $client_ip,
        '@count' => $unique_404s,
      ]);
      return TRUE;
    }

    // Everything below is matched against the lower-cased path, so the
    // needles and expressions are written in lower case.
    $path_lower = strtolower($path);
    $matched_pattern = NULL;

    // Plain substrings seen in common exploitation probes.
    $suspicious_substrings = [
      '/wp-admin',
      '/wp-login',
      '/administrator',
      '/phpmyadmin',
      '/.env',
      '/config.php',
      '/shell.php',
      '/cmd.php',
      '/eval(',
      '/system(',
      '/exec(',
      '/base64',
      // URL-encoded null byte, anywhere in the path.
      '%00',
    ];
    foreach ($suspicious_substrings as $needle) {
      if (strpos($path_lower, $needle) !== FALSE) {
        $matched_pattern = $needle;
        break;
      }
    }

    // Patterns that need real regular expressions. They are kept apart from
    // the substrings above because quoting them would turn them into literal
    // text that can never match.
    if ($matched_pattern === NULL) {
      $suspicious_expressions = [
        // Directory traversal: a '..' path segment.
        '#(?:^|/)\.\.(?:/|$)#',
        // SQL injection: 'union' followed later by 'select'.
        '#union.*select#',
      ];
      foreach ($suspicious_expressions as $expression) {
        if (preg_match($expression, $path_lower) === 1) {
          $matched_pattern = $expression;
          break;
        }
      }
    }

    if ($matched_pattern !== NULL) {
      \Drupal::logger('ai_404_redirect')->warning('Suspicious path pattern detected: @path (pattern: @pattern)', [
        '@path' => $path,
        '@pattern' => $matched_pattern,
      ]);
      return TRUE;
    }

    // Random-looking paths: a run of 20+ alphanumerics somewhere, and at least
    // two segments longer than 15 bytes that contain none of the common words
    // as a whole word. These are only logged; the request is still allowed.
    if (preg_match('/[a-z0-9]{20,}/i', $path)) {
      $random_looking = 0;
      foreach (explode('/', trim($path, '/')) as $part) {
        if (strlen($part) > 15 && !preg_match('/\b(admin|user|page|content|node|article|blog|news)\b/i', $part)) {
          $random_looking++;
        }
      }
      if ($random_looking >= 2) {
        \Drupal::logger('ai_404_redirect')->debug('Random-looking path detected: @path', [
          '@path' => $path,
        ]);
      }
    }

    return FALSE;
  }

  /**
   * Gets candidate pages using the keyword index with fuzzy matching.
   *
   * When a path is given, the 'ai_404_redirect_alias_index' table is queried
   * in priority order and the rows are merged per node ID, first match wins:
   * 1. exact match on the last path segment;
   * 2. for every keyword (segment pieces of 3+ bytes, split on '-' and '_'):
   *    substring match on 'keywords', SOUNDEX match on 'soundex_keywords',
   *    then substring matches for each value returned by
   *    generateFuzzyVariations();
   * 3. exact match on the first path segment.
   * At most 500 index rows are kept. If nothing matched (or no path was
   * given), the 200 most recently changed published nodes are used instead.
   *
   * @param string $path
   *   The 404 path to match against.
   *
   * @return array
   *   Array of candidate pages with nid, title, alias, and type. 'alias' is
   *   NULL when the node has no path alias.
   */
  protected function getCandidatePages($path = NULL) {
    $candidates = [];
    $node_ids = [];
    $alias_map = [];

    // Use the keyword index for fast searching if we have a path.
    if (!empty($path)) {
      $path_parts = array_filter(explode('/', trim($path, '/')));
      $last_segment = !empty($path_parts) ? strtolower(end($path_parts)) : '';
      $first_segment = !empty($path_parts) ? strtolower(reset($path_parts)) : '';

      // Extract keywords from the 404 path: every hyphen/underscore separated
      // piece of at least three bytes.
      $search_keywords = [];
      foreach ($path_parts as $part) {
        foreach (preg_split('/[-_]/', strtolower($part)) as $subpart) {
          if (strlen($subpart) >= 3) {
            $search_keywords[] = $subpart;
          }
        }
      }

      $database = $this->database;

      // Runs one single-condition lookup against the alias index.
      $lookup = static function ($column, $value, $operator = '=') use ($database) {
        $query = $database->select('ai_404_redirect_alias_index', 'idx');
        $query->fields('idx', ['node_id', 'alias', 'path']);
        $query->condition('idx.' . $column, $value, $operator);
        return $query->execute()->fetchAll();
      };

      // Builds a "contains" LIKE operand. The term comes from the requested
      // URL, so LIKE wildcards in it are escaped to match literally.
      $contains = static function ($term) use ($database) {
        return '%' . $database->escapeLike($term) . '%';
      };

      // Merges rows into the result set without replacing a node that an
      // earlier, higher-priority lookup already found.
      $all_results = [];
      $add_missing = static function (array $rows) use (&$all_results) {
        foreach ($rows as $row) {
          if (!isset($all_results[$row->node_id])) {
            $all_results[$row->node_id] = $row;
          }
        }
      };

      // 1. Exact last segment match (highest priority).
      if (!empty($last_segment)) {
        foreach ($lookup('last_segment', $last_segment) as $row) {
          $all_results[$row->node_id] = $row;
        }
      }

      // 2. Query each keyword individually (exact match first).
      foreach ($search_keywords as $keyword) {
        $add_missing($lookup('keywords', $contains($keyword), 'LIKE'));

        // Fuzzy matching using SOUNDEX for phonetic similarity.
        // This catches typos like "offed" -> "offer", "carreers" -> "careers".
        // A SOUNDEX code holds only a letter and digits, so it needs no LIKE
        // escaping.
        $soundex_code = soundex($keyword);
        if (!empty($soundex_code) && $soundex_code !== '0000') {
          $add_missing($lookup('soundex_keywords', '%' . $soundex_code . '%', 'LIKE'));
        }

        // Additional fuzzy matching on the variations generated for the
        // keyword; variations shorter than three bytes are ignored.
        foreach ($this->generateFuzzyVariations($keyword) as $variation) {
          if (strlen($variation) >= 3) {
            $add_missing($lookup('keywords', $contains($variation), 'LIKE'));
          }
        }
      }

      // 3. First segment match (for path structure similarity).
      if (!empty($first_segment)) {
        $add_missing($lookup('first_segment', $first_segment));
      }

      // Limit to top 500 results.
      $all_results = array_slice($all_results, 0, 500, TRUE);

      foreach ($all_results as $result) {
        if (!empty($result->node_id)) {
          $node_ids[$result->node_id] = $result->node_id;
          $alias_map[$result->node_id] = $result->alias;
        }
      }
    }

    // If no matches from index, or if index is empty, fall back to recent nodes.
    if (empty($node_ids)) {
      $node_ids = $this->database->select('node_field_data', 'n')
        ->fields('n', ['nid'])
        ->condition('n.status', 1)
        ->orderBy('n.changed', 'DESC')
        ->range(0, 200)
        ->execute()
        ->fetchCol();
    }

    // Get node data for the filtered node IDs.
    // NOTE(review): this query has no ORDER BY, so the candidates are returned
    // in whatever order the database yields them, not in the match priority
    // established above - confirm callers do not depend on the order.
    if (!empty($node_ids)) {
      $node_query = $this->database->select('node_field_data', 'n');
      $node_query->fields('n', ['nid', 'title', 'type'])
        ->condition('n.status', 1)
        ->condition('n.nid', $node_ids, 'IN');

      $node_results = $node_query->execute()->fetchAll();

      foreach ($node_results as $node_result) {
        $nid = (int) $node_result->nid;
        $alias = $alias_map[$nid] ?? NULL;

        // If we don't have alias from index, try to get it. The alias manager
        // hands back the system path itself when no alias exists.
        if (empty($alias)) {
          $alias = $this->pathAliasManager->getAliasByPath('/node/' . $nid);
          if ($alias == '/node/' . $nid) {
            $alias = NULL;
          }
        }

        $candidates[] = [
          'nid' => $nid,
          'title' => $node_result->title,
          'alias' => $alias,
          'type' => $node_result->type,
        ];
      }
    }

    \Drupal::logger('ai_404_redirect')->debug('Retrieved @count candidate pages using keyword index', [
      '@count' => count($candidates),
    ]);

    return $candidates;
  }

  /**
   * Analyzes a 404 path with AI.
   *
   * @param string $path
   *   The 404 path.
   * @param array $candidates
   *   Array of candidate pages.
   * @param string $user_agent
   *   The user agent string.
   * @param bool $is_bot
   *   Whether this is a bot request.
   *
   * @return array
   *   Analysis results.
   */
  protected function analyzeWithAI($path, array $candidates, $user_agent, $is_bot) {
    $config = $this->configFactory->get('ai_404_redirect.settings');
    
    // Log if no candidates available.
    if (empty($candidates)) {
      \Drupal::logger('ai_404_redirect')->warning('No candidate pages available for path: @path', [
        '@path' => $path,
      ]);
      return $this->fallbackAnalysis($path, $candidates, $is_bot);
    }
    
    // Check if AI is disabled due to configuration issues.
    $ai_disabled_reason = $this->state->get('ai_404_redirect.ai_disabled_reason');
    if ($ai_disabled_reason) {
      $last_ai_call = $this->state->get('ai_404_redirect.last_ai_call', 0);
      $current_time = \Drupal::time()->getRequestTime();
      
      // If still in the disabled period, use fallback.
      if ($last_ai_call > $current_time) {
        $wait_seconds = $last_ai_call - $current_time;
        \Drupal::logger('ai_404_redirect')->debug('AI disabled due to @reason. Waiting @seconds more seconds. Using fallback for path: @path', [
          '@reason' => $ai_disabled_reason,
          '@seconds' => $wait_seconds,
          '@path' => $path,
        ]);
        return $this->fallbackAnalysis($path, $candidates, $is_bot);
      }
      else {
        // Disabled period expired, clear the flag and try again.
        $this->state->delete('ai_404_redirect.ai_disabled_reason');
        \Drupal::logger('ai_404_redirect')->info('AI disabled period expired. Attempting AI call again for path: @path', [
          '@path' => $path,
        ]);
      }
    }
    
    // Throttle AI calls to prevent rate limiting.
    // Wait at least 3 seconds between AI API calls.
    $last_ai_call = $this->state->get('ai_404_redirect.last_ai_call', 0);
    $current_time = \Drupal::time()->getRequestTime();
    $min_delay = 3; // Minimum seconds between calls
    
    // If last_ai_call is in the future (set due to rate limit), wait until that time.
    if ($last_ai_call > $current_time) {
      $wait_seconds = $last_ai_call - $current_time;
      \Drupal::logger('ai_404_redirect')->debug('AI calls are throttled. Waiting @seconds more seconds before next AI call. Using fallback for path: @path', [
        '@path' => $path,
        '@seconds' => $wait_seconds,
      ]);
      return $this->fallbackAnalysis($path, $candidates, $is_bot);
    }
    
    // Normal throttling - wait at least min_delay seconds between calls.
    if (($current_time - $last_ai_call) < $min_delay && $last_ai_call > 0) {
      $seconds_since_last = $current_time - $last_ai_call;
      \Drupal::logger('ai_404_redirect')->debug('Throttling AI call for path: @path. Last call was @seconds seconds ago. Need @needed more seconds.', [
        '@path' => $path,
        '@seconds' => $seconds_since_last,
        '@needed' => $min_delay - $seconds_since_last,
      ]);
      // Use fallback immediately to avoid rate limits.
      return $this->fallbackAnalysis($path, $candidates, $is_bot);
    }
    
    $prompt = $this->buildPrompt($path, $candidates, $is_bot);

    try {
      // Get the AI provider from config.
      $provider_id = $config->get('ai_provider') ?? 'openai';
      
      \Drupal::logger('ai_404_redirect')->debug('Attempting to use AI provider: @provider for path: @path', [
        '@provider' => $provider_id,
        '@path' => $path,
      ]);
      
      // Check if provider exists.
      $provider_definitions = $this->aiProviderManager->getDefinitions();
      if (!isset($provider_definitions[$provider_id])) {
        throw new \Exception("AI provider '{$provider_id}' is not available. Available providers: " . implode(', ', array_keys($provider_definitions)));
      }
      
      // Get the provider instance.
      $provider = $this->aiProviderManager->createInstance($provider_id);
      
      // Check if provider is usable for chat operations.
      if (!$provider->isUsable('chat')) {
        \Drupal::logger('ai_404_redirect')->error('AI provider is not configured or not usable for chat operations. Check API keys and configuration at /admin/config/ai/models');
        throw new \Exception('AI provider is not configured or not usable for chat operations. Please check your API keys and configuration.');
      }

      // Get a model for chat.
      $models = $provider->getConfiguredModels('chat');
      if (empty($models)) {
        throw new \Exception('No chat models configured for provider.');
      }
      $model_id = array_key_first($models);

      $provider_class = get_class($provider);
      $has_chat_method = method_exists($provider, 'chat');
      $implements_interface = $provider instanceof ChatInterface;
      
      \Drupal::logger('ai_404_redirect')->debug('Using AI model: @model for path: @path. Provider class: @class. Has chat method: @has_chat. Implements ChatInterface: @implements', [
        '@model' => $model_id,
        '@path' => $path,
        '@class' => $provider_class,
        '@has_chat' => $has_chat_method ? 'yes' : 'no',
        '@implements' => $implements_interface ? 'yes' : 'no',
      ]);

      // Make the chat request.
      // Since isUsable('chat') returned true and we have models, the provider should support chat.
      $messages = [
        ['role' => 'user', 'content' => $prompt],
      ];

      // Try to get an operation handler if the provider supports it.
      $chat_output = NULL;
      if (method_exists($provider, 'getOperation')) {
        try {
          $operation = $provider->getOperation('chat');
          if ($operation && method_exists($operation, 'execute')) {
            // Some providers use operation handlers.
            $chat_output = $operation->execute($messages, $model_id);
          }
        }
        catch (\Exception $e) {
          \Drupal::logger('ai_404_redirect')->debug('getOperation approach failed: @message', [
            '@message' => $e->getMessage(),
          ]);
        }
      }
      
      // If operation handler didn't work, try direct chat method.
      if ($chat_output === NULL) {
        if ($has_chat_method) {
          // Try with model_id first (standard signature).
          try {
            $chat_output = $provider->chat($messages, $model_id);
          }
          catch (\TypeError $e) {
            // If that fails, try with just messages.
            try {
              $chat_output = $provider->chat($messages);
            }
            catch (\Exception $e2) {
              throw new \Exception('Chat method call failed with both signatures. First error: ' . $e->getMessage() . '. Second error: ' . $e2->getMessage());
            }
          }
        }
        elseif ($implements_interface) {
          // If it implements the interface, use it.
          $chat_output = $provider->chat($messages, $model_id);
        }
        else {
          // Last resort: try calling chat anyway if isUsable returned true.
          try {
            $chat_output = $provider->chat($messages, $model_id);
          }
          catch (\Exception $e) {
            throw new \Exception('Provider reports it is usable for chat but chat() method failed. Provider class: ' . $provider_class . '. Error: ' . $e->getMessage());
          }
        }
      }

      // Extract the response content from ChatOutput.
      $response_text = '';
      
      // ChatOutput has getNormalized() which returns a ChatMessage.
      // ChatMessage has getText() to get the actual text.
      if ($chat_output instanceof ChatOutput) {
        $normalized = $chat_output->getNormalized();
        
        // Check if it's a ChatMessage (not a streamed iterator).
        if ($normalized instanceof \Drupal\ai\OperationType\Chat\ChatMessage) {
          $response_text = $normalized->getText();
        }
        elseif ($normalized instanceof \Drupal\ai\OperationType\Chat\StreamedChatMessageIteratorInterface) {
          // For streamed responses, we need to iterate through them.
          // For now, just get the first message or log a warning.
          \Drupal::logger('ai_404_redirect')->warning('Received streamed chat response - this is not fully supported. Using fallback.');
          $response_text = '';
        }
        else {
          // Fallback: try to get text from normalized if it has a method.
          if (is_object($normalized) && method_exists($normalized, 'getText')) {
            $response_text = $normalized->getText();
          }
          else {
            $response_text = (string) $normalized;
          }
        }
      }
      elseif (is_object($chat_output)) {
        // For other objects, try common methods.
        if (method_exists($chat_output, 'getText')) {
          $response_text = $chat_output->getText();
        }
        elseif (method_exists($chat_output, 'getContent')) {
          $response_text = $chat_output->getContent();
        }
        elseif (method_exists($chat_output, '__toString')) {
          $response_text = (string) $chat_output;
        }
        else {
          // Try to get content from common properties.
          $response_text = $chat_output->content ?? $chat_output->text ?? $chat_output->message ?? '';
        }
      }
      elseif (is_string($chat_output)) {
        $response_text = $chat_output;
      }
      else {
        $response_text = '';
      }

      \Drupal::logger('ai_404_redirect')->debug('AI response received for path: @path. Response length: @length', [
        '@path' => $path,
        '@length' => strlen($response_text),
      ]);

      $result = $this->parseAIResponse($response_text, $candidates);
      
      // Validate AI result: if AI suggested something, check if our fallback algorithm
      // finds a better match (especially for exact word matches in same position).
      if ($result['should_redirect'] && !empty($result['suggested_path'])) {
        $fallback_result = $this->fallbackAnalysis($path, $candidates, $is_bot);
        
        // If fallback found a match with higher confidence or better path structure match,
        // prefer the fallback result. This catches cases where AI missed an obvious match.
        if ($fallback_result['should_redirect'] && !empty($fallback_result['suggested_path'])) {
          $ai_path_parts = array_filter(explode('/', trim($result['suggested_path'], '/')));
          $fallback_path_parts = array_filter(explode('/', trim($fallback_result['suggested_path'], '/')));
          $path_parts = array_filter(explode('/', trim($path, '/')));
          
          // Check if fallback has exact word match in same position (especially last segment).
          $fallback_has_exact_match = FALSE;
          $ai_has_exact_match = FALSE;
          
          if (!empty($path_parts) && !empty($fallback_path_parts)) {
            $last_path_seg = strtolower(end($path_parts));
            $last_fallback_seg = strtolower(end($fallback_path_parts));
            if ($last_path_seg === $last_fallback_seg) {
              $fallback_has_exact_match = TRUE;
            }
          }
          
          if (!empty($path_parts) && !empty($ai_path_parts)) {
            $last_path_seg = strtolower(end($path_parts));
            $last_ai_seg = strtolower(end($ai_path_parts));
            if ($last_path_seg === $last_ai_seg) {
              $ai_has_exact_match = TRUE;
            }
          }
          
          // Prefer fallback if it has exact match and AI doesn't, or if fallback confidence is significantly higher.
          if (($fallback_has_exact_match && !$ai_has_exact_match) || 
              ($fallback_result['confidence_score'] > $result['confidence_score'] + 10)) {
            \Drupal::logger('ai_404_redirect')->info('Fallback found better match than AI for path: @path. AI: @ai_path (conf: @ai_conf), Fallback: @fb_path (conf: @fb_conf). Using fallback.', [
              '@path' => $path,
              '@ai_path' => $result['suggested_path'],
              '@ai_conf' => $result['confidence_score'],
              '@fb_path' => $fallback_result['suggested_path'],
              '@fb_conf' => $fallback_result['confidence_score'],
            ]);
            $result = $fallback_result;
            $result['reasoning'] = 'Fallback algorithm found better match: ' . $result['reasoning'];
          }
        }
      }
      
      // Update last AI call time on successful call.
      $current_time = \Drupal::time()->getRequestTime();
      $this->state->set('ai_404_redirect.last_ai_call', $current_time);
      // Track successful calls to distinguish real rate limits from auth/quota issues.
      $this->state->set('ai_404_redirect.last_successful_call', $current_time);
      
      // Log the parsed result.
      \Drupal::logger('ai_404_redirect')->info('AI analysis result for path: @path - should_redirect: @redirect, confidence: @confidence, suggested_path: @suggested', [
        '@path' => $path,
        '@redirect' => $result['should_redirect'] ? 'yes' : 'no',
        '@confidence' => $result['confidence_score'],
        '@suggested' => $result['suggested_path'] ?? 'none',
      ]);
      
      return $result;
    }
    catch (\Exception $e) {
      $error_message = $e->getMessage();
      $current_time = \Drupal::time()->getRequestTime();
      
      // Check for specific error types. Be more careful about rate limit detection.
      // "Request rate limit has been exceeded" from OpenAI often means auth/quota issues, not actual rate limits.
      // If we see this error and have no successful calls, it's almost certainly auth/quota.
      
      $is_auth_error = stripos($error_message, 'api key') !== FALSE ||
                       stripos($error_message, 'authentication') !== FALSE ||
                       stripos($error_message, 'unauthorized') !== FALSE ||
                       stripos($error_message, 'invalid') !== FALSE ||
                       stripos($error_message, '401') !== FALSE;
      
      // Check if we've had any successful AI calls recently.
      $last_successful_call = $this->state->get('ai_404_redirect.last_successful_call', 0);
      $has_recent_success = ($current_time - $last_successful_call) < 3600; // Within last hour
      
      // "Request rate limit has been exceeded" without recent success = likely auth/quota issue
      $suspicious_rate_limit = stripos($error_message, 'rate limit') !== FALSE && 
                               stripos($error_message, 'exceeded') !== FALSE &&
                               !$has_recent_success;
      
      $is_quota_error = $suspicious_rate_limit || // Treat suspicious rate limit as quota
                        (stripos($error_message, 'quota') !== FALSE && stripos($error_message, 'exceeded') !== FALSE) ||
                        stripos($error_message, 'insufficient') !== FALSE ||
                        stripos($error_message, 'billing') !== FALSE ||
                        stripos($error_message, 'payment') !== FALSE;
      
      // Only treat as rate limit if it's clearly a rate limit (429 or "too many requests")
      // AND we've had recent successful calls (meaning it's a real rate limit, not auth/quota).
      $is_rate_limit = (stripos($error_message, 'too many requests') !== FALSE || 
                        stripos($error_message, '429') !== FALSE) &&
                       !$is_auth_error && !$is_quota_error && $has_recent_success;
      
      $is_temporary = stripos($error_message, 'timeout') !== FALSE ||
                      stripos($error_message, 'network') !== FALSE ||
                      stripos($error_message, 'temporary') !== FALSE ||
                      stripos($error_message, '503') !== FALSE;

      // Prioritize auth/quota errors - these are configuration issues, not rate limits.
      if ($is_auth_error || $is_quota_error) {
        // Authentication or quota errors - disable AI for 10 minutes and log clearly.
        $this->state->set('ai_404_redirect.last_ai_call', \Drupal::time()->getRequestTime() + 600);
        $this->state->set('ai_404_redirect.ai_disabled_reason', $is_auth_error ? 'authentication' : 'quota');
        \Drupal::logger('ai_404_redirect')->error('AI provider configuration issue for path @path: @message. This indicates missing/invalid API key or insufficient credits/quota. AI disabled for 10 minutes. Please check configuration at /admin/config/ai/models. Using fallback analysis.', [
          '@path' => $path,
          '@message' => $error_message,
        ]);
      }
      elseif ($is_rate_limit) {
        // Actual rate limit - wait 60 seconds.
        $this->state->set('ai_404_redirect.last_ai_call', \Drupal::time()->getRequestTime() + 60);
        \Drupal::logger('ai_404_redirect')->warning('AI rate limit hit for path @path. Waiting 60 seconds before next AI call. Using fallback analysis.', [
          '@path' => $path,
        ]);
      }
      elseif ($is_temporary) {
        \Drupal::logger('ai_404_redirect')->warning('AI analysis temporarily unavailable for path @path: @message. Using fallback analysis.', [
          '@path' => $path,
          '@message' => $error_message,
        ]);
      }
      else {
        // Unknown error - log but don't throttle too aggressively.
        // If we see this repeatedly, it might be a config issue.
        \Drupal::logger('ai_404_redirect')->error('AI analysis failed for path @path: @message. Using fallback analysis.', [
          '@path' => $path,
          '@message' => $error_message,
        ]);
      }

      // Fallback to simple matching if AI fails.
      $fallback_result = $this->fallbackAnalysis($path, $candidates, $is_bot);
      
      // Update reasoning based on error type.
      if ($is_auth_error || $is_quota_error) {
        $fallback_result['reasoning'] = 'AI configuration issue - using fallback keyword matching. ' . ($fallback_result['reasoning'] ?? '');
      }
      elseif ($is_rate_limit) {
        $fallback_result['reasoning'] = 'Rate limit exceeded - using fallback keyword matching. ' . ($fallback_result['reasoning'] ?? '');
      }
      
      \Drupal::logger('ai_404_redirect')->info('Using fallback analysis for path: @path - should_redirect: @redirect, confidence: @confidence, suggested_path: @suggested', [
        '@path' => $path,
        '@redirect' => $fallback_result['should_redirect'] ? 'yes' : 'no',
        '@confidence' => $fallback_result['confidence_score'],
        '@suggested' => $fallback_result['suggested_path'] ?? 'none',
      ]);
      
      return $fallback_result;
    }
  }

  /**
   * Builds the AI prompt.
   *
   * @param string $path
   *   The 404 path.
   * @param array $candidates
   *   Candidate pages.
   * @param bool $is_bot
   *   Whether this is a bot.
   *
   * @return string
   *   The prompt text.
   */
  protected function buildPrompt($path, array $candidates, $is_bot) {
    // The non-empty path segments drive both the candidate ordering and the
    // closing hint about the final segment.
    $segments = array_filter(explode('/', trim($path, '/')));
    $ranked = $this->prioritizeCandidates($candidates, $segments);

    // List at most the first 100 ranked candidates, one per line. Candidates
    // without an alias are shown by their /node/{nid} system path.
    $listing = [];
    foreach (array_slice($ranked, 0, 100) as $page) {
      $shown_path = $page['alias'] ?? '/node/' . $page['nid'];
      $listing[] = "- Title: {$page['title']}, Path: {$shown_path}, ID: {$page['nid']}\n";
    }
    $listing_text = implode('', $listing);

    $tail = $segments ? end($segments) : '';

    // The prompt is assembled as an ordered list of fragments and joined once
    // at the end; two fragments are conditional (bot notice, last-segment
    // hint).
    $fragments = [];
    $fragments[] = "A user encountered a 404 error for the path: {$path}\n\n";

    if ($is_bot) {
      $fragments[] = "This request appears to be from a web crawler/bot.\n\n";
    }

    $fragments[] = "Available pages on the website:\n{$listing_text}\n\n";

    // Expected response format.
    $fragments[] = "Analyze this 404 error and provide a JSON response with the following structure:\n";
    $fragments[] = "{\n";
    $fragments[] = "  \"should_redirect\": true/false (true if this should be redirected, false if it should be ignored - ignore if it's clearly a bot just checking if page exists),\n";
    $fragments[] = "  \"confidence_score\": 0-100 (confidence that a redirect is appropriate),\n";
    $fragments[] = "  \"suggested_node_id\": node_id or null (the ID of the best matching page),\n";
    $fragments[] = "  \"reasoning\": \"brief explanation of why this redirect makes sense or why it should be ignored\"\n";
    $fragments[] = "}\n\n";

    // Matching rules, most important first.
    $fragments[] = "CRITICAL MATCHING PRIORITIES (in order of importance):\n";
    $fragments[] = "1. **Exact word match in the same position** - If the 404 path has segments like 'who-we-were/careers', prioritize pages with 'careers' in the same position (e.g., 'who-we-are/careers' is MUCH better than 'who-we-are/financial-reports')\n";
    $fragments[] = "2. **Last segment match** - The last segment of the path is especially important. If the 404 ends with 'careers', strongly prefer pages that also end with 'careers'\n";
    $fragments[] = "3. **Path structure similarity** - Paths with similar structure (same number of segments, similar words in same positions) are better matches\n";
    $fragments[] = "4. **Typo detection** - If segments are similar (e.g., 'were' vs 'are'), this is likely a typo and should match the corrected version\n";
    $fragments[] = "5. **Keyword matching** - Only use general keyword matching as a last resort\n\n";

    $fragments[] = "Additional considerations:\n";
    $fragments[] = "- If this is a bot request checking for page existence, set should_redirect to false\n";
    $fragments[] = "- Only suggest a redirect if there's a clear, logical match\n";
    $fragments[] = "- Confidence should be high (80+) only for very clear matches\n";

    if (!empty($tail)) {
      $fragments[] = "- The last segment '{$tail}' is particularly important - prioritize pages with this exact word in the same position\n";
    }

    return implode('', $fragments);
  }

  /**
   * Parses the AI response.
   *
   * @param mixed $response
   *   The AI response.
   * @param array $candidates
   *   Candidate pages.
   *
   * @return array
   *   Parsed analysis.
   */
  protected function parseAIResponse($response, array $candidates) {
    // Returned whenever the response cannot be turned into a usable analysis.
    $default = [
      'should_redirect' => FALSE,
      'confidence_score' => 0,
      'suggested_node_id' => NULL,
      'suggested_path' => NULL,
      'reasoning' => 'AI analysis unavailable',
    ];

    if (empty($response) || !is_string($response)) {
      \Drupal::logger('ai_404_redirect')->warning('Empty or invalid AI response received.');
      return $default;
    }

    // The JSON may be surrounded by other text, so extract the first
    // brace-delimited object (one level of nesting allowed) rather than
    // decoding the whole response.
    if (!preg_match('/\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}/s', $response, $matches)) {
      \Drupal::logger('ai_404_redirect')->warning('No JSON found in AI response. Response: @response', [
        '@response' => substr($response, 0, 500),
      ]);
      return $default;
    }

    $json = json_decode($matches[0], TRUE);
    if (json_last_error() !== JSON_ERROR_NONE || !is_array($json)) {
      \Drupal::logger('ai_404_redirect')->warning('Failed to parse AI JSON response. JSON error: @error. Response: @response', [
        '@error' => json_last_error_msg(),
        '@response' => substr($response, 0, 500),
      ]);
      return $default;
    }

    $result = $default;

    // A quoted boolean such as "false" is a non-empty string, so it must be
    // interpreted by value rather than by truthiness.
    $flag = $json['should_redirect'] ?? FALSE;
    $result['should_redirect'] = is_string($flag)
      ? filter_var($flag, FILTER_VALIDATE_BOOLEAN)
      : !empty($flag);

    // The prompt asks for 0-100; clamp so an out-of-range answer cannot exceed
    // the scale that approval thresholds are compared against.
    $raw_score = $json['confidence_score'] ?? 0;
    $score = is_scalar($raw_score) ? (float) $raw_score : 0.0;
    $result['confidence_score'] = max(0.0, min(100.0, $score));

    // Only a positive numeric ID can refer to a node; anything else (the
    // string "null", an array, 0) means "no suggestion".
    $raw_nid = $json['suggested_node_id'] ?? NULL;
    $nid = is_numeric($raw_nid) ? (int) $raw_nid : 0;
    $result['suggested_node_id'] = $nid > 0 ? $nid : NULL;

    // Reasoning is later concatenated and stored, so it must be a string.
    $reasoning = $json['reasoning'] ?? NULL;
    $result['reasoning'] = is_scalar($reasoning) ? (string) $reasoning : 'No reasoning provided';

    if ($result['suggested_node_id'] === NULL) {
      return $result;
    }

    // Prefer the alias already known from the candidate list.
    foreach ($candidates as $candidate) {
      if (isset($candidate['nid']) && (int) $candidate['nid'] === $result['suggested_node_id']) {
        $result['suggested_path'] = $candidate['alias'] ?? NULL;
        break;
      }
    }

    // Otherwise resolve the path from the node itself, but only if the node
    // actually loads, so an invented ID does not produce a path.
    if (empty($result['suggested_path'])) {
      try {
        $node = $this->entityTypeManager->getStorage('node')->load($result['suggested_node_id']);
        if ($node) {
          $result['suggested_path'] = $this->pathAliasManager->getAliasByPath('/node/' . $result['suggested_node_id']);
        }
      }
      catch (\Exception $e) {
        \Drupal::logger('ai_404_redirect')->warning('Could not load node @nid for suggested path: @message', [
          '@nid' => $result['suggested_node_id'],
          '@message' => $e->getMessage(),
        ]);
      }
    }

    return $result;
  }

  /**
   * Fallback analysis when AI is unavailable.
   *
   * @param string $path
   *   The 404 path.
   * @param array $candidates
   *   Candidate pages.
   * @param bool $is_bot
   *   Whether this is a bot.
   *
   * @return array
   *   Analysis results.
   */
  protected function fallbackAnalysis($path, array $candidates, $is_bot) {
    // Split the requested path into its non-empty segments. array_filter()
    // preserves keys, so the result is re-indexed: the positional comparisons
    // below read $path_parts[$i] for consecutive $i, which would hit missing
    // offsets for paths such as "a//b".
    $path_normalized = trim($path, '/');
    $path_parts = array_values(array_filter(explode('/', $path_normalized)));

    if (empty($path_parts)) {
      return [
        'should_redirect' => FALSE,
        'confidence_score' => 0,
        'suggested_node_id' => NULL,
        'suggested_path' => NULL,
        'reasoning' => $is_bot ? 'Bot request - ignore' : 'No keywords in path for matching',
      ];
    }

    $best_match = NULL;
    $best_score = 0;
    $best_reasoning = '';

    foreach ($candidates as $candidate) {
      $alias = $candidate['alias'] ?? NULL;
      $title = $candidate['title'] ?? '';

      // Look the alias up when the candidate does not carry one. An "alias"
      // equal to the system path means the node has no alias.
      if (empty($alias) && !empty($candidate['nid'])) {
        $system_path = '/node/' . $candidate['nid'];
        try {
          $alias = $this->pathAliasManager->getAliasByPath($system_path);
          if ($alias === $system_path) {
            $alias = NULL;
          }
        }
        catch (\Exception $e) {
          // Best effort: a failed lookup just means this candidate is skipped.
          $alias = NULL;
        }
      }

      // Candidates without an alias cannot be compared by path.
      if (empty($alias)) {
        continue;
      }

      $alias_normalized = trim($alias, '/');
      $alias_parts = array_values(array_filter(explode('/', $alias_normalized)));

      $score = 0;
      $reasoning_parts = [];

      // 1. Path structure similarity (heaviest weight).
      $path_structure_score = $this->calculatePathStructureScore($path_parts, $alias_parts);
      if ($path_structure_score > 0) {
        $score += $path_structure_score * 80;
        $reasoning_parts[] = "path structure match (" . round($path_structure_score, 3) . ")";
      }

      // 2. Exact word match in the same position, e.g.
      // /who-we-were/careers -> /who-we-are/careers.
      $compared = min(count($path_parts), count($alias_parts));
      $exact_position_matches = 0;
      $last_segment_match = FALSE;
      for ($i = 0; $i < $compared; $i++) {
        if (strtolower($path_parts[$i]) === strtolower($alias_parts[$i])) {
          $exact_position_matches++;
          // "Last" here is the last position both paths have in common.
          if ($i === $compared - 1) {
            $last_segment_match = TRUE;
          }
        }
      }
      if ($exact_position_matches > 0) {
        $position_bonus = $exact_position_matches * 30;
        // The extra bonus only applies when both paths have the same depth.
        if ($last_segment_match && count($path_parts) === count($alias_parts)) {
          $position_bonus += 50;
        }
        $score += $position_bonus;
        $reasoning_parts[] = "exact position matches ({$exact_position_matches})" . ($last_segment_match ? " [last segment match]" : "");
      }

      // 3. Similarity of the full path strings; only counted above 50%.
      $path_similarity = $this->calculateStringSimilarity($path_normalized, $alias_normalized);
      if ($path_similarity > 0.5) {
        $score += $path_similarity * 20;
        $reasoning_parts[] = "path similarity (" . round($path_similarity * 100) . "%)";
      }

      // 4. One path being a prefix of the other (e.g. loans/land and
      // loans/land01).
      $prefix_score = $this->calculatePrefixMatch($path_normalized, $alias_normalized);
      if ($prefix_score > 0) {
        $score += $prefix_score * 30;
        $reasoning_parts[] = "prefix match (" . round($prefix_score, 3) . ")";
      }

      // 5. Keyword matching against the alias (low weight).
      $keyword_score = $this->calculateKeywordMatch($path_parts, $alias_parts, $alias_normalized);
      if ($keyword_score > 0) {
        $score += $keyword_score * 8;
        $reasoning_parts[] = "keyword match ({$keyword_score})";
      }

      // 6. Path segments appearing in the title (lowest weight).
      if (!empty($title)) {
        $title_lower = strtolower($title);
        $title_score = 0;
        foreach ($path_parts as $part) {
          if (strpos($title_lower, strtolower($part)) !== FALSE) {
            $title_score += 3;
          }
        }
        if ($title_score > 0) {
          $score += $title_score;
          $reasoning_parts[] = "title match ({$title_score})";
        }
      }

      // Keep the first candidate that reaches the highest score.
      if ($score > $best_score) {
        $best_score = $score;
        $best_match = $candidate;
        // Store the resolved alias, which may have come from the lookup above.
        $best_match['alias'] = $alias;
        $best_reasoning = implode(', ', $reasoning_parts);
      }
    }

    $suggested_path = $best_match['alias'] ?? NULL;

    // Bots never get a redirect; otherwise a score of 20 is enough.
    $min_score = 20;
    $should_redirect = !$is_bot && $best_score >= $min_score && !empty($suggested_path);

    // Confidence is 1.5x the score, kept within 20-100 for any match.
    $confidence = $best_score > 0 ? min(100, max(20, (int) ($best_score * 1.5))) : 0;

    return [
      'should_redirect' => $should_redirect,
      'confidence_score' => $confidence,
      'suggested_node_id' => $best_match['nid'] ?? NULL,
      'suggested_path' => $suggested_path,
      'reasoning' => $is_bot ? 'Bot request - ignore' : ($best_match ? "Fallback matching: {$best_reasoning} (score: {$best_score})" : 'No matching pages found'),
    ];
  }

  /**
   * Calculates path structure similarity score.
   *
   * Compares path segments in order and gives higher scores for matching segments.
   * Prioritizes exact matches, especially in the last position.
   *
   * @param array $path1_parts
   *   First path parts.
   * @param array $path2_parts
   *   Second path parts.
   *
   * @return float
   *   Score between 0 and 1.
   */
  protected function calculatePathStructureScore(array $path1_parts, array $path2_parts) {
    if (empty($path1_parts) || empty($path2_parts)) {
      return 0;
    }

    // Segment arrays built with array_filter() keep their original keys and
    // can have gaps (e.g. "a//b" yields keys 0 and 2). Re-index so that every
    // positional lookup below refers to an existing segment.
    $path1_parts = array_values($path1_parts);
    $path2_parts = array_values($path2_parts);

    $count1 = count($path1_parts);
    $count2 = count($path2_parts);
    $same_length = $count1 === $count2;
    $max_parts = max($count1, $count2);
    $min_parts = min($count1, $count2);

    $score = 0;

    // Compare segments position by position; earlier positions weigh more
    // because each contribution is divided by ($i + 1).
    for ($i = 0; $i < $min_parts; $i++) {
      $part1 = strtolower($path1_parts[$i]);
      $part2 = strtolower($path2_parts[$i]);

      // The final segment counts triple, but only when both paths have the
      // same number of segments.
      $is_last = $same_length && $i === $min_parts - 1;
      $position_weight = $is_last ? 3.0 : 1.0;

      if ($part1 === $part2) {
        $score += ($position_weight * 2.0) / ($i + 1);
        continue;
      }

      $typo_similarity = $this->calculateTypoSimilarity($part1, $part2);
      if ($typo_similarity > 0.7) {
        // Near-identical segments score almost like an exact match.
        $score += ($position_weight * 1.5 * $typo_similarity) / ($i + 1);
      }
      elseif (strpos($part1, $part2) !== FALSE || strpos($part2, $part1) !== FALSE) {
        // One segment contains the other.
        $score += ($position_weight * 0.5) / ($i + 1);
      }
    }

    // Additional bonus for equal-depth paths whose final segments agree.
    if ($same_length) {
      $last1 = strtolower($path1_parts[$count1 - 1]);
      $last2 = strtolower($path2_parts[$count2 - 1]);
      if ($last1 === $last2) {
        $score += 1.0;
      }
      elseif ($this->calculateTypoSimilarity($last1, $last2) > 0.7) {
        $score += 0.7;
      }
    }

    // Normalize by the longer path (at least 2) and cap at 1.0.
    return min(1.0, $score / max(2.0, $max_parts));
  }
  
  /**
   * Calculates typo similarity between two words.
   *
   * Uses Levenshtein distance to detect common typos.
   *
   * @param string $word1
   *   First word.
   * @param string $word2
   *   Second word.
   *
   * @return float
   *   Similarity score between 0 and 1.
   */
  protected function calculateTypoSimilarity($word1, $word2) {
    // Identical words need no distance computation.
    if ($word1 === $word2) {
      return 1.0;
    }

    // Words whose byte lengths differ by more than two are never typos.
    $lengths = [strlen($word1), strlen($word2)];
    if (abs($lengths[0] - $lengths[1]) > 2) {
      return 0;
    }

    $longest = max($lengths);
    $edits = levenshtein($word1, $word2);

    if ($longest === 0) {
      return 1.0;
    }

    // Edit budget: one edit for words of up to 4 bytes, otherwise a third of
    // the longer word's length (rounded down), capped at two edits.
    if ($longest <= 4) {
      $allowed_edits = 1;
    }
    else {
      $allowed_edits = min(2, floor($longest / 3));
    }

    if ($edits > $allowed_edits) {
      return 0;
    }

    // Within budget: similarity is the share of the longer word left intact.
    return max(0, 1.0 - ($edits / $longest));
  }

  /**
   * Calculates case-insensitive string similarity using similar_text().
   *
   * @param string $str1
   *   First string.
   * @param string $str2
   *   Second string.
   *
   * @return float
   *   Similarity score between 0 and 1.
   */
  protected function calculateStringSimilarity($str1, $str2) {
    // The comparison is case-insensitive.
    [$left, $right] = [strtolower($str1), strtolower($str2)];

    if ($left === $right) {
      return 1.0;
    }

    // Nothing can be matched against an empty value.
    if (!$left || !$right) {
      return 0;
    }

    // similar_text() reports the similarity as a percentage through its third
    // argument; scale it to the 0-1 range.
    $percent = 0;
    similar_text($left, $right, $percent);

    return $percent / 100;
  }

  /**
   * Calculates prefix match score.
   *
   * Checks if one path is a prefix of another (e.g., /loans/land is prefix of /loans/land01).
   *
   * @param string $path1
   *   First path.
   * @param string $path2
   *   Second path.
   *
   * @return float
   *   Score between 0 and 1.
   */
  protected function calculatePrefixMatch($path1, $path2) {
    $first = strtolower($path1);
    $second = strtolower($path2);
    $first_length = strlen($first);
    $second_length = strlen($second);

    // Equal-length paths cannot be a proper prefix of one another.
    if ($first_length === $second_length) {
      return 0;
    }

    // The direction does not matter: test whether the shorter path opens the
    // longer one (e.g. loans/land and loans/land01).
    if ($first_length < $second_length) {
      $shorter = $first;
      $longer = $second;
    }
    else {
      $shorter = $second;
      $longer = $first;
    }

    if (!str_starts_with($longer, $shorter)) {
      return 0;
    }

    // Scale by how much of the longer path is covered; 0.8 keeps a prefix
    // match below a perfect score.
    $ratio = strlen($shorter) / strlen($longer);
    return $ratio * 0.8;
  }

  /**
   * Prioritizes candidates based on keyword matching with the 404 path.
   *
   * @param array $candidates
   *   All candidate pages.
   * @param array $path_parts
   *   Path segments from the 404 path.
   *
   * @return array
   *   Prioritized candidates array.
   */
  protected function prioritizeCandidates(array $candidates, array $path_parts) {
    if (empty($path_parts)) {
      return $candidates;
    }

    // Values that do not depend on the candidate are prepared once. Keys are
    // kept so positions line up with the caller's segment keys.
    $part_total = count($path_parts);
    $needles = array_map('strtolower', $path_parts);
    $final_needle = strtolower(end($path_parts));

    $matched = [];
    $unmatched = [];

    foreach ($candidates as $candidate) {
      $alias = $candidate['alias'] ?? NULL;
      $title = strtolower($candidate['title'] ?? '');
      $alias_lower = '';
      $score = 0;

      if ($alias) {
        $alias_lower = strtolower($alias);
        $alias_parts = array_filter(explode('/', trim($alias, '/')));

        // Structural points: 100 for every segment equal to the requested
        // segment at the same position, plus 50 when the final segments agree.
        $structural = 0;
        foreach ($alias_parts as $idx => $alias_part) {
          if (isset($path_parts[$idx]) && strtolower($alias_part) === $needles[$idx]) {
            $structural += 100;
          }
        }
        if (!empty($alias_parts) && strtolower(end($alias_parts)) === $final_needle) {
          $structural += 50;
        }

        // Structural points count once per requested segment.
        $score += $structural * $part_total;
      }

      // Substring hits: 10 per segment found in the alias, 5 per segment found
      // in the title.
      foreach ($needles as $needle) {
        if ($alias && strpos($alias_lower, $needle) !== FALSE) {
          $score += 10;
        }
        if (strpos($title, $needle) !== FALSE) {
          $score += 5;
        }
      }

      if ($score > 0) {
        $candidate['_priority_score'] = $score;
        $matched[] = $candidate;
      }
      else {
        $unmatched[] = $candidate;
      }
    }

    // Highest score first.
    usort($matched, function ($a, $b) {
      return ($b['_priority_score'] ?? 0) <=> ($a['_priority_score'] ?? 0);
    });

    // Scored candidates lead; the rest keep their incoming order.
    return array_merge($matched, $unmatched);
  }

  /**
   * Generates fuzzy variations of a keyword to catch common typos.
   *
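   * The variations are heuristic and are not checked against a dictionary:
   * - Repeated letters collapsed: "carreers" -> "carers", "offfer" -> "ofer"
   * - A vowel inserted after a leading consonant cluster that is not already
   *   followed by one: "st-john" -> "sta-john"
   * - Common character substitutions, in both directions: "phone" -> "fone"
   * - Common suffixes removed: "offered" -> "offer"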
   *
   * @param string $keyword
   *   The keyword to generate variations for.
   *
   * @return array
   *   Array of fuzzy variations.
   */
  protected function generateFuzzyVariations($keyword) {
    $variations = [];
    $keyword_lower = strtolower($keyword);

    // 1. Collapse every run of a repeated character to a single character
    // (e.g. "carreers" -> "carers").
    $no_doubles = preg_replace('/(.)\1+/', '$1', $keyword_lower);
    if ($no_doubles !== $keyword_lower) {
      $variations[] = $no_doubles;
    }

    // 2. When the keyword opens with a consonant cluster that is not followed
    // by a vowel, try each vowel right after that cluster. The pattern does
    // not depend on the vowel, so it is matched once.
    if (strlen($keyword_lower) >= 3
      && preg_match('/^([bcdfghjklmnpqrstvwxyz]+)([aeiou]?)(.*)$/i', $keyword_lower, $matches)
      && empty($matches[2])) {
      foreach (['a', 'e', 'i', 'o', 'u'] as $vowel) {
        $variations[] = $matches[1] . $vowel . $matches[3];
      }
    }

    // 3. Reduce runs of three or more identical characters to a double
    // (e.g. "offfer" -> "offer"). Collapsing to a single character would only
    // repeat step 1 and miss words that legitimately contain a double letter.
    $no_triples = preg_replace('/(.)\1{2,}/', '$1$1', $keyword_lower);
    if ($no_triples !== $keyword_lower) {
      $variations[] = $no_triples;
    }

    // 4. Common character substitutions, applied in both directions.
    $substitutions = [
      'ph' => 'f',
      'ck' => 'k',
      'qu' => 'kw',
      'x' => 'ks',
    ];
    foreach ($substitutions as $from => $to) {
      if (strpos($keyword_lower, $from) !== FALSE) {
        $variations[] = str_replace($from, $to, $keyword_lower);
      }
      if (strpos($keyword_lower, $to) !== FALSE) {
        $variations[] = str_replace($to, $from, $keyword_lower);
      }
    }

    // 5. Strip common suffixes (e.g. "offered" -> "offer"), keeping only stems
    // of at least three characters.
    $suffixes = ['ed', 'ing', 'er', 'ers', 's', 'es'];
    foreach ($suffixes as $suffix) {
      if (strlen($keyword_lower) > strlen($suffix) && substr($keyword_lower, -strlen($suffix)) === $suffix) {
        $base = substr($keyword_lower, 0, -strlen($suffix));
        if (strlen($base) >= 3) {
          $variations[] = $base;
        }
      }
    }

    // Drop very short variations, the keyword itself and duplicates, then
    // re-index so the result is a plain list.
    $variations = array_filter($variations, function ($var) use ($keyword_lower) {
      return strlen($var) >= 3 && $var !== $keyword_lower;
    });

    return array_values(array_unique($variations));
  }

  /**
   * Calculates keyword matching score.
   *
   * @param array $path_parts
   *   Path parts from the 404.
   * @param array $alias_parts
   *   Alias path parts.
   * @param string $alias_normalized
   *   Normalized alias string.
   *
   * @return float
   *   Score.
   */
  protected function calculateKeywordMatch(array $path_parts, array $alias_parts, $alias_normalized) {
    // Lowercase the alias side once; every comparison is case-insensitive.
    $haystack = strtolower($alias_normalized);
    $alias_words = array_map('strtolower', $alias_parts);

    $total = 0;

    foreach ($path_parts as $part) {
      $needle = strtolower($part);

      // Compare the keyword with each alias segment: 2 points for equality,
      // 1 point when either one contains the other.
      foreach ($alias_words as $word) {
        if ($word === $needle) {
          $total += 2;
          continue;
        }
        if (strpos($word, $needle) !== FALSE || strpos($needle, $word) !== FALSE) {
          $total += 1;
        }
      }

      // Half a point when the keyword occurs anywhere in the full alias.
      if (strpos($haystack, $needle) !== FALSE) {
        $total += 0.5;
      }
    }

    return $total;
  }

  /**
   * Saves a redirect suggestion.
   *
   * @param string $path
   *   The 404 path.
   * @param array $analysis
   *   The analysis results.
   * @param string $user_agent
   *   The user agent.
   * @param bool $is_bot
   *   Whether this is a bot.
   * @param string $client_ip
   *   The client IP address.
   */
  protected function saveSuggestion($path, array $analysis, $user_agent, $is_bot, $client_ip = '') {
    $config = $this->configFactory->get('ai_404_redirect.settings');
    $minimum_count = (int) ($config->get('minimum_404_count') ?? 1);
    $confidence_threshold = $config->get('auto_approve_confidence_threshold') ?? 80;

    // A new suggestion is stored with a 404 count of 1, so it can only be
    // auto-approved when the configured minimum is 1 or lower. As in
    // updateSuggestion(), a suggestion without a destination path is never
    // auto-approved, because there would be nothing to redirect to.
    $auto_approve = $analysis['should_redirect']
      && $analysis['confidence_score'] >= $confidence_threshold
      && !$is_bot
      && $minimum_count <= 1
      && !empty($analysis['suggested_path']);
    $status = $auto_approve ? 'auto_approved' : 'pending';

    // Use one timestamp so "created" and "updated" are identical on insert.
    $now = \Drupal::time()->getRequestTime();

    $this->database->insert('ai_404_redirect_suggestions')
      ->fields([
        'source_path' => $path,
        'suggested_path' => $analysis['suggested_path'] ?? NULL,
        'suggested_node_id' => $analysis['suggested_node_id'] ?? NULL,
        'confidence_score' => $analysis['confidence_score'],
        'should_redirect' => $analysis['should_redirect'] ? 1 : 0,
        'is_bot' => $is_bot ? 1 : 0,
        // Truncated to 500 and 45 bytes. The string cast guards against a
        // NULL user agent or IP, which substr() no longer accepts silently.
        'user_agent' => substr((string) $user_agent, 0, 500),
        'client_ip' => substr((string) $client_ip, 0, 45),
        'status' => $status,
        'ai_reasoning' => $analysis['reasoning'] ?? '',
        '404_count' => 1,
        'created' => $now,
        'updated' => $now,
      ])
      ->execute();
  }

  /**
   * Updates an existing redirect suggestion.
   *
   * @param int $suggestion_id
   *   The suggestion ID.
   * @param array $analysis
   *   The analysis results.
   * @param string $user_agent
   *   The user agent.
   * @param bool $is_bot
   *   Whether this is a bot.
   * @param string $client_ip
   *   The client IP address.
   */
  protected function updateSuggestion($suggestion_id, array $analysis, $user_agent, $is_bot, $client_ip = '') {
    $settings = $this->configFactory->get('ai_404_redirect.settings');
    $minimum_count = (int) ($settings->get('minimum_404_count') ?? 1);
    $confidence_threshold = $settings->get('auto_approve_confidence_threshold') ?? 80;

    // Read the stored hit count and status of the suggestion.
    $row = $this->database->select('ai_404_redirect_suggestions', 's')
      ->fields('s', ['404_count', 'status'])
      ->condition('id', $suggestion_id)
      ->execute()
      ->fetchObject();

    // The column name starts with a digit, so it is read with brace syntax.
    // A missing value is treated as one previous hit.
    $previous_hits = isset($row->{'404_count'}) ? (int) $row->{'404_count'} : 1;
    $new_count = $previous_hits + 1;
    $status = $row->status ?? 'pending';

    // Only a pending suggestion can be promoted. Promotion needs a confident,
    // non-bot redirect with a destination and enough recorded hits.
    if ($status === 'pending') {
      $qualifies = $analysis['should_redirect']
        && $analysis['confidence_score'] >= $confidence_threshold
        && !$is_bot
        && $new_count >= $minimum_count
        && !empty($analysis['suggested_path']);
      if ($qualifies) {
        $status = 'auto_approved';
      }
    }

    // Store the latest analysis, the new count and the latest client IP. The
    // user agent is not updated here.
    $values = [
      'suggested_path' => $analysis['suggested_path'] ?? NULL,
      'suggested_node_id' => $analysis['suggested_node_id'] ?? NULL,
      'confidence_score' => $analysis['confidence_score'],
      'should_redirect' => $analysis['should_redirect'] ? 1 : 0,
      'status' => $status,
      'ai_reasoning' => $analysis['reasoning'] ?? '',
      '404_count' => $new_count,
      'client_ip' => substr($client_ip, 0, 45),
      'updated' => \Drupal::time()->getRequestTime(),
    ];

    $this->database->update('ai_404_redirect_suggestions')
      ->fields($values)
      ->condition('id', $suggestion_id)
      ->execute();
  }

  /**
   * Creates a redirect entity.
   *
   * @param string $source_path
   *   The source path.
   * @param array $analysis
   *   The analysis results.
   *
   * @return int|null
   *   The redirect ID or NULL on failure.
   */
  protected function createRedirect($source_path, array $analysis) {
    // Without a destination there is nothing to redirect to.
    if (empty($analysis['suggested_path'])) {
      return NULL;
    }

    $logger = \Drupal::logger('ai_404_redirect');

    try {
      $redirect_storage = $this->entityTypeManager->getStorage('redirect');

      // Normalize the destination: drop an 'internal:' scheme prefix
      // (setRedirect() is called with the bare path below) and make sure the
      // path starts with a slash.
      $suggested_path = preg_replace('/^internal:/', '', $analysis['suggested_path']);
      if (!str_starts_with($suggested_path, '/')) {
        $suggested_path = '/' . $suggested_path;
      }

      // Best-effort check of the destination. An invalid or unverifiable path
      // is only reported; the redirect is still created.
      try {
        $destination_url = \Drupal::service('path.validator')->getUrlIfValid($suggested_path);
        if (!$destination_url) {
          $logger->warning('Suggested path may not be valid: @path. Creating redirect anyway.', [
            '@path' => $suggested_path,
          ]);
        }
      }
      catch (\Exception $e) {
        // Validation failures are not fatal, but leave a trace instead of
        // discarding the exception silently.
        $logger->debug('Path validation threw an exception for @path: @message', [
          '@path' => $suggested_path,
          '@message' => $e->getMessage(),
        ]);
      }

      // Strip the query string and fragment, then the leading slash: the
      // source is stored without a leading slash. strcspn() is used instead of
      // strtok() because strtok() skips leading delimiters (turning "?a=b"
      // into "a=b") and overwrites the global tokenizer state.
      $source_string = (string) $source_path;
      $source_path_clean = ltrim(substr($source_string, 0, strcspn($source_string, '?#')), '/');

      // Strict comparison: empty() would also reject the valid path "0".
      if ($source_path_clean === '') {
        $logger->error('Cannot create redirect: source path is empty after cleaning');
        return NULL;
      }

      // Do not create a second redirect for a source that already has one.
      $existing = $redirect_storage->loadByProperties([
        'redirect_source__path' => $source_path_clean,
      ]);
      if (!empty($existing)) {
        return NULL;
      }

      // Build the redirect entity: permanent (301), language-neutral.
      $redirect = $redirect_storage->create([
        'status_code' => 301,
        'language' => 'und',
      ]);
      $redirect->get('redirect_source')->set(0, ['path' => $source_path_clean]);
      $redirect->setRedirect($suggested_path);

      // Make sure the destination actually resolved to a URL.
      $redirect_url = $redirect->getRedirectUrl();
      if (!$redirect_url) {
        $logger->error('Failed to set redirect URL for path: @source -> @dest', [
          '@source' => $source_path_clean,
          '@dest' => $suggested_path,
        ]);
        return NULL;
      }

      // Run entity validation and refuse to save on any violation.
      $violations = $redirect->validate();
      if ($violations->count() > 0) {
        $messages = [];
        foreach ($violations as $violation) {
          $messages[] = $violation->getMessage();
        }
        $logger->error('Redirect validation failed: @messages', [
          '@messages' => implode(', ', $messages),
        ]);
        return NULL;
      }

      $redirect->save();
      $redirect_id = $redirect->id();

      // Link the newest suggestion row for this path to the new redirect.
      // This bookkeeping has its own try/catch: once the redirect is saved, a
      // failure here must not make the method report that no redirect exists.
      try {
        $suggestion_id = $this->database->select('ai_404_redirect_suggestions', 's')
          ->fields('s', ['id'])
          ->condition('source_path', $source_path)
          ->orderBy('created', 'DESC')
          ->range(0, 1)
          ->execute()
          ->fetchField();

        if ($suggestion_id) {
          $this->database->update('ai_404_redirect_suggestions')
            ->fields([
              'redirect_id' => $redirect_id,
              'status' => 'auto_approved',
              'updated' => \Drupal::time()->getRequestTime(),
            ])
            ->condition('id', $suggestion_id)
            ->execute();

          // Invalidate cache tags so Views will refresh.
          \Drupal::service('cache_tags.invalidator')->invalidateTags([
            'ai_404_redirect_suggestion:' . $suggestion_id,
            'ai_404_redirect_suggestions',
          ]);

          $logger->debug('Updated suggestion @id with redirect_id @redirect_id and status auto_approved', [
            '@id' => $suggestion_id,
            '@redirect_id' => $redirect_id,
          ]);
        }
        else {
          $logger->warning('Could not find suggestion record to update for path: @path', [
            '@path' => $source_path,
          ]);
        }
      }
      catch (\Exception $e) {
        $logger->error('Redirect @redirect_id was created, but updating the suggestion record for @path failed: @message', [
          '@redirect_id' => $redirect_id,
          '@path' => $source_path,
          '@message' => $e->getMessage(),
        ]);
      }

      return $redirect_id;
    }
    catch (\Exception $e) {
      $logger->error('Failed to create redirect: @message', [
        '@message' => $e->getMessage(),
      ]);
      return NULL;
    }
  }

}

