<?php

declare(strict_types=1);

namespace Drupal\visitors\Service;

use Drupal\Core\Config\ConfigFactoryInterface;
use Drupal\visitors\VisitorsSearchEngineInterface;

/**
 * Service for matching URLs to search engines.
 *
 * Search engine definitions are read from the 'sites' key of the
 * 'visitors.search_engines' configuration object. Each definition is expected
 * to provide a 'label' and a list of 'variants'; a variant may define 'urls'
 * (host patterns), 'params' (keyword sources) and 'hiddenkeyword' (patterns
 * that exclude a URL from matching).
 */
final class SearchEngineService implements VisitorsSearchEngineInterface {

  /**
   * Regex fragment substituted for the '{}' wildcard in host patterns.
   */
  private const HOST_WILDCARD_REGEX = '[a-z0-9.-]+';

  /**
   * The config factory service.
   *
   * @var \Drupal\Core\Config\ConfigFactoryInterface
   */
  private ConfigFactoryInterface $configFactory;

  /**
   * Cached search engines configuration.
   *
   * NULL until the configuration has been loaded for the first time.
   *
   * @var array|null
   */
  private ?array $searchEngines = NULL;

  /**
   * Constructs a new SearchEngineService.
   *
   * @param \Drupal\Core\Config\ConfigFactoryInterface $config_factory
   *   The config factory service.
   */
  public function __construct(ConfigFactoryInterface $config_factory) {
    $this->configFactory = $config_factory;
  }

  /**
   * {@inheritdoc}
   *
   * Returns an array with the keys 'name' (the engine label), 'variant' (the
   * matching variant definition) and 'keyword' (the extracted search keyword
   * or NULL) for the first variant that matches, or NULL when the URL has no
   * host or no variant matches.
   */
  public function match(string $url): ?array {
    $parsed_url = parse_url($url);

    // parse_url() returns FALSE for seriously malformed URLs, and URLs without
    // a host cannot be attributed to any search engine.
    if (!is_array($parsed_url) || !isset($parsed_url['host'])) {
      return NULL;
    }

    // Host patterns are compared in lower case.
    $host = strtolower($parsed_url['host']);
    $query_string = $parsed_url['query'] ?? '';
    $path = $parsed_url['path'] ?? '';

    foreach ($this->getSearchEngines() as $engine) {
      // Tolerate engine entries that define no variants.
      foreach ($engine['variants'] ?? [] as $variant) {
        if ($this->matchVariant($host, $query_string, $path, $variant)) {
          return [
            'name' => $engine['label'] ?? NULL,
            'variant' => $variant,
            'keyword' => $this->extractKeyword($query_string, $path, $variant),
          ];
        }
      }
    }

    return NULL;
  }

  /**
   * Gets the search engines configuration.
   *
   * The configuration is read once and then served from the instance cache.
   *
   * @return array
   *   The search engines configuration, or an empty array when the 'sites'
   *   key is missing or is not an array.
   */
  private function getSearchEngines(): array {
    if ($this->searchEngines === NULL) {
      $sites = $this->configFactory->get('visitors.search_engines')->get('sites');
      // A non-array value cannot be stored in the typed property and cannot
      // be iterated, so treat it like missing configuration.
      $this->searchEngines = is_array($sites) ? $sites : [];
    }

    return $this->searchEngines;
  }

  /**
   * Matches a URL against a search engine variant.
   *
   * A variant matches when the host matches one of its 'urls' patterns and
   * none of its 'hiddenkeyword' patterns match the path or query string.
   *
   * @param string $host
   *   The lower-cased hostname from the URL.
   * @param string $query_string
   *   The query string from the URL.
   * @param string $path
   *   The path from the URL.
   * @param array $variant
   *   The search engine variant configuration.
   *
   * @return bool
   *   TRUE if the variant matches, FALSE otherwise.
   */
  private function matchVariant(string $host, string $query_string, string $path, array $variant): bool {
    foreach ($variant['urls'] ?? [] as $pattern) {
      if (!$this->matchHostPattern($host, $pattern)) {
        continue;
      }

      // The host matched: a hidden keyword pattern vetoes the whole variant.
      foreach ($variant['hiddenkeyword'] ?? [] as $hidden_pattern) {
        if ($this->matchesHiddenKeyword($query_string, $path, $hidden_pattern)) {
          return FALSE;
        }
      }

      return TRUE;
    }

    return FALSE;
  }

  /**
   * Matches a hostname against a pattern.
   *
   * Patterns are either literal hostnames or contain one or more '{}'
   * wildcards, e.g. 'google.{}' or '{}.google.com'. A wildcard stands for one
   * or more characters out of letters, digits, dots and hyphens.
   *
   * @param string $host
   *   The lower-cased hostname to match.
   * @param string $pattern
   *   The pattern to match against.
   *
   * @return bool
   *   TRUE if the pattern matches, FALSE otherwise.
   */
  private function matchHostPattern(string $host, string $pattern): bool {
    $pattern = strtolower($pattern);

    // No wildcard: exact match.
    if (strpos($pattern, '{}') === FALSE) {
      return $host === $pattern;
    }

    // Quote the literal pieces around each '{}' separately and glue them
    // together with the wildcard expression; preg_quote() would otherwise
    // escape the braces of the placeholder itself.
    $literal_parts = array_map(
      static function (string $part): string {
        return preg_quote($part, '/');
      },
      explode('{}', $pattern)
    );

    // The 'D' modifier makes '$' match only at the very end of the host.
    $regex = '/^' . implode(self::HOST_WILDCARD_REGEX, $literal_parts) . '$/D';

    return preg_match($regex, $host) === 1;
  }

  /**
   * Checks if the URL matches a hidden keyword pattern.
   *
   * Patterns that start and end with a slash are applied as regular
   * expressions to the path plus query string; any other pattern has to be
   * identical to either the query string or the path.
   *
   * @param string $query_string
   *   The query string from the URL.
   * @param string $path
   *   The path from the URL.
   * @param string $pattern
   *   The hidden keyword pattern.
   *
   * @return bool
   *   TRUE if matches hidden keyword, FALSE otherwise.
   */
  private function matchesHiddenKeyword(string $query_string, string $path, string $pattern): bool {
    // Handle exact matches.
    if (!$this->isRegexPattern($pattern)) {
      return $query_string === $pattern || $path === $pattern;
    }

    // Special handling for Google search patterns: don't exclude /search URLs
    // that carry a 'q' parameter (actual searches). The parameter name is
    // anchored so that look-alikes such as 'oq=' or 'aq=' do not count.
    if ($pattern === '/\/search(\?.*)?/' && $path === '/search' && preg_match('/(?:^|&)q=/', $query_string) === 1) {
      return FALSE;
    }

    return (bool) preg_match($pattern, $this->buildUrlPart($path, $query_string));
  }

  /**
   * Extracts the search keyword from the URL.
   *
   * The 'params' entries of the variant are tried in order. An entry that
   * starts and ends with a slash is a regular expression applied to the path
   * plus query string, with the keyword taken from its first capture group;
   * any other entry is the name of a query parameter.
   *
   * @param string $query_string
   *   The query string from the URL.
   * @param string $path
   *   The path from the URL.
   * @param array $variant
   *   The search engine variant configuration.
   *
   * @return string|null
   *   The extracted keyword, or NULL if not found.
   */
  private function extractKeyword(string $query_string, string $path, array $variant): ?string {
    $params = $variant['params'] ?? [];
    if (empty($params)) {
      return NULL;
    }

    // Both values are independent of the parameter being tried, so compute
    // them once instead of on every iteration.
    $full_url_part = $this->buildUrlPart($path, $query_string);
    parse_str($query_string, $query_params);

    foreach ($params as $param) {
      // Handle regex parameter patterns.
      if ($this->isRegexPattern($param)) {
        if (preg_match($param, $full_url_part, $matches)) {
          // The regex ran against the raw URL, so the capture is still
          // URL-encoded. A match without a capture group ends the search.
          return isset($matches[1]) ? urldecode($matches[1]) : NULL;
        }
        continue;
      }

      // Handle query parameters. parse_str() has already URL-decoded the
      // value; decoding it again would corrupt keywords containing a literal
      // '+' or '%' (e.g. "c++"). Array values such as 'q[]=x' are skipped
      // because they are not a single keyword, while "0" is a valid keyword.
      $value = $query_params[$param] ?? NULL;
      if (is_string($value) && $value !== '') {
        return $value;
      }
    }

    return NULL;
  }

  /**
   * Checks whether a configured pattern is written as a regular expression.
   *
   * @param string $pattern
   *   The configured pattern.
   *
   * @return bool
   *   TRUE if the pattern starts and ends with a slash, FALSE otherwise.
   */
  private function isRegexPattern(string $pattern): bool {
    return preg_match('/^\/.*\/$/', $pattern) === 1;
  }

  /**
   * Builds the part of the URL that regex patterns are applied to.
   *
   * @param string $path
   *   The path from the URL.
   * @param string $query_string
   *   The query string from the URL.
   *
   * @return string
   *   The path, followed by '?' and the query string when one is present.
   */
  private function buildUrlPart(string $path, string $query_string): string {
    return $query_string === '' ? $path : $path . '?' . $query_string;
  }

}
