<?php

declare(strict_types=1);

namespace Drupal\track_usage;

use Drupal\Component\Utility\UrlHelper;
use Drupal\Core\Entity\EntityInterface;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Extension\ModuleHandlerInterface;
use Drupal\Core\File\FileSystemInterface;
use Drupal\Core\Language\LanguageManagerInterface;
use Drupal\Core\PathProcessor\InboundPathProcessorInterface;
use Drupal\Core\Session\AccountInterface;
use Drupal\Core\StreamWrapper\LocalStream;
use Drupal\Core\StreamWrapper\StreamWrapperInterface;
use Drupal\Core\StreamWrapper\StreamWrapperManagerInterface;
use Drupal\Core\Url;
use Drupal\file\FileInterface;
use Drupal\file\FileRepositoryInterface;
use Drupal\language\LanguageNegotiatorInterface;
use Drupal\redirect\RedirectRepository;
use Symfony\Component\DependencyInjection\Attribute\Autowire;
use Symfony\Component\HttpFoundation\Exception\BadRequestException;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\Routing\Matcher\RequestMatcherInterface;
use Symfony\Component\Routing\RouterInterface;

/**
 * Tries to get an entity out of a URL.
 *
 * The class applies successive strategies against the passed URL to determine
 * whether the URL links to an entity. Finally, if an entity can't be extracted,
 * it allows third parties to hook in and provide a custom match.
 */
class EntityGuesser implements EntityGuesserInterface {

  /**
   * A list of accepted protocols.
   *
   * @var list<string>
   */
  protected const ALLOWED_PROTOCOLS = ['http', 'https'];

  /**
   * A list of Drupal schemes.
   *
   * @var list<string>
   */
  protected const DRUPAL_SCHEMES = ['base', 'entity', 'internal', 'route'];

  /**
   * Static cache of local stream wrappers schemes.
   *
   * @var array<array-key, non-empty-string>
   *   Associative array keyed by local stream wrapper schemes and having their
   *   local paths as values.
   */
  protected array $localStreamWrappers;

  /**
   * Static cache of allowed schemes (HTTP, Drupal and local stream wrappers).
   *
   * @var list<string>
   */
  protected array $allowedSchemes;

  /**
   * Constructs a new EntityGuesser object.
   *
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entityTypeManager
   *   Used to get the storage that loads entities found in route parameters.
   * @param \Symfony\Component\Routing\RouterInterface&\Symfony\Component\Routing\Matcher\RequestMatcherInterface $router
   *   The 'router.no_access_checks' service, used to read route definitions.
   * @param \Drupal\Core\Language\LanguageManagerInterface $languageManager
   *   Provides the default language when no langcode is found in the path.
   * @param \Drupal\Core\Session\AccountInterface $currentUser
   *   The account handed to the language negotiator.
   * @param \Drupal\Core\PathProcessor\InboundPathProcessorInterface $pathProcessor
   *   Applies inbound processing to non-routed URLs.
   * @param \Drupal\Core\StreamWrapper\StreamWrapperManagerInterface $streamWrapperManager
   *   Used to discover the local stream wrappers.
   * @param \Drupal\Core\Extension\ModuleHandlerInterface $moduleHandler
   *   Invokes the 'track_usage_entity_guess' hook.
   * @param \Drupal\Core\File\FileSystemInterface $fileSystem
   *   Resolves the real path of the stream wrapper directories.
   * @param \Drupal\file\FileRepositoryInterface $fileRepository
   *   Loads file entities by URI.
   * @param \Drupal\language\LanguageNegotiatorInterface|null $languageNegotiator
   *   (optional) The language negotiator. When NULL, no langcode is extracted
   *   from the path.
   * @param \Drupal\redirect\RedirectRepository|null $redirectRepository
   *   (optional) The redirect repository, used to look up matching redirects.
   */
  public function __construct(
    protected readonly EntityTypeManagerInterface $entityTypeManager,
    #[Autowire(service: 'router.no_access_checks')]
    protected readonly RouterInterface&RequestMatcherInterface $router,
    protected readonly LanguageManagerInterface $languageManager,
    protected readonly AccountInterface $currentUser,
    protected readonly InboundPathProcessorInterface $pathProcessor,
    protected readonly StreamWrapperManagerInterface $streamWrapperManager,
    protected readonly ModuleHandlerInterface $moduleHandler,
    protected readonly FileSystemInterface $fileSystem,
    protected readonly FileRepositoryInterface $fileRepository,
    protected readonly ?LanguageNegotiatorInterface $languageNegotiator = NULL,
    protected readonly ?RedirectRepository $redirectRepository = NULL,
  ) {}

  /**
   * {@inheritdoc}
   */
  public function guessFromUrl(string $url): ?EntityInterface {
    try {
      $entity = $this->doGuessFromString($url);
    }
    catch (\Throwable) {
      // Guessing is best-effort: any failure while resolving the URL (for
      // instance, a completely malformed URL) means no entity can be resolved.
      $entity = NULL;
    }

    return $entity;
  }

  /**
   * Converts a URL string into a URL object and guesses the entity from it.
   *
   * @param string $url
   *   The URL as string.
   *
   * @return \Drupal\Core\Entity\EntityInterface|null
   *   The entity, if one can be determined. NULL is also returned when the
   *   string can't be converted into an acceptable URL object.
   */
  protected function doGuessFromString(string $url): ?EntityInterface {
    $urlObject = $this->getUrl($url);

    return $urlObject === NULL ? NULL : $this->doGuessFromUrl($urlObject);
  }

  /**
   * Tries to guess an entity object out of a given URL.
   *
   * The following strategies are applied, in this order:
   * - Routed URLs are resolved from their route parameters.
   * - Non-routed URLs are checked against file entities.
   * - The path is checked against redirects; a redirect destination is
   *   resolved by calling this method again.
   * - Implementations of hook_track_usage_entity_guess() are consulted.
   *
   * @param \Drupal\Core\Url $url
   *   The URL as object.
   *
   * @return \Drupal\Core\Entity\EntityInterface|null
   *   The entity, if one can be determined.
   */
  protected function doGuessFromUrl(Url $url): ?EntityInterface {
    if ($url->isRouted()) {
      return $this->doGuessFromRoutedUrl($url);
    }

    // If the URL points to a file, try to get the file entity.
    if ($entity = $this->doGuessFromFile($url)) {
      return $entity;
    }

    try {
      $query = $url->getOption('query') ?? [];
      // Build the string representation only once, as it's needed again for
      // the inbound path processing.
      $urlAsString = $url->toString();
      $request = Request::create($urlAsString, Request::METHOD_GET, $query);
    }
    catch (BadRequestException) {
      return NULL;
    }

    // Determine the langcode from path. We do this here because the inbound
    // path processor will remove the langcode from the URL.
    $langcode = $this->getLangcode($request);

    // Apply inbound processing.
    $path = $this->pathProcessor->processInbound($urlAsString, $request);

    // Until now, we couldn't determine an entity. Is this URL a redirect?
    if ($newUrl = $this->followRedirect($path, $query, $langcode, $request)) {
      return $this->doGuessFromUrl($newUrl);
    }

    // Finally, allow 3rd-party to make a decision. Pick-up the first item. The
    // merged hook results aren't guaranteed to be a zero-indexed list, so take
    // the first element regardless of its key rather than reading offset 0.
    $results = $this->moduleHandler->invokeAll('track_usage_entity_guess', [$url, $langcode]);
    $first = reset($results);

    return $first instanceof EntityInterface ? $first : NULL;
  }

  /**
   * Returns a URL object of a string.
   *
   * Only return a URL object if the destination is a Drupal resource or base
   * path such as a file. Malformed URLs, URLs with forbidden protocols or URLs
   * pointing to other sites are rejected, and NULL is returned. URLs resolving
   * to the <none> or <front> routes are rejected as well.
   *
   * @param string $stringUrl
   *   The URL as string.
   *
   * @return \Drupal\Core\Url|null
   *   The URL object if one can be determined.
   */
  protected function getUrl(string $stringUrl): ?Url {
    $stringUrl = trim($stringUrl);
    // Compare with the empty string explicitly: "0" is falsy in PHP.
    if ($stringUrl === '') {
      return NULL;
    }

    // Some browsers treat \ as / so normalize to forward slashes.
    $stringUrl = str_replace('\\', '/', $stringUrl);

    // Leading control characters may be ignored or mishandled by browsers, so
    // assume such a path may lead to a non-local location. The \p{C} character
    // class matches all UTF-8 control, unassigned, and private characters.
    if (preg_match('/^\p{C}/u', $stringUrl) !== 0) {
      return NULL;
    }

    $baseUrl = $GLOBALS['base_url'];
    $basePath = $GLOBALS['base_path'];

    if (preg_match('#^//[^/]#', $stringUrl)) {
      // A URL starting with // is an HTTP URL without an explicit protocol. For
      // convenience, consider using the same scheme as the base URL.
      $stringUrl = parse_url($baseUrl, PHP_URL_SCHEME) . ":$stringUrl";
    }

    // Remove base URL, but only when the match ends on a URL component
    // boundary. Otherwise, a foreign URL that merely starts with the same
    // characters (e.g. http://example.com.evil.com, http://example.com:8080 or
    // http://example.com@evil.com) would be wrongly handled as a local path.
    if (str_starts_with($stringUrl, $baseUrl)) {
      $remainder = substr($stringUrl, strlen($baseUrl));
      if ($remainder === '' || in_array($remainder[0], ['/', '?', '#'], TRUE)) {
        $stringUrl = $remainder;
      }
    }
    // Remove the base path.
    if (str_starts_with($stringUrl, $basePath)) {
      $stringUrl = substr($stringUrl, strlen($basePath));
    }

    $parts = parse_url($stringUrl);
    if ($parts === FALSE) {
      // Severely malformed URL.
      return NULL;
    }

    $scheme = $parts['scheme'] ?? NULL;
    $host = $parts['host'] ?? NULL;
    $query = $parts['query'] ?? '';
    $fragment = $parts['fragment'] ?? NULL;

    // This scheme isn't allowed.
    if ($scheme !== NULL && !$this->isValidScheme($scheme)) {
      return NULL;
    }
    // URL pointing to a different site. Local stream wrapper URIs are exempt,
    // as the first segment of their path is parsed as a host. The host is
    // compared with NULL so that a host such as "0" is still rejected.
    if ($host !== NULL && $scheme !== NULL && !array_key_exists($scheme, $this->getLocalStreamWrappers())) {
      return NULL;
    }

    // Normalize the query to an array.
    parse_str($query, $query);

    if ($scheme === NULL) {
      // Potential internal.
      $stringUrl = 'internal:/' . ltrim($stringUrl, '/');
    }

    $options = array_filter(['query' => $query, 'fragment' => $fragment]);
    $url = Url::fromUri($stringUrl, $options);
    if ($url->isRouted() && in_array($url->getRouteName(), ['<none>', '<front>'], TRUE)) {
      return NULL;
    }

    return $url;
  }

  /**
   * Tries to extract the entity from a routed URL.
   *
   * The route parameters are inspected in order. The first parameter declared
   * with an 'entity:*' type whose value loads an entity wins.
   *
   * @param \Drupal\Core\Url $url
   *   The URL object.
   *
   * @return \Drupal\Core\Entity\EntityInterface|null
   *   The entity or NULL.
   */
  protected function doGuessFromRoutedUrl(Url $url): ?EntityInterface {
    assert($url->isRouted());

    $route = $this->router->getRouteCollection()->get($url->getRouteName());
    if ($route === NULL) {
      // The route collection doesn't know this route, nothing to inspect.
      return NULL;
    }

    // The parameter definitions are the same for all route parameters, so read
    // them once, outside the loop.
    $definitions = $route->getOption('parameters') ?? [];
    foreach ($url->getRouteParameters() as $name => $value) {
      $type = $definitions[$name]['type'] ?? NULL;
      if (!is_string($type) || !str_starts_with($type, 'entity:')) {
        continue;
      }

      $entityTypeId = substr($type, strlen('entity:'));
      if ($entity = $this->entityTypeManager->getStorage($entityTypeId)->load($value)) {
        return $entity;
      }
    }

    return NULL;
  }

  /**
   * Tries to guess a file entity out of a file URI or a file path.
   *
   * Two kinds of URLs are handled:
   * - URIs using a local stream wrapper scheme are looked up directly.
   * - 'base:' URIs are converted to a path under DRUPAL_ROOT, which is then
   *   matched against the directory of each local stream wrapper.
   *
   * @param \Drupal\Core\Url $url
   *   The URL object.
   *
   * @return \Drupal\file\FileInterface|null
   *   The file entity or NULL.
   */
  protected function doGuessFromFile(Url $url): ?FileInterface {
    ['path' => $uri] = UrlHelper::parse($url->getUri());
    $scheme = parse_url($uri, PHP_URL_SCHEME);
    $wrappers = $this->getLocalStreamWrappers();

    if (is_string($scheme) && isset($wrappers[$scheme])) {
      return $this->fileRepository->loadByUri($uri);
    }

    if ($scheme !== 'base') {
      return NULL;
    }

    [, $relativePath] = explode(':', $uri, 2);
    $absolutePath = DRUPAL_ROOT . '/' . ltrim($relativePath, '/');
    foreach ($wrappers as $wrapperScheme => $directory) {
      // Match on a full directory boundary. A bare prefix comparison would let
      // "/path/files" also match "/path/files-private/doc.pdf".
      $prefix = rtrim($directory, '/') . '/';
      if (!str_starts_with($absolutePath, $prefix)) {
        continue;
      }

      $fileUri = $wrapperScheme . '://' . substr($absolutePath, strlen($prefix));
      if ($file = $this->fileRepository->loadByUri($fileUri)) {
        return $file;
      }
    }

    return NULL;
  }

  /**
   * Returns the langcode from the path.
   *
   * The langcode is computed by the 'language-url' negotiation method. Without
   * a language negotiator, no langcode can be determined.
   *
   * @param \Symfony\Component\HttpFoundation\Request $request
   *   The request to extract the langcode from.
   *
   * @return string|null
   *   The langcode or NULL.
   */
  protected function getLangcode(Request $request): ?string {
    $negotiator = $this->languageNegotiator;
    if (!$negotiator) {
      return NULL;
    }

    $negotiator->setCurrentUser($this->currentUser);

    return $negotiator
      ->getNegotiationMethodInstance('language-url')
      ->getLangcode($request);
  }

  /**
   * Returns the potential destination of a redirect path.
   *
   * @param string $path
   *   The path which may be a redirect.
   * @param array $query
   *   The query string as an array.
   * @param string|null $langcode
   *   The langcode. When NULL, the site default language is used.
   * @param \Symfony\Component\HttpFoundation\Request $request
   *   The request.
   *
   * @return \Drupal\Core\Url|null
   *   The destination URL or NULL. NULL is also returned when the redirect
   *   repository is not available.
   */
  protected function followRedirect(string $path, array $query, ?string $langcode, Request $request): ?Url {
    // The redirect repository is an optional dependency. Bail out when it was
    // not injected, even if the Redirect entity class can be loaded. Otherwise
    // the call below would fail on NULL and the caller would never reach its
    // remaining strategies.
    if ($this->redirectRepository === NULL || !class_exists('Drupal\redirect\Entity\Redirect')) {
      return NULL;
    }

    if (str_starts_with($request->getPathInfo(), '/system/files/') && !isset($query['file'])) {
      // Private files paths are split by the inbound path processor, and the
      // relative file path is moved to the 'file' query string parameter. This
      // is because the route system does not allow an arbitrary number of
      // parameters. We preserve the path as is returned by the request object.
      // @see \Drupal\system\PathProcessor\PathProcessorFiles::processInbound()
      $path = $request->getPathInfo();
    }

    $langcode ??= $this->languageManager->getDefaultLanguage()->getId();

    $redirect = $this->redirectRepository->findMatchingRedirect($path, $query, $langcode);
    if (!$redirect) {
      return NULL;
    }

    $destination = $redirect->getRedirect();

    return Url::fromUri($destination['uri'], $destination['options'] ?? []);
  }

  /**
   * Checks whether the given scheme is acceptable.
   *
   * Acceptable schemes are http(s), Drupal schemes and local stream wrapper
   * schemes. The list is built on the first call and reused afterwards.
   *
   * @param string $scheme
   *   The scheme.
   *
   * @return bool
   *   Whether the given scheme is acceptable.
   */
  protected function isValidScheme(string $scheme): bool {
    // Lazily build the list: HTTP protocols first, then the Drupal schemes and
    // finally the schemes of the local stream wrappers.
    $this->allowedSchemes ??= array_merge(
      self::ALLOWED_PROTOCOLS,
      self::DRUPAL_SCHEMES,
      array_keys($this->getLocalStreamWrappers()),
    );

    return in_array($scheme, $this->allowedSchemes, TRUE);
  }

  /**
   * Returns a list of local stream wrappers.
   *
   * The list is computed on the first call and statically cached. Wrappers
   * that aren't LocalStream instances, or whose directory can't be resolved to
   * a real path, are left out.
   *
   * @return array<array-key, non-empty-string>
   *   Associative array keyed by the local stream wrapper schemes, having the
   *   wrapper local directories as values.
   */
  protected function getLocalStreamWrappers(): array {
    if (isset($this->localStreamWrappers)) {
      return $this->localStreamWrappers;
    }

    // Collect into a local array so that the typed property always ends up
    // initialized, even when no wrapper qualifies. Reading an uninitialized
    // typed property would throw an error.
    $wrappers = [];
    $schemes = array_keys($this->streamWrapperManager->getWrappers(StreamWrapperInterface::LOCAL));
    foreach ($schemes as $scheme) {
      $wrapper = $this->streamWrapperManager->getViaScheme($scheme);
      // Only LocalStream wrappers expose a directory path. Skip anything else
      // instead of relying on an assertion, as assertions may be disabled.
      if (!$wrapper instanceof LocalStream) {
        continue;
      }
      if ($path = $this->fileSystem->realpath($wrapper->getDirectoryPath())) {
        $wrappers[$scheme] = $path;
      }
    }

    return $this->localStreamWrappers = $wrappers;
  }

}
