<?php

namespace Drupal\cms_content_sync\Helper;

use Drupal\cms_content_sync\Controller\ContentSyncSettings;
use Drupal\Core\Language\LanguageInterface;
use Drupal\Core\StreamWrapper\PublicStream;
use Drupal\Core\Url;

/**
 * Helper to centralize link detection and transformation.
 *
 * Provides two entry points: rewriteLinkFieldUrl() analyses a single link
 * field URI, rewriteHtmlLinks() rewrites relative file/node links found in
 * formatted text. Both consult the global ContentSyncSettings to decide which
 * links are exported as absolute URLs.
 */
class LinkHandlingHelper {

  /**
   * Transform a link field URI during push.
   *
   * This is used by the DefaultLinkHandler and optionally applies the global
   * absolute URL export setting.
   *
   * @param string $uri
   *   The original URI from the link field item.
   * @param bool $force_export_as_absolute
   *   Whether this field handler forces absolute URL export.
   * @param \Drupal\Core\Language\LanguageInterface|null $language
   *   The language to use when generating URLs for entities.
   *
   * @return array
   *   An analysis array with keys:
   *   - uri: string
   *   - link_entity: \Drupal\Core\Entity\EntityInterface|null
   *   - meta_data: array
   *   - should_embed: bool
   *   - skip: bool (optional; TRUE to omit this link item entirely)
   */
  public function rewriteLinkFieldUrl(string $uri, bool $force_export_as_absolute, ?LanguageInterface $language = NULL): array {
    // Find the linked entity so that its ID can be replaced with the UUID.
    // References have the following pattern: entity:entity_type/entity_id.
    $found = [];
    preg_match('/^entity:(.*)\/(\d*)$/', $uri, $found);

    $meta_data = [];
    $link_entity = NULL;

    if (!empty($found)) {
      $meta_data['uri_format'] = 'entity_reference';
    }
    else {
      $file_prefix = 'internal:/' . PublicStream::basePath() . '/';
      $internal_route_found = [];

      if (strncmp($uri, $file_prefix, strlen($file_prefix)) === 0) {
        // PDF files can have a #page=... anchor attached that we want to keep.
        $parts = explode('#', (string) substr($uri, strlen($file_prefix)));
        $path = $parts[0];
        $anchor = $parts[1] ?? '';

        $file_uri = 'public://' . urldecode($path);
        $files = \Drupal::entityTypeManager()
          ->getStorage('file')
          ->loadByProperties(['uri' => $file_uri]);

        if (count($files)) {
          $link_entity = reset($files);
          $meta_data['file_uri'] = $file_uri;
          // Compare against '' so that a literal "#0" anchor is preserved.
          if ($anchor !== '') {
            $meta_data['uri_anchor'] = $anchor;
          }
        }

        // The format is recorded even if no managed file was found.
        $meta_data['uri_format'] = 'relative_file';
      }
      elseif (preg_match('@^internal:/(node)/([0-9]+)(#.*)?$@', $uri, $internal_route_found)) {
        $link_entity = \Drupal::entityTypeManager()
          ->getStorage($internal_route_found[1])
          ->load($internal_route_found[2]);

        $meta_data['uri_format'] = 'relative_entity';
        // Group 3 includes the leading "#"; only keep non-empty anchors.
        if (!empty($internal_route_found[3]) && strlen($internal_route_found[3]) > 1) {
          $meta_data['uri_anchor'] = substr($internal_route_found[3], 1);
        }
      }
    }

    // Field handler override: always export as absolute URL, never embed.
    if ($force_export_as_absolute) {
      $absolute_uri = Url::fromUri($uri, ['absolute' => TRUE])->toString();

      return $this->buildAnalysis($absolute_uri, $link_entity, $meta_data, FALSE);
    }

    // Entity reference: resolve the entity by type/id if possible.
    if (!empty($found) && empty($link_entity)) {
      $entity_type_manager = \Drupal::entityTypeManager();

      // An unknown entity type would make getStorage() throw; treat it like
      // any other unresolvable reference instead.
      if ($entity_type_manager->hasDefinition($found[1])) {
        $link_entity = $entity_type_manager
          ->getStorage($found[1])
          ->load($found[2]);
      }

      // Omit unresolvable entity references.
      if (empty($link_entity)) {
        return $this->buildAnalysis($uri, NULL, $meta_data, FALSE, TRUE);
      }
    }

    // Global settings: export certain file/node links as absolute URLs.
    $settings = ContentSyncSettings::getInstance();

    // Files: match by user regex against the original URI string. The pattern
    // was validated by wrapUserRegex(), so no warning can be raised here.
    $absolute_file_regex = $this->wrapUserRegex($settings->getAbsoluteFileUrlRegex());
    $is_relative_file = ($meta_data['uri_format'] ?? NULL) === 'relative_file';
    if ($absolute_file_regex && $is_relative_file && preg_match($absolute_file_regex, $uri) === 1) {
      $absolute_uri = Url::fromUri($uri, ['absolute' => TRUE])->toString();

      return $this->buildAnalysis($absolute_uri, $link_entity, $meta_data, FALSE);
    }

    // Nodes: export selected bundles as absolute URLs.
    $absolute_bundles = $settings->getAbsoluteContentUrlBundles() ?? [];
    if (!empty($absolute_bundles) && $link_entity && $link_entity->getEntityTypeId() === 'node') {
      // Strict comparison: bundle names must never loosely match e.g. 0.
      if (in_array($link_entity->bundle(), $absolute_bundles, TRUE)) {
        $url_options = [
          'absolute' => TRUE,
          // Workaround for PathProcessorAlias::processOutbound to explicitly
          // ignore us as we always want the pure, unaliased e.g. /node/:id
          // path because we don't use the URL for end-users but for editors
          // and it has to be reliable (aliases can be removed or change).
          'alias' => TRUE,
        ];
        if ($language) {
          $url_options['language'] = $language;
        }

        $absolute_url = $link_entity->toUrl('canonical', $url_options)->toString();

        if (isset($meta_data['uri_anchor']) && $meta_data['uri_anchor'] !== '') {
          $absolute_url .= '#' . $meta_data['uri_anchor'];
        }

        return $this->buildAnalysis($absolute_url, $link_entity, $meta_data, FALSE);
      }
    }

    // Previous behavior: only embed if an entity was resolved. Unresolved
    // entity references already returned above with the skip flag set.
    $should_embed = !empty($link_entity) || !empty($found);

    return $this->buildAnalysis($uri, $link_entity, $meta_data, $should_embed);
  }

  /**
   * Rewrite relative file/node links to absolute URLs based on global settings.
   *
   * This mirrors the matching logic used by DefaultFormattedTextHandler's
   * dependency extraction to ensure the same links can be excluded from
   * embedding.
   *
   * @param string $text
   *   The formatted text (HTML, possibly containing Markdown images).
   *
   * @return string
   *   The text with matching links rewritten. The input is returned unchanged
   *   if neither setting is configured or if a rewrite step fails.
   */
  public function rewriteHtmlLinks(string $text): string {
    $settings = ContentSyncSettings::getInstance();

    $absolute_file_regex = $this->wrapUserRegex($settings->getAbsoluteFileUrlRegex());
    $absolute_bundles = $settings->getAbsoluteContentUrlBundles() ?? [];

    if (!$absolute_file_regex && empty($absolute_bundles)) {
      return $text;
    }

    try {
      $base_url = $settings->getSiteBaseUrl();
    }
    catch (\Exception $e) {
      // Ignore and keep relative URLs.
      $base_url = NULL;
    }

    // Files in HTML: <img src="/..."> and <a href="/..."> as well as Markdown
    // images.
    if ($absolute_file_regex && $base_url) {
      $base_path = PublicStream::basePath();
      // The base path is a literal, e.g. "sites/example.com/files"; quote it
      // so characters like "." are not interpreted as regex syntax.
      $quoted_base_path = preg_quote($base_path, '@');

      $text = $this->rewriteFileUrls(
        $text,
        '@<img\s[^>]*src="/' . $quoted_base_path . '/([^"]+)"@',
        'src="',
        '"',
        $absolute_file_regex,
        $base_url,
        $base_path
      );

      $text = $this->rewriteFileUrls(
        $text,
        '@<a\s[^>]*href="/' . $quoted_base_path . '/([^"]+)"@',
        'href="',
        '"',
        $absolute_file_regex,
        $base_url,
        $base_path
      );

      // Markdown images, e.g. ![alt](/<base_path>/<path>)
      $text = $this->rewriteFileUrls(
        $text,
        '@!\[[^\]]*\]\(/' . $quoted_base_path . '/([^)]+)\)@',
        '(',
        ')',
        $absolute_file_regex,
        $base_url,
        $base_path
      );
    }

    // Node links in HTML: <a href="/node/<id>...">.
    if (!empty($absolute_bundles)) {
      $text = $this->rewriteNodeUrls($text, $absolute_bundles);
    }

    return $text;
  }

  /**
   * Wrap a user-provided regex (without delimiters) into a safe pattern.
   *
   * Will create a case insensitive pattern with ~ delimiters. Delimiter
   * characters inside the pattern are escaped unless the user already escaped
   * them, so both "a~b" and "a\~b" result in a valid pattern.
   *
   * @param string|null $pattern
   *   The pattern as entered by the user, without delimiters.
   *
   * @return string|null
   *   The delimited pattern, or NULL if the pattern is empty or invalid.
   */
  protected function wrapUserRegex(?string $pattern): ?string {
    if (!is_string($pattern)) {
      return NULL;
    }

    $pattern = trim($pattern);
    if ($pattern === '') {
      return NULL;
    }

    $delimiter = '~';

    // Walk over the pattern token by token: an escape sequence (backslash
    // plus the following character) is kept as-is, a bare delimiter gets
    // escaped. A blind str_replace() would turn "\~" into "\\~", which ends
    // the pattern prematurely.
    $escaped = preg_replace_callback(
      '/\\\\.|' . preg_quote($delimiter, '/') . '/s',
      static function (array $token) use ($delimiter) {
        return $token[0] === $delimiter ? '\\' . $delimiter : $token[0];
      },
      $pattern
    );
    if (!is_string($escaped)) {
      return NULL;
    }

    $wrapped = $delimiter . $escaped . $delimiter . 'i';

    // If invalid, ignore silently (the form validator should prevent this, but
    // config imports might still contain invalid patterns). Error suppression
    // is intentional: PHP offers no warning-free way to validate a pattern.
    if (@preg_match($wrapped, '') === FALSE) {
      return NULL;
    }

    return $wrapped;
  }

  /**
   * Build the analysis array returned by rewriteLinkFieldUrl().
   *
   * @param string $uri
   *   The (possibly rewritten) URI.
   * @param \Drupal\Core\Entity\EntityInterface|null $link_entity
   *   The entity the link points to, if any was resolved.
   * @param array $meta_data
   *   The collected meta data (uri_format, uri_anchor, file_uri).
   * @param bool $should_embed
   *   Whether the linked entity should be embedded.
   * @param bool $skip
   *   TRUE to mark the link item as to be omitted entirely.
   *
   * @return array
   *   The analysis array; the "skip" key is only present when $skip is TRUE.
   */
  private function buildAnalysis(string $uri, $link_entity, array $meta_data, bool $should_embed, bool $skip = FALSE): array {
    $analysis = [
      'uri' => $uri,
      'link_entity' => $link_entity,
      'meta_data' => $meta_data,
      'should_embed' => $should_embed,
    ];

    if ($skip) {
      $analysis['skip'] = TRUE;
    }

    return $analysis;
  }

  /**
   * Rewrite public file URLs matched by a pattern to absolute URLs.
   *
   * @param string $text
   *   The text to process.
   * @param string $pattern
   *   A delimited regex whose first capture group is the file path relative to
   *   the public files base path (including any query string or fragment).
   * @param string $open
   *   The literal text directly before the URL, e.g. 'src="' or '('.
   * @param string $close
   *   The literal text directly after the URL, e.g. '"' or ')'.
   * @param string $absolute_file_regex
   *   The delimited, validated user regex deciding which files are rewritten.
   * @param string $base_url
   *   The site base URL.
   * @param string $base_path
   *   The public files base path without leading or trailing slash.
   *
   * @return string
   *   The processed text; the unmodified input if the regex engine fails.
   */
  private function rewriteFileUrls(string $text, string $pattern, string $open, string $close, string $absolute_file_regex, string $base_url, string $base_path): string {
    $relative_prefix = '/' . $base_path . '/';
    $absolute_prefix = rtrim($base_url, '/') . $relative_prefix;

    $rewritten = preg_replace_callback(
      $pattern,
      static function (array $matches) use ($absolute_file_regex, $open, $close, $relative_prefix, $absolute_prefix) {
        $path_with_suffix = $matches[1];

        // The user regex is matched against the bare path, so strip the
        // fragment first and the query string second.
        $path_for_match = (string) preg_replace('@#.*$@', '', $path_with_suffix);
        $path_for_match = (string) preg_replace('@\\?.*$@', '', $path_for_match);

        if (preg_match($absolute_file_regex, $path_for_match) !== 1) {
          return $matches[0];
        }

        return str_replace(
          $open . $relative_prefix . $path_with_suffix . $close,
          $open . $absolute_prefix . $path_with_suffix . $close,
          $matches[0]
        );
      },
      $text
    );

    // preg_replace_callback() returns NULL on PCRE errors (e.g. backtrack
    // limit); keep the original text rather than losing the content.
    return $rewritten ?? $text;
  }

  /**
   * Rewrite <a href="/node/<id>..."> links of selected bundles to absolute URLs.
   *
   * @param string $text
   *   The text to process.
   * @param array $absolute_bundles
   *   The node bundles whose links are exported as absolute URLs.
   *
   * @return string
   *   The processed text; the unmodified input if the regex engine fails.
   */
  private function rewriteNodeUrls(string $text, array $absolute_bundles): string {
    $rewritten = preg_replace_callback(
      '@<a\s[^>]*href="/node/([0-9]+)([^"]*)"@',
      static function (array $matches) use ($absolute_bundles) {
        $id = $matches[1];
        $suffix = $matches[2] ?? '';

        $entity = \Drupal::entityTypeManager()
          ->getStorage('node')
          ->load($id);

        if (!$entity || !in_array($entity->bundle(), $absolute_bundles, TRUE)) {
          return $matches[0];
        }

        $absolute = $entity->toUrl('canonical', [
          'absolute' => TRUE,
          'language' => $entity->language(),
          // Workaround for PathProcessorAlias::processOutbound to explicitly
          // ignore us as we always want the pure, unaliased e.g. /node/:id
          // path because we don't use the URL for end-users but for editors
          // and it has to be reliable (aliases can be removed or change).
          'alias' => TRUE,
        ])->toString();

        return str_replace(
          'href="/node/' . $id . $suffix . '"',
          'href="' . $absolute . $suffix . '"',
          $matches[0]
        );
      },
      $text
    );

    // preg_replace_callback() returns NULL on PCRE errors; keep the original
    // text in that case.
    return $rewritten ?? $text;
  }

}
