<?php

namespace Drupal\dify_augmented_search\Service;

use League\CommonMark\CommonMarkConverter;

/**
 * Service for rendering markdown content using League CommonMark.
 */
class MarkdownService {

  /**
   * The CommonMarkConverter.
   *
   * @var \League\CommonMark\CommonMarkConverter
   */
  protected $converter;

  /**
   * Builds the converter with a restrictive configuration.
   *
   * The option meanings noted below follow the League CommonMark
   * configuration documentation.
   */
  public function __construct() {
    $config = [
      // Raw HTML in the markdown is escaped rather than passed through.
      'html_input' => 'escape',
      // Links and images with risky URLs are not rendered.
      'allow_unsafe_links' => FALSE,
      // Upper bound on how deeply block elements may nest.
      'max_nesting_level' => 10,
    ];

    $this->converter = new CommonMarkConverter($config);
  }

  /**
   * Converts markdown text to HTML.
   *
   * If the converter throws, the input is returned HTML-escaped instead of
   * being rendered as markdown.
   *
   * @param string $markdown
   *   The markdown text to convert.
   *
   * @return string
   *   The converted HTML, or an empty string for empty input.
   */
  public function toHtml(string $markdown): string {
    // Compare against '' explicitly: empty() would also discard the literal
    // string "0", which is valid content.
    if ($markdown === '') {
      return '';
    }

    try {
      return $this->render($markdown);
    }
    catch (\Exception $e) {
      // ENT_SUBSTITUTE keeps the text when it contains invalid UTF-8; without
      // it htmlspecialchars() returns an empty string for such input.
      return htmlspecialchars($markdown, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8');
    }
  }

  /**
   * Converts markdown text to HTML with streaming-friendly processing.
   *
   * Complete content is handled exactly like toHtml(). Partial content is
   * rendered the same way, but if the converter throws, the text is passed
   * through simpleMarkdownFormat() instead of being returned as plain escaped
   * text.
   *
   * @param string $markdown
   *   The markdown text to convert.
   * @param bool $is_complete
   *   Whether this is the complete content or partial streaming content.
   *
   * @return string
   *   The converted HTML, or an empty string for empty input.
   */
  public function toHtmlStreaming(string $markdown, bool $is_complete = FALSE): string {
    if ($is_complete) {
      return $this->toHtml($markdown);
    }

    if ($markdown === '') {
      return '';
    }

    try {
      return $this->render($markdown);
    }
    catch (\Exception $e) {
      return $this->simpleMarkdownFormat($markdown);
    }
  }

  /**
   * Runs the converter and post-processes the links in its output.
   *
   * @param string $markdown
   *   The non-empty markdown text to convert.
   *
   * @return string
   *   The rendered HTML after processLinks() has been applied.
   */
  protected function render(string $markdown): string {
    $html = $this->converter->convert($markdown)->getContent();
    return $this->processLinks($html);
  }

  /**
   * Simple markdown formatting for fallback scenarios.
   *
   * The text is HTML-escaped first, so the only markup in the result is what
   * this method adds: bold, emphasis, inline code, level 1-3 headings,
   * "- " list items and line breaks.
   *
   * @param string $text
   *   The text to format.
   *
   * @return string
   *   The formatted HTML.
   */
  protected function simpleMarkdownFormat(string $text): string {
    // ENT_SUBSTITUTE: see toHtml(); invalid UTF-8 must not blank the output.
    $formatted = htmlspecialchars($text, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE, 'UTF-8');

    // Order matters: "**" must be consumed before the single "*" pattern.
    $patterns = [
      '/\*\*(.*?)\*\*/s' => '<strong>$1</strong>',
      '/\*(.*?)\*/s' => '<em>$1</em>',
      '/`(.*?)`/s' => '<code>$1</code>',
      '/^# (.+)$/m' => '<h1>$1</h1>',
      '/^## (.+)$/m' => '<h2>$1</h2>',
      '/^### (.+)$/m' => '<h3>$1</h3>',
      '/^- (.+)$/m' => '<li>$1</li>',
    ];

    foreach ($patterns as $pattern => $replacement) {
      // preg_replace() returns NULL on a PCRE error; keep the previous text
      // in that case rather than violating the string return type.
      $formatted = preg_replace($pattern, $replacement, $formatted) ?? $formatted;
    }

    // Wrap each run of consecutive list-item lines in its own <ul>. This runs
    // before nl2br() so that no <br /> ends up between the items, and it is
    // line-based so that text between two separate lists stays outside them.
    $formatted = preg_replace_callback('/(?:^<li>.*<\/li>$\n?)+/m', static function (array $run) {
      $trailing_newline = substr($run[0], -1) === "\n" ? "\n" : '';
      return '<ul>' . str_replace("\n", '', $run[0]) . '</ul>' . $trailing_newline;
    }, $formatted) ?? $formatted;

    return nl2br($formatted);
  }

  /**
   * Process HTML to add rel attributes to absolute links.
   *
   * Anchors whose href starts with http:// or https:// receive
   * rel="noopener noreferrer". Other anchors, and anchors that already carry
   * a rel attribute, are returned unchanged.
   *
   * @param string $html
   *   The HTML content to process.
   *
   * @return string
   *   The processed HTML with rel attributes added to absolute links.
   */
  protected function processLinks(string $html): string {
    $pattern = '/<a\s+([^>]*?)href\s*=\s*["\']([^"\']*)["\']([^>]*?)>/i';

    $processed = preg_replace_callback($pattern, static function (array $matches) {
      list($tag, $beforeHref, $url, $afterHref) = $matches;

      if (!preg_match('/^https?:\/\//i', $url)) {
        return $tag;
      }

      // Blank out quoted attribute values before looking for an existing rel
      // attribute, so that text such as title="rel=x" is not mistaken for one.
      $attributes = preg_replace('/"[^"]*"|\'[^\']*\'/', '""', $beforeHref . ' ' . $afterHref) ?? '';
      if (preg_match('/(?:^|\s)rel\s*=/i', $attributes)) {
        // Adding a second rel would produce a duplicate attribute.
        return $tag;
      }

      // For search results, open links in the same tab (not like a chatbot),
      // so no target attribute is added here.
      return '<a ' . $beforeHref . 'href="' . $url . '"' . $afterHref . ' rel="noopener noreferrer">';
    }, $html);

    // NULL signals a PCRE error; fall back to the unprocessed HTML.
    return $processed ?? $html;
  }

}
