<?php

namespace Drupal\dify_augmented_search\Service;

use League\CommonMark\CommonMarkConverter;

/**
 * Converts Markdown text to HTML for display.
 *
 * Rendering is delegated to league/commonmark; when the converter throws,
 * the methods fall back to escaped or minimally formatted text instead of
 * propagating the error.
 */
class MarkdownService {

  /**
   * The CommonMark converter used for rendering.
   *
   * @var \League\CommonMark\CommonMarkConverter
   */
  protected $converter;

  /**
   * Builds the converter with a restrictive configuration.
   */
  public function __construct() {
    $this->converter = new CommonMarkConverter([
      // Raw HTML found in the Markdown is escaped instead of passed through.
      'html_input' => 'escape',
      'allow_unsafe_links' => false,
      'max_nesting_level' => 10,
    ]);
  }

  /**
   * Converts a complete Markdown document to HTML.
   *
   * @param string $markdown
   *   The Markdown source.
   *
   * @return string
   *   The rendered HTML, an empty string for empty input, or the
   *   HTML-escaped source text when conversion throws.
   */
  public function toHtml(string $markdown): string {
    // Strict comparison: empty() would also discard the valid input "0".
    if ($markdown === '') {
      return '';
    }

    try {
      // The cast covers converter versions that return a Stringable object.
      $html = (string) $this->converter->convertToHtml($markdown);
      return $this->processLinks($html);
    }
    catch (\Exception $e) {
      return htmlspecialchars($markdown, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }
  }

  /**
   * Converts Markdown that may still be arriving in chunks.
   *
   * @param string $markdown
   *   The Markdown received so far.
   * @param bool $is_complete
   *   TRUE once the full text has arrived; the call is then equivalent to
   *   toHtml().
   *
   * @return string
   *   The rendered HTML. For incomplete input, a conversion failure falls
   *   back to simpleMarkdownFormat() instead of plain escaping.
   */
  public function toHtmlStreaming(string $markdown, bool $is_complete = false): string {
    if ($markdown === '') {
      return '';
    }

    if ($is_complete) {
      return $this->toHtml($markdown);
    }

    try {
      $html = (string) $this->converter->convertToHtml($markdown);
      return $this->processLinks($html);
    }
    catch (\Exception $e) {
      return $this->simpleMarkdownFormat($markdown);
    }
  }

  /**
   * Applies a small regex-based subset of Markdown to escaped text.
   *
   * Handles bold, emphasis, inline code, level 1-3 headings and "- " list
   * items. The text is HTML-escaped first, so only the tags inserted here
   * end up in the output.
   *
   * @param string $text
   *   The raw Markdown text.
   *
   * @return string
   *   The formatted HTML fragment.
   */
  protected function simpleMarkdownFormat(string $text): string {
    $formatted = htmlspecialchars($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');

    // Order matters: "**" must be consumed before the single "*" rule.
    $patterns = [
      '/\*\*(.*?)\*\*/s' => '<strong>$1</strong>',
      '/\*(.*?)\*/s' => '<em>$1</em>',
      '/`(.*?)`/s' => '<code>$1</code>',
      '/^# (.+)$/m' => '<h1>$1</h1>',
      '/^## (.+)$/m' => '<h2>$1</h2>',
      '/^### (.+)$/m' => '<h3>$1</h3>',
      '/^- (.+)$/m' => '<li>$1</li>',
    ];

    foreach ($patterns as $pattern => $replacement) {
      // preg_replace() returns NULL on a PCRE error; keep the text we have.
      $formatted = preg_replace($pattern, $replacement, $formatted) ?? $formatted;
    }

    $formatted = nl2br($formatted);

    // Wraps everything from the first <li> to the last </li> in one <ul>.
    return preg_replace('/(<li>.*<\/li>)/s', '<ul>$1</ul>', $formatted) ?? $formatted;
  }

  /**
   * Adds rel="noopener noreferrer" to anchors with absolute http(s) URLs.
   *
   * No target attribute is added: for search results, links open in the
   * same tab (not like a chatbot). Anchors with other URL forms, or that
   * already carry a rel attribute, are returned unchanged.
   *
   * @param string $html
   *   The HTML fragment to process.
   *
   * @return string
   *   The HTML with external links annotated, or the input unchanged when
   *   the regex engine reports an error.
   */
  protected function processLinks(string $html): string {
    // The closing quote is a back-reference to the opening one, so a URL
    // containing the other quote character (e.g. an apostrophe inside a
    // double-quoted href) is captured whole instead of being cut short.
    $pattern = '/<a\s+([^>]*?)href\s*=\s*(["\'])([^<>]*?)\2([^>]*?)>/i';

    $processed = preg_replace_callback($pattern, static function (array $matches): string {
      [$tag, $beforeHref, $quote, $url, $afterHref] = $matches;

      if (!preg_match('/^https?:\/\//i', $url)) {
        return $tag;
      }

      // A second rel attribute would be invalid HTML; keep the existing one.
      if (preg_match('/\brel\s*=/i', $beforeHref . $afterHref)) {
        return $tag;
      }

      return '<a ' . $beforeHref . 'href=' . $quote . $url . $quote . $afterHref . ' rel="noopener noreferrer">';
    }, $html);

    return $processed ?? $html;
  }

}
