<?php

namespace Drupal\content_first;

use League\HTMLToMarkdown\Converter\TableConverter;
use League\HTMLToMarkdown\HtmlConverter;

/**
 * Represents content rendered by Content First.
 *
 * Wraps the rendered HTML of an entity and exposes it as HTML (with
 * Markdown-style heading markers), as a DOMXPath object, as Markdown and as a
 * flattened, entity-encoded string.
 */
class RenderedContent {

  /**
   * The rendered HTML, exactly as passed to the constructor.
   *
   * @var string
   */
  protected string $html;

  /**
   * Name/value pairs emitted as a front matter block before the Markdown.
   *
   * @var array
   */
  protected array $attributes;

  /**
   * Constructor.
   *
   * @param string $html
   *   The rendered HTML of the entity.
   * @param array $attributes
   *   Attributes that will be prepended to the Markdown output as front
   *   matter, keyed by attribute name.
   */
  public function __construct(string $html, array $attributes = []) {
    $this->html = $html;
    $this->attributes = $attributes;
  }

  /**
   * Gets the HTML with every non-ASCII character as a numeric entity.
   *
   * Encoding the characters keeps the markup pure ASCII, so consumers such as
   * \DOMDocument::loadHTML() do not have to guess the character set.
   *
   * @return string
   *   The HTML with all code points from 0x80 upwards written as "&#NNN;".
   */
  protected function getFilteredHtml() : string {
    return mb_encode_numericentity($this->html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
  }

  /**
   * Gets the entity-encoded HTML with Markdown markers added to headings.
   *
   * @return string
   *   The processed HTML, or an empty string when there is no content.
   */
  public function getHtml() : string {
    // addMarkdownToHeadings() returns NULL for empty input; the cast turns
    // that into an empty string.
    return (string) $this->addMarkdownToHeadings($this->getFilteredHtml());
  }

  /**
   * Gets the XPath object for the rendered content.
   *
   * @return \DOMXPath
   *   The XPath object for the rendered content. When the content is empty
   *   the XPath object wraps an empty document.
   */
  public function getXpath() : \DOMXPath {
    $dom = new \DOMDocument();
    $html = $this->getFilteredHtml();

    // DOMDocument::loadHTML() throws a ValueError for an empty string on
    // PHP 8, and "@" cannot silence exceptions, so skip loading in that case.
    if ($html !== '') {
      // Collect libxml warnings about malformed HTML instead of emitting
      // them, then restore whatever error mode was active before.
      $previous = libxml_use_internal_errors(TRUE);
      $dom->loadHTML($html);
      libxml_clear_errors();
      libxml_use_internal_errors($previous);
    }

    return new \DOMXPath($dom);
  }

  /**
   * Gets the Markdown version of the rendered content.
   *
   * @return string
   *   The optional front matter block followed by the Markdown conversion of
   *   the original HTML. The result always starts the Markdown body after a
   *   line break, even when there are no attributes.
   */
  public function getMarkdown() : string {
    $converter = new HtmlConverter(
      [
        'strip_tags' => TRUE,
        // Needed h1:# and H2:# style headers for JS marked plugin.
        'header_style' => 'atx',
      ]
    );
    // Support for tables:
    $converter->getEnvironment()->addConverter(new TableConverter());

    $markdown = $converter->convert($this->html);
    // Remove extra line breaks. preg_replace() yields NULL on a PCRE error;
    // keep the unmodified Markdown in that case rather than losing it.
    $markdown = preg_replace('/(\n\s*){2,}/', "\n\n", $markdown) ?? $markdown;

    $attributes_markdown = $this->attributes !== []
      ? $this->attributesToMarkdown() . PHP_EOL . PHP_EOL
      : '';

    return $attributes_markdown . PHP_EOL . $markdown;
  }

  /**
   * Convert the entity attributes to a front matter block.
   *
   * Every value is wrapped in double quotes; backslashes and double quotes
   * inside a value are backslash-escaped so the quoting stays balanced.
   *
   * @return string
   *   Example:
   *   ---
   *   title: "Hello world"
   *   url: "https://example.com"
   *   meta-description: "Lorem ipsum"
   *   ...
   *   ---
   */
  protected function attributesToMarkdown() : string {
    $attributes_string = '';
    foreach ($this->attributes as $name => $value) {
      $escaped = addcslashes((string) $value, "\"\\");
      $attributes_string .= $name . ': "' . $escaped . '"' . PHP_EOL;
    }
    return '---' . PHP_EOL . $attributes_string . '---';
  }

  /**
   * Prefixes the text of every heading with Markdown-style hashes.
   *
   * For example "<h2 class="a"> Title </h2>" becomes
   * "<h2 class="a">## Title</h2>". Calling the method on already processed
   * HTML adds the hashes again.
   *
   * @param string $html
   *   The HTML content to process.
   *
   * @return string|null
   *   The processed HTML content, or NULL when the input is NULL or an empty
   *   string.
   */
  public function addMarkdownToHeadings($html) {
    // Compare against '' explicitly: empty() would also reject the valid
    // content "0".
    if ($html === NULL || (string) $html === '') {
      return NULL;
    }
    $html = (string) $html;

    // Process from h1 to h6.
    for ($i = 1; $i <= 6; $i++) {
      $processed = preg_replace_callback(
        // \b keeps "<h1" from matching a longer tag name.
        '/<h' . $i . '\b(.*?)>(.*?)<\/h' . $i . '>/is',
        static function (array $matches) use ($i) {
          $attrs = $matches[1];
          $content = trim($matches[2]);
          $hashes = str_repeat('#', $i);
          return "<h$i$attrs>$hashes $content</h$i>";
        },
        $html
      );
      // NULL signals a PCRE error (e.g. backtrack limit); keep the HTML of
      // the previous step instead of discarding the whole document.
      if ($processed !== NULL) {
        $html = $processed;
      }
    }
    return $html;
  }

  /**
   * Gets a clean version of the content suitable for JSON.
   *
   * Starts from the Markdown output, flattens it to a single line, removes
   * "#" and "\" characters and control characters, HTML-encodes special
   * characters and writes every non-ASCII character as a numeric entity.
   *
   * @return string
   *   The clean version of the rendered content.
   */
  public function getClean() : string {
    $clean = $this->getMarkdown();

    $clean = str_replace(["\r\n", "\n", "\r"], ' ', $clean);
    $clean = str_replace(["#", "\\"], '', $clean);

    // The "u" modifier makes PCRE work on UTF-8 characters. Without it the
    // control-character class would strip the bytes 0x80-0x9F out of
    // multi-byte sequences and corrupt characters such as curly quotes.
    // With "u", invalid UTF-8 makes preg_replace() return NULL; the text is
    // then left untouched for that step.
    $clean = preg_replace('/\s+/u', ' ', $clean) ?? $clean;
    $clean = preg_replace('/[\x00-\x1F\x7F-\x9F]/u', '', $clean) ?? $clean;

    $clean = trim($clean);

    $sanitized = (string) filter_var($clean, FILTER_SANITIZE_SPECIAL_CHARS);

    return mb_encode_numericentity($sanitized, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
  }

  /**
   * Checks if the rendered content is empty.
   *
   * @return bool
   *   TRUE if getHtml() yields an empty string, FALSE otherwise. Content
   *   consisting of the single character "0" is not empty.
   */
  public function isEmpty() : bool {
    return $this->getHtml() === '';
  }

}
