<?php

declare(strict_types=1);

namespace Drupal\table_header_scope_attribute;

use Drupal\Component\Utility\Html;

/**
 * Decides whether a DOM element carries any meaningful content.
 *
 * An element counts as empty when none of its descendants is a void element
 * and none of its text nodes contains a character other than whitespace.
 */
class HtmlElementValidator implements HtmlElementValidatorInterface {

  /**
   * Tag names whose mere presence makes the parent element non-empty.
   *
   * These are void elements: they never have children, so they cannot be
   * judged by inspecting their content.
   */
  private const VOID_ELEMENTS = [
    'img',
    'br',
    'hr',
    'input',
    'area',
    'base',
    'col',
    'embed',
    'link',
    'meta',
    'param',
    'source',
    'track',
    'wbr',
  ];

  /**
   * Pattern matching runs of characters that are treated as blank.
   *
   * Besides "\s", this explicitly lists the no-break space (U+00A0), the
   * zero-width space (U+200B), the zero-width non-joiner (U+200C), the
   * zero-width joiner (U+200D) and the byte order mark (U+FEFF).
   */
  private const UNICODE_WHITESPACE_PATTERN = '/[\s\x{00A0}\x{200B}\x{200C}\x{200D}\x{FEFF}]+/u';

  /**
   * {@inheritdoc}
   */
  public function isElementContentEmpty(\DOMElement $element): bool {
    foreach ($element->childNodes as $node) {
      // Text nodes matter only when something other than whitespace remains.
      if ($node->nodeType === XML_TEXT_NODE) {
        if ($this->hasVisibleText($node->nodeValue)) {
          return FALSE;
        }
        continue;
      }

      // Comments, CDATA sections and processing instructions are skipped:
      // they are not counted as content by this check.
      if ($node->nodeType !== XML_ELEMENT_NODE) {
        continue;
      }

      // A void element is content by itself; any other element (a <span>,
      // for instance) is content only if something inside it is. The
      // recursion is skipped when the tag name already settles the answer.
      $tag_name = strtolower($node->nodeName);
      if (in_array($tag_name, self::VOID_ELEMENTS, TRUE) || !$this->isElementContentEmpty($node)) {
        return FALSE;
      }
    }

    // Every child was inspected and none of them contributed content.
    return TRUE;
  }

  /**
   * Tells whether a text node value holds anything besides whitespace.
   *
   * @param string $raw_text
   *   The value of a DOM text node.
   *
   * @return bool
   *   TRUE if at least one non-whitespace character remains once entities
   *   are decoded and all blank characters are stripped, FALSE otherwise.
   */
  private function hasVisibleText(string $raw_text): bool {
    $trimmed = trim($raw_text);
    if ($trimmed === '') {
      return FALSE;
    }

    // Entity references that are still present as literal text (such as
    // "&nbsp;") are decoded so that they are judged by the character they
    // stand for.
    $decoded = trim(Html::decodeEntities($trimmed));
    if ($decoded === '') {
      return FALSE;
    }

    // Strip every remaining blank character, including the invisible ones
    // that trim() leaves alone. Should preg_replace() fail and return NULL,
    // the strict comparison below still reports the text as visible.
    $remainder = preg_replace(self::UNICODE_WHITESPACE_PATTERN, '', $decoded);

    return $remainder !== '';
  }

}
