<?php

namespace Drupal\eb\Service;

use Drupal\Component\Utility\Html;
use Drupal\Component\Utility\Xss;

/**
 * Sanitizes content to prevent XSS attacks.
 *
 * Recursively walks definition data arrays and filters the string values of
 * a fixed set of text fields (see self::SANITIZE_FIELDS) through Drupal's XSS
 * filter, keeping only a small allow-list of formatting tags.
 *
 * The sanitized strings are HTML fragments intended for element content. Any
 * caller that places a value in another context (for example an attribute)
 * must still escape it for that context.
 */
class ContentSanitizer {

  /**
   * Field names that should be sanitized.
   *
   * These fields commonly contain user-provided text that could be displayed
   * in the UI and thus are potential XSS vectors. Matching is exact and
   * case-sensitive, and applies at every nesting level of the data array.
   *
   * @var array<string>
   */
  protected const SANITIZE_FIELDS = [
    'label',
    'description',
    'help_text',
    'title',
    'placeholder',
    'prefix',
    'suffix',
    'empty_value',
    'empty_label',
    'help',
  ];

  /**
   * Allowed HTML tags for sanitized fields.
   *
   * Basic formatting tags that are safe and commonly used in field labels.
   * Every other tag is removed by the XSS filter.
   *
   * @var array<string>
   */
  protected const ALLOWED_TAGS = ['em', 'strong', 'br'];

  /**
   * Sanitizes definition data array recursively.
   *
   * Every string value whose key is listed in self::SANITIZE_FIELDS is run
   * through the XSS filter, at any depth. All other values (non-strings,
   * values under other keys, values under integer keys) are returned
   * unchanged.
   *
   * @param array<string, mixed> $data
   *   The definition data to sanitize.
   *
   * @return array<string, mixed>
   *   Sanitized definition data. The input array is not modified.
   */
  public function sanitize(array $data): array {
    return $this->sanitizeRecursive($data);
  }

  /**
   * Recursively sanitizes an array.
   *
   * @param array<string, mixed> $data
   *   Data array to process.
   *
   * @return array<string, mixed>
   *   Processed data array with the same keys in the same order.
   */
  protected function sanitizeRecursive(array $data): array {
    // Write results back by key rather than iterating by reference, so no
    // reference to the last element is left behind once the loop finishes.
    foreach ($data as $key => $value) {
      if (is_array($value)) {
        // Nested arrays are always descended into, whatever their key is.
        $data[$key] = $this->sanitizeRecursive($value);
      }
      elseif (is_string($value) && $this->shouldSanitize($key)) {
        $data[$key] = $this->sanitizeString($value);
      }
    }
    return $data;
  }

  /**
   * Determines if a field should be sanitized.
   *
   * @param string|int $key
   *   The array key (field name).
   *
   * @return bool
   *   TRUE if the key is a string listed in self::SANITIZE_FIELDS. Integer
   *   keys (list items) never match.
   */
  protected function shouldSanitize(string|int $key): bool {
    return is_string($key) && in_array($key, self::SANITIZE_FIELDS, TRUE);
  }

  /**
   * Sanitizes a single string value.
   *
   * The value is passed through Xss::filter() with self::ALLOWED_TAGS as the
   * tag allow-list. The filter's output is used as-is: it already
   * entity-encodes stray special characters, so running Html::escape() on
   * top of it would double-encode those entities and turn the allowed tags
   * into literal "&lt;em&gt;" text, defeating the allow-list.
   *
   * @param string $value
   *   The string to sanitize.
   *
   * @return string
   *   Sanitized string with dangerous HTML removed and only the allowed
   *   formatting tags left as markup.
   */
  protected function sanitizeString(string $value): string {
    return Xss::filter($value, self::ALLOWED_TAGS);
  }

  /**
   * Sanitizes a single field value.
   *
   * Public method for sanitizing individual values without recursion. The
   * value is always filtered; no field-name check is applied.
   *
   * @param string $value
   *   The string to sanitize.
   *
   * @return string
   *   Sanitized string.
   */
  public function sanitizeField(string $value): string {
    return $this->sanitizeString($value);
  }

}
