<?php

namespace Drupal\feed_tamper_read_time\Plugin\Tamper;

use Drupal\Core\Form\FormStateInterface;
use Drupal\tamper\TamperableItemInterface;
use Drupal\tamper\TamperBase;

/**
 * Tamper plugin to calculate reading time from HTML content.
 *
 * @Tamper(
 *   id = "feed_tamper_read_time",
 *   label = @Translation("Read Time Calculator"),
 *   description = @Translation("Calculates reading time from HTML content with configurable WPM."),
 *   category = "Other"
 * )
 */
class ReadTimePlugin extends TamperBase
{

  const SETTING_WPM = 'wpm';

  /**
   * Reading speed used when no valid WPM value is configured.
   */
  const DEFAULT_WPM = 200;

  /**
   * {@inheritdoc}
   */
  public function defaultConfiguration()
  {
    $config = parent::defaultConfiguration();
    $config[self::SETTING_WPM] = self::DEFAULT_WPM;
    return $config;
  }

  /**
   * {@inheritdoc}
   */
  public function buildConfigurationForm(array $form, FormStateInterface $form_state)
  {
    $form[self::SETTING_WPM] = [
      '#type' => 'number',
      '#title' => $this->t('Words Per Minute (WPM)'),
      '#description' => $this->t('Enter the reading speed in words per minute. Default is 200.'),
      '#default_value' => $this->getSetting(self::SETTING_WPM),
      '#min' => 50,
      '#max' => 1000,
      '#step' => 10,
    ];

    return $form;
  }

  /**
   * {@inheritdoc}
   */
  public function submitConfigurationForm(array &$form, FormStateInterface $form_state)
  {
    parent::submitConfigurationForm($form, $form_state);
    $wpm = $form_state->getValue(self::SETTING_WPM);
    $this->setConfiguration([
      self::SETTING_WPM => $wpm,
    ]);
  }

  /**
   * Converts HTML content into an estimated reading time in whole minutes.
   *
   * The visible text is extracted from the markup, split into words on
   * whitespace, divided by the configured words-per-minute value and rounded
   * up.
   *
   * @param mixed $data
   *   The HTML (or plain text) to measure. NULL, empty and non-stringable
   *   values are treated as empty content.
   * @param \Drupal\tamper\TamperableItemInterface|null $item
   *   The item being tampered. Not used by this plugin.
   *
   * @return string
   *   The number of minutes as a numeric string; "0" for empty content.
   */
  public function tamper($data, TamperableItemInterface|null $item = NULL)
  {
    // Feed values may be NULL or non-string; normalise so empty input yields
    // "0" instead of an error from the HTML parser.
    $html = (is_scalar($data) || $data instanceof \Stringable) ? (string) $data : '';
    $text = $this->extractTextFromHtml($html);

    // The "u" modifier makes \s Unicode-aware so that non-breaking spaces
    // (very common in HTML as &nbsp;) separate words too.
    $words = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
    $wordCount = $words === FALSE ? 0 : count($words);

    $wpm = (int) $this->getSetting(self::SETTING_WPM);
    if ($wpm < 1) {
      // Guard against a missing or invalid setting (also avoids division by
      // zero).
      $wpm = self::DEFAULT_WPM;
    }

    // Round up so any non-empty content takes at least one minute.
    return (string) (int) ceil($wordCount / $wpm);
  }

  /**
   * Extracts the visible text from an HTML string.
   *
   * Text inside <script> and <style> elements is ignored. Text nodes are
   * joined with single spaces and runs of whitespace are collapsed.
   *
   * @param string $html
   *   The HTML content to process.
   *
   * @return string
   *   The extracted text, or an empty string when there is nothing to parse.
   */
  private function extractTextFromHtml($html)
  {
    $html = (string) $html;

    // DOMDocument::loadHTML() throws a ValueError on an empty source.
    if (trim($html) === '') {
      return '';
    }

    // Encode non-ASCII characters as numeric entities so the parser does not
    // misinterpret UTF-8 input. This replaces the 'HTML-ENTITIES' pseudo
    // encoding of mb_convert_encoding(), which is deprecated since PHP 8.2.
    $encoded = mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

    $dom = new \DOMDocument();

    // Collect parser warnings internally rather than silencing them with "@",
    // and restore the previous libxml error mode afterwards.
    $previousErrorMode = libxml_use_internal_errors(TRUE);
    try {
      $loaded = $dom->loadHTML($encoded, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
    }
    finally {
      libxml_clear_errors();
      libxml_use_internal_errors($previousErrorMode);
    }

    if (!$loaded) {
      return '';
    }

    // Select non-blank text nodes outside <script>/<style>. Filtering in the
    // query avoids removing nodes while iterating a live DOMNodeList, which
    // skips every other match.
    $xpath = new \DOMXPath($dom);
    $textNodes = $xpath->query('//text()[normalize-space() and not(ancestor::script or ancestor::style)]');
    if ($textNodes === FALSE) {
      return '';
    }

    $parts = [];
    foreach ($textNodes as $node) {
      $parts[] = trim($node->nodeValue);
    }
    $text = implode(' ', $parts);

    // Collapse any remaining runs of (Unicode) whitespace to single spaces;
    // fall back to the unmodified text if the regex engine reports an error.
    $text = preg_replace('/\s+/u', ' ', $text) ?? $text;

    return trim($text);
  }
}
