<?php

namespace Drupal\tmgmt_supertext_ai;

use Drupal\tmgmt_file\RecursiveDOMIterator;

/**
 * Prepares data for translation submission.
 *
 * Texts are registered with addText(), split into sub-chunks on top-level
 * <p>/<div> boundaries and grouped into numbered chunks. Iterating the object
 * yields one request payload array per chunk, keyed by chunk index, ready to
 * be submitted to SupertextAI.
 */
class SupertextAiData implements \IteratorAggregate {

  /**
   * Maximum characters per request.
   */
  public const AI_TRANSLATION_MAX_CHARS = 10000;

  /**
   * Chunks of text to be translated.
   *
   * Keyed by chunk index; each chunk is a list of HTML/text sub-chunks. A new
   * chunk is started when adding a text would bring the current chunk to
   * AI_TRANSLATION_MAX_CHARS or beyond. A single text is never spread over
   * several chunks, so one oversized text still produces one oversized chunk.
   *
   * @var array
   */
  protected array $chunks = [];

  /**
   * Index of the chunk currently being filled, NULL until text is added.
   *
   * @var int|null
   */
  protected ?int $chunkIndex = NULL;

  /**
   * Politeness level.
   *
   * @var string
   */
  protected string $politeness = 'default';

  /**
   * Tag handling.
   *
   * @var string
   */
  protected string $tagHandling = 'html';

  /**
   * Ad-hoc glossary as source-target term pairs.
   *
   * @var array
   */
  protected array $glossary = [];

  /**
   * Item ids per chunk, parallel to $chunks.
   *
   * $mapping[$chunk][$position] is the id that was passed to addText() for the
   * sub-chunk stored at $chunks[$chunk][$position].
   *
   * @var array
   */
  protected array $mapping = [];

  /**
   * Constructs a SupertextAiData object.
   *
   * @param string $sourceLang
   *   Source language, sent as 'source_lang' with every chunk.
   * @param string $targetLang
   *   Target language, sent as 'target_lang' with every chunk.
   */
  public function __construct(
    protected string $sourceLang,
    protected string $targetLang,
  ) {
  }

  /**
   * {@inheritdoc}
   *
   * Yields one request payload per chunk, keyed by chunk index. The
   * 'glossary' key is only present when a non-empty glossary has been set.
   */
  public function getIterator(): \Traversable {
    $requests = [];
    foreach ($this->chunks as $key => $chunk) {
      $requests[$key] = [
        'text' => $chunk,
        'source_lang' => $this->sourceLang,
        'target_lang' => $this->targetLang,
        'politeness' => $this->politeness,
        'tag_handling' => $this->tagHandling,
      ];

      if ($this->glossary) {
        $requests[$key]['glossary'] = $this->glossary;
      }
    }
    return new \ArrayIterator($requests);
  }

  /**
   * Set politeness.
   *
   * @param string $politeness
   *   'default', 'more' or 'less'.
   */
  public function setPoliteness(string $politeness): void {
    $this->politeness = $politeness;
  }

  /**
   * Set tag handling.
   *
   * @param string $tag_handling
   *   'none', 'xml' or 'html'.
   */
  public function setTagHandling(string $tag_handling): void {
    $this->tagHandling = $tag_handling;
  }

  /**
   * Set the ad-hoc glossary for the translation as source-target term pairs.
   *
   * @param array $glossary
   *   Source-target term pairs.
   */
  public function setGlossary(array $glossary): void {
    $this->glossary = $glossary;
  }

  /**
   * Add text to be translated.
   *
   * The text is split into sub-chunks (see splitTags()); all sub-chunks of one
   * text are stored in the same chunk and mapped to the same id. Text that
   * yields no sub-chunks (empty or whitespace only) is ignored.
   *
   * @param mixed $id
   *   The id for the item. For example the key returned from
   *   \Drupal\tmgmt\Data::filterTranslatable.
   * @param string $text
   *   Text string.
   */
  public function addText(mixed $id, string $text): void {
    $subchunks = $this->splitTags($text);
    if (!$subchunks) {
      return;
    }

    $this->chunkIndex ??= 0;

    // Measure the serialized sub-chunks rather than the raw input so the
    // accounting matches what getChunkLength() reports for stored data.
    $incoming = 0;
    foreach ($subchunks as $subchunk) {
      $incoming += mb_strlen($subchunk);
    }

    // Only move on when the current chunk already holds something; otherwise
    // an oversized text would just leave an unused chunk index behind.
    $current = $this->getChunkLength($this->chunkIndex);
    if ($current > 0 && ($current + $incoming) >= self::AI_TRANSLATION_MAX_CHARS) {
      $this->chunkIndex++;
    }

    foreach ($subchunks as $subchunk) {
      $this->chunks[$this->chunkIndex][] = $subchunk;
      $this->mapping[$this->chunkIndex][] = $id;
    }
  }

  /**
   * Splits a string into sub-chunks on top-level p and div boundaries.
   *
   * Each top-level <p> or <div> element becomes its own sub-chunk. Runs of
   * other top-level nodes (text, inline elements, ...) between them are
   * concatenated into one sub-chunk; runs consisting only of whitespace are
   * not emitted.
   *
   * @param string $text
   *   HTML fragment or plain text.
   *
   * @return string[]
   *   Serialized sub-chunks in document order.
   */
  protected function splitTags(string $text): array {
    $dom = new \DOMDocument('1.0', 'UTF-8');

    // Collect libxml parse errors internally instead of raising PHP warnings
    // for markup the parser does not recognise (e.g. HTML5 elements).
    $previous = libxml_use_internal_errors(TRUE);
    try {
      $loaded = $dom->loadHTML(
        "<!doctype html><html><head><meta charset='UTF-8'></head><body>{$text}</body></html>",
        LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
      );
    }
    finally {
      libxml_clear_errors();
      libxml_use_internal_errors($previous);
    }

    $body = $loaded ? $dom->getElementsByTagName('body')->item(0) : NULL;
    if ($body === NULL) {
      // Parsing failed: pass the text through unsplit rather than dropping it.
      return trim($text) !== '' ? [$text] : [];
    }

    $chunks = [];
    $buffer = '';

    foreach ($body->childNodes as $node) {
      $is_block = $node->nodeType === XML_ELEMENT_NODE
        && in_array(strtolower($node->nodeName), ['p', 'div'], TRUE);

      if (!$is_block) {
        $buffer .= $dom->saveHTML($node);
        continue;
      }

      // Flush pending inline content so document order is preserved.
      if (trim($buffer) !== '') {
        $chunks[] = $buffer;
        $buffer = '';
      }
      $chunks[] = $dom->saveHTML($node);
    }

    if (trim($buffer) !== '') {
      $chunks[] = $buffer;
    }

    return $chunks;
  }

  /**
   * Get the length of a chunk.
   *
   * @param int $chunk_id
   *   Chunk id.
   *
   * @return int
   *   Sum of the character lengths (mb_strlen) of all sub-chunks stored in
   *   the chunk; 0 if the chunk does not exist or is empty.
   */
  public function getChunkLength(int $chunk_id): int {
    $length = 0;
    foreach ($this->chunks[$chunk_id] ?? [] as $part) {
      $length += mb_strlen($part);
    }

    return $length;
  }

  /**
   * Get mapping.
   *
   * @param int|null $chunk_id
   *   (optional) Get mapping for a certain chunk.
   *
   * @return array
   *   When a chunk id is given, the list of item ids for that chunk (empty if
   *   the chunk does not exist); otherwise the mapping of all chunks keyed by
   *   chunk index.
   */
  public function getMapping(?int $chunk_id = NULL): array {
    if ($chunk_id === NULL) {
      return $this->mapping;
    }
    return $this->mapping[$chunk_id] ?? [];
  }

}

