<?php

declare(strict_types=1);

namespace Drupal\ai_dropsolid\Tokenizer;

use Drupal\ai\Utility\TokenizerInterface;

/**
 * Tokenizer decorator that can delegate to Dropsolid's XLM-Roberta tokenizer.
 *
 * Every call is forwarded either to the decorated tokenizer or to the
 * Dropsolid tokenizer; the choice is re-evaluated on each setModel() call.
 */
final class DropsolidTokenizerDecorator implements TokenizerInterface {

  /**
   * TRUE while calls are routed to the Dropsolid tokenizer.
   *
   * Starts as FALSE, so the decorated tokenizer is used until setModel()
   * receives a model that the Dropsolid tokenizer supports.
   */
  private bool $useDropsolidTokenizer = FALSE;

  /**
   * Constructs the decorator.
   *
   * @param \Drupal\ai\Utility\TokenizerInterface $innerTokenizer
   *   The decorated tokenizer, used when the Dropsolid tokenizer does not
   *   support the selected model.
   * @param \Drupal\ai_dropsolid\Tokenizer\DropsolidXlmRobertaTokenizer $dropsolidTokenizer
   *   The Dropsolid tokenizer, used for the models it reports as supported.
   */
  public function __construct(
    private readonly TokenizerInterface $innerTokenizer,
    private readonly DropsolidXlmRobertaTokenizer $dropsolidTokenizer,
  ) {
  }

  /**
   * {@inheritdoc}
   */
  public function setModel(string $model): void {
    // Models the Dropsolid tokenizer does not support go to the decorated
    // tokenizer, which also becomes the active one.
    if (!$this->dropsolidTokenizer->supportsModel($model)) {
      $this->useDropsolidTokenizer = FALSE;
      $this->innerTokenizer->setModel($model);
      return;
    }

    $this->useDropsolidTokenizer = TRUE;
    $this->dropsolidTokenizer->setModel($model);
  }

  /**
   * {@inheritdoc}
   */
  public function getSupportedModels(): array {
    // Start from the decorated tokenizer's list; Dropsolid entries are added
    // and win whenever both tokenizers define the same key.
    return array_replace(
      $this->innerTokenizer->getSupportedModels(),
      $this->dropsolidTokenizer->getSupportedModels(),
    );
  }

  /**
   * {@inheritdoc}
   */
  public function getTokens(string $chunk): array {
    $active = $this->activeTokenizer();

    return $active->getTokens($chunk);
  }

  /**
   * {@inheritdoc}
   */
  public function countTokens(string $chunk): int {
    $active = $this->activeTokenizer();

    return $active->countTokens($chunk);
  }

  /**
   * Splits text into encoded chunks with the active tokenizer.
   *
   * @param string $text
   *   The text to encode.
   * @param int $maxSize
   *   Maximum tokens per chunk.
   *
   * @return array
   *   The encoded chunks, exactly as returned by the active tokenizer.
   *
   * @throws \RuntimeException
   *   When the active tokenizer has no getEncodedChunks() method.
   */
  public function getEncodedChunks(string $text, int $maxSize): array {
    $active = $this->activeTokenizer();

    // The capability is probed at runtime before delegating.
    if (method_exists($active, 'getEncodedChunks')) {
      return $active->getEncodedChunks($text, $maxSize);
    }

    throw new \RuntimeException('Active tokenizer does not support chunk encoding.');
  }

  /**
   * Turns an encoded chunk back into text with the active tokenizer.
   *
   * @param array $encodedChunk
   *   Encoded chunk produced by {@see getEncodedChunks()}.
   *
   * @return string
   *   The decoded text, exactly as returned by the active tokenizer.
   *
   * @throws \RuntimeException
   *   When the active tokenizer has no decodeChunk() method.
   */
  public function decodeChunk(array $encodedChunk): string {
    $active = $this->activeTokenizer();

    // The capability is probed at runtime before delegating.
    if (method_exists($active, 'decodeChunk')) {
      return $active->decodeChunk($encodedChunk);
    }

    throw new \RuntimeException('Active tokenizer does not support chunk decoding.');
  }

  /**
   * Picks the tokenizer that should handle the current call.
   *
   * @return \Drupal\ai\Utility\TokenizerInterface
   *   The Dropsolid tokenizer when the last setModel() call selected it,
   *   otherwise the decorated tokenizer.
   */
  private function activeTokenizer(): TokenizerInterface {
    if ($this->useDropsolidTokenizer) {
      return $this->dropsolidTokenizer;
    }

    return $this->innerTokenizer;
  }

}
