<?php

declare(strict_types=1);

namespace Drupal\Tests\ai_dropsolid\Unit\Service;

use Drupal\ai\Utility\TokenizerInterface;
use Drupal\ai_dropsolid\Service\RecursiveTextChunker;
use PHPUnit\Framework\TestCase;

/**
 * Tests for the RecursiveTextChunker service.
 *
 * @coversDefaultClass \Drupal\ai_dropsolid\Service\RecursiveTextChunker
 * @group ai_dropsolid
 */
final class RecursiveTextChunkerTest extends TestCase {

  /**
   * Tokenizer test double shared by the chunking tests.
   *
   * No expectations are configured on this instance, so it is created as a
   * stub. Tests that verify delegation build their own mock locally.
   */
  private TokenizerInterface $tokenizer;

  /**
   * The text chunker service under test, wired to the shared tokenizer stub.
   */
  private RecursiveTextChunker $textChunker;

  /**
   * {@inheritdoc}
   */
  protected function setUp(): void {
    parent::setUp();

    // A stub is enough here: the shared double is only a constructor
    // dependency and is never asserted against.
    $this->tokenizer = $this->createStub(TokenizerInterface::class);
    $this->textChunker = new RecursiveTextChunker($this->tokenizer);
  }

  /**
   * Tests that valid parameters produce a non-empty list of chunks.
   *
   * @covers ::__construct
   * @covers ::chunkText
   */
  public function testSuccessfulTextChunking(): void {
    $text = 'This is a sample text that should be chunked properly according to the specified parameters.';

    $chunks = $this->textChunker->chunkText($text, 50, 10);

    $this->assertIsArray($chunks);
    $this->assertNotEmpty($chunks);
  }

  /**
   * Tests that a zero maximum chunk size is rejected.
   *
   * @covers ::chunkText
   */
  public function testChunkingParameterValidationMaxSize(): void {
    $this->expectException(\InvalidArgumentException::class);
    $this->expectExceptionMessage('Maximum chunk size must be a positive integer.');

    $this->textChunker->chunkText('test text', 0, 0);
  }

  /**
   * Tests that a negative minimum overlap is rejected.
   *
   * @covers ::chunkText
   */
  public function testChunkingParameterValidationNegativeOverlap(): void {
    $this->expectException(\InvalidArgumentException::class);
    $this->expectExceptionMessage('Minimum overlap cannot be negative.');

    $this->textChunker->chunkText('test text', 100, -5);
  }

  /**
   * Tests that an overlap larger than the maximum chunk size is rejected.
   *
   * @covers ::chunkText
   */
  public function testChunkingParameterValidationOverlapExceedsMaxSize(): void {
    $this->expectException(\InvalidArgumentException::class);
    $this->expectExceptionMessage('Minimum overlap must be less than maximum chunk size.');

    $this->textChunker->chunkText('test text', 50, 60);
  }

  /**
   * Tests that token counting is delegated to the tokenizer service.
   *
   * @covers ::countTokens
   */
  public function testTokenCounting(): void {
    $text = 'Sample text for token counting';
    $expectedTokenCount = 42;

    // Use a mock (not a stub) so the test fails if the chunker does not
    // forward the text to the tokenizer exactly once.
    $tokenizer = $this->createMock(TokenizerInterface::class);
    $tokenizer->expects($this->once())
      ->method('countTokens')
      ->with($text)
      ->willReturn($expectedTokenCount);

    $textChunker = new RecursiveTextChunker($tokenizer);

    $this->assertSame($expectedTokenCount, $textChunker->countTokens($text));
  }

  /**
   * Tests that setting the model is delegated to the tokenizer service.
   *
   * @covers ::setModel
   */
  public function testModelSetting(): void {
    $model = 'gpt-4';

    // setModel() returns void, so the only observable effect is the call made
    // on the tokenizer; the mock expectation is the assertion of this test.
    $tokenizer = $this->createMock(TokenizerInterface::class);
    $tokenizer->expects($this->once())
      ->method('setModel')
      ->with($model);

    $textChunker = new RecursiveTextChunker($tokenizer);
    $textChunker->setModel($model);
  }

  /**
   * Tests that chunk output has normalized line endings and blank lines.
   *
   * @covers ::chunkText
   */
  public function testTextNormalization(): void {
    $text = "Line 1\r\nLine 2\r\n\r\n\r\nLine 3   \n   \nLine 4";

    $chunks = $this->textChunker->chunkText($text, 100, 5);

    $this->assertIsArray($chunks);
    $this->assertNotEmpty($chunks);

    // Check every chunk, not just the first: no carriage returns and no runs
    // of three or more line breaks may survive anywhere in the output.
    foreach ($chunks as $chunk) {
      $this->assertStringNotContainsString("\r", $chunk);
      $this->assertStringNotContainsString("\n\n\n", $chunk);
    }
  }

  /**
   * Tests that a very small token limit splits the text into several chunks.
   *
   * @covers ::chunkText
   */
  public function testChunkingWithSmallTokenLimits(): void {
    $text = 'Word1 Word2 Word3 Word4 Word5';

    // Deliberately tiny limits to exercise the overlap logic.
    $chunks = $this->textChunker->chunkText($text, 2, 1);

    $this->assertIsArray($chunks);
    $this->assertNotEmpty($chunks);

    // With small limits, we should get multiple chunks.
    $this->assertGreaterThan(1, count($chunks));
  }

  /**
   * Tests that empty and whitespace-only input yield no chunks.
   *
   * @covers ::chunkText
   */
  public function testChunkingWithEmptyText(): void {
    $maxSize = 100;
    $minOverlap = 10;

    $this->assertSame([], $this->textChunker->chunkText('', $maxSize, $minOverlap));
    $this->assertSame([], $this->textChunker->chunkText('   ', $maxSize, $minOverlap));
  }

  /**
   * Tests chunking with limits expressed in tokens.
   *
   * @covers ::chunkText
   */
  public function testTokenToCharacterConversion(): void {
    $text = 'This text should be processed with proper token-to-character conversion.';
    // 20 tokens should convert to roughly 72 characters (20 * 4 * 0.9).
    $maxSize = 20;
    // 5 tokens should convert to roughly 18 characters (5 * 4 * 0.9).
    $minOverlap = 5;

    $chunks = $this->textChunker->chunkText($text, $maxSize, $minOverlap);

    $this->assertIsArray($chunks);
    $this->assertNotEmpty($chunks);
  }

  /**
   * Tests that key content of the input is still present after chunking.
   *
   * @covers ::chunkText
   */
  public function testChunkingPreservesTextIntegrity(): void {
    $originalText = 'Sentence one. Sentence two. Sentence three. Sentence four.';

    $chunks = $this->textChunker->chunkText($originalText, 15, 3);

    // Join the chunks with a single space. Overlapping text is NOT removed, so
    // the result may contain duplicated fragments; that does not matter for
    // the containment checks below.
    $reconstructed = implode(' ', $chunks);

    // Verify key content is preserved.
    $this->assertStringContainsString('Sentence one', $reconstructed);
    $this->assertStringContainsString('Sentence two', $reconstructed);
  }

  /**
   * Tests chunking of text made of several multi-line paragraphs.
   *
   * @covers ::chunkText
   */
  public function testChunkingWithComplexParagraphStructure(): void {
    $text = "Paragraph 1 line 1.\nParagraph 1 line 2.\n\nParagraph 2 line 1.\nParagraph 2 line 2.\n\n\nParagraph 3.";

    $chunks = $this->textChunker->chunkText($text, 25, 5);

    $this->assertIsArray($chunks);
    $this->assertNotEmpty($chunks);

    // No chunk may contain a run of three or more consecutive line breaks.
    foreach ($chunks as $chunk) {
      $this->assertStringNotContainsString("\n\n\n", $chunk);
    }
  }

}
