<?php

declare(strict_types=1);

namespace Drupal\Tests\ai_dropsolid\Unit\Utility;

use Drupal\ai_dropsolid\Utility\RecursiveCharacterTextSplitter;
use PHPUnit\Framework\TestCase;

/**
 * Tests for the RecursiveCharacterTextSplitter utility.
 *
 * Covers constructor argument validation and the behaviour of split() for
 * empty input, line-ending normalization, paragraph/sentence/word separators,
 * chunk overlap and the character-level fallback.
 *
 * @coversDefaultClass \Drupal\ai_dropsolid\Utility\RecursiveCharacterTextSplitter
 * @group ai_dropsolid
 */
final class RecursiveCharacterTextSplitterTest extends TestCase {

  /**
   * Tests basic text splitting functionality.
   *
   * @covers ::__construct
   * @covers ::split
   */
  public function testBasicTextSplitting(): void {
    $splitter = new RecursiveCharacterTextSplitter(
      chunkSize: 50,
      chunkOverlap: 10,
      separators: ["\n\n", "\n", " "]
    );

    $text = "This is a test sentence. This is another sentence that should be split properly.";
    $chunks = $splitter->split($text);

    $this->assertIsArray($chunks);
    $this->assertNotEmpty($chunks);

    // Verify each chunk respects size limits.
    $this->assertChunksWithinSize($chunks, 50);
  }

  /**
   * Tests constructor parameter validation.
   *
   * @covers ::__construct
   */
  public function testConstructorValidation(): void {
    // Test negative chunk size.
    $this->expectException(\InvalidArgumentException::class);
    $this->expectExceptionMessage('Chunk size must be a positive integer.');
    new RecursiveCharacterTextSplitter(chunkSize: -1);
  }

  /**
   * Tests constructor overlap validation.
   *
   * @covers ::__construct
   */
  public function testConstructorOverlapValidation(): void {
    // Test negative overlap.
    $this->expectException(\InvalidArgumentException::class);
    $this->expectExceptionMessage('Chunk overlap cannot be negative.');
    new RecursiveCharacterTextSplitter(chunkSize: 100, chunkOverlap: -1);
  }

  /**
   * Tests constructor overlap exceeding chunk size.
   *
   * @covers ::__construct
   */
  public function testConstructorOverlapExceedsChunkSize(): void {
    $this->expectException(\InvalidArgumentException::class);
    $this->expectExceptionMessage('Chunk overlap must be less than chunk size.');
    new RecursiveCharacterTextSplitter(chunkSize: 50, chunkOverlap: 60);
  }

  /**
   * Tests splitting empty text.
   *
   * Empty and whitespace-only input is expected to yield no chunks at all.
   *
   * @covers ::split
   */
  public function testSplittingEmptyText(): void {
    $splitter = new RecursiveCharacterTextSplitter();

    $this->assertSame([], $splitter->split(''));
    $this->assertSame([], $splitter->split('   '));
    $this->assertSame([], $splitter->split("\n\n\n"));
  }

  /**
   * Tests text normalization functionality.
   *
   * @covers ::split
   */
  public function testTextNormalization(): void {
    $splitter = new RecursiveCharacterTextSplitter(chunkSize: 100);

    $text = "Line 1\r\nLine 2\r\n\r\n\r\nLine 3";
    $chunks = $splitter->split($text);

    $this->assertNotEmpty($chunks);

    // Inspect every chunk, not only the first one: a carriage return that
    // survives in a later chunk is just as much a normalization failure.
    foreach ($chunks as $index => $chunk) {
      $this->assertStringNotContainsString(
        "\r",
        $chunk,
        sprintf('Chunk %s still contains a carriage return.', $index)
      );
    }
  }

  /**
   * Tests paragraph separation handling.
   *
   * @covers ::split
   */
  public function testParagraphSeparation(): void {
    $splitter = new RecursiveCharacterTextSplitter(
      chunkSize: 30,
      chunkOverlap: 5,
      separators: ["\n\n", "\n", " "]
    );

    $text = "Paragraph one.\n\nParagraph two with more text.\n\nParagraph three.";
    $chunks = $splitter->split($text);

    $this->assertGreaterThan(1, count($chunks));

    // Verify that the start of every paragraph is still present somewhere in
    // the output, i.e. no paragraph was dropped while splitting.
    $fullText = implode(' ', $chunks);
    $this->assertStringContainsString('Paragraph one', $fullText);
    $this->assertStringContainsString('Paragraph two', $fullText);
    $this->assertStringContainsString('Paragraph three', $fullText);
  }

  /**
   * Tests overlap calculation with various separators.
   *
   * @covers ::split
   */
  public function testOverlapCalculation(): void {
    $splitter = new RecursiveCharacterTextSplitter(
      chunkSize: 40,
      chunkOverlap: 15,
      separators: [". ", " "]
    );

    $text = "First sentence. Second sentence. Third sentence. Fourth sentence.";
    $chunks = $splitter->split($text);

    // This assertion guarantees at least two chunks exist, so the overlap
    // check below runs unconditionally and can never be silently skipped.
    $this->assertGreaterThan(1, count($chunks));

    // Check that consecutive chunks have some overlap.
    $overlap = $this->findOverlap($chunks[0], $chunks[1]);
    $this->assertNotSame('', $overlap, 'The first two chunks share no overlapping text.');
  }

  /**
   * Tests handling of text smaller than chunk size.
   *
   * @covers ::split
   */
  public function testSmallText(): void {
    $splitter = new RecursiveCharacterTextSplitter(chunkSize: 1000);

    $text = "Small text.";
    $chunks = $splitter->split($text);

    $this->assertCount(1, $chunks);
    $this->assertSame('Small text.', $chunks[0]);
  }

  /**
   * Tests recursive splitting with multiple separators.
   *
   * @covers ::split
   */
  public function testRecursiveSplitting(): void {
    $splitter = new RecursiveCharacterTextSplitter(
      chunkSize: 20,
      chunkOverlap: 3,
      separators: ["\n\n", "\n", ". ", " ", ""]
    );

    $text = "Word1 Word2 Word3 Word4 Word5 Word6 Word7 Word8 Word9 Word10";
    $chunks = $splitter->split($text);

    $this->assertGreaterThan(1, count($chunks));

    // Verify each chunk is within size limits.
    $this->assertChunksWithinSize($chunks, 20);
  }

  /**
   * Tests character-level fallback splitting.
   *
   * @covers ::split
   */
  public function testCharacterLevelFallback(): void {
    // Force the splitter to fall back to character-level separation.
    $splitter = new RecursiveCharacterTextSplitter(
      chunkSize: 5,
      chunkOverlap: 1,
      separators: [""],
    );

    $text = "verylongwordwithoutspaces";
    $chunks = $splitter->split($text);

    $this->assertGreaterThan(1, count($chunks));

    $this->assertChunksWithinSize($chunks, 5);
  }

  /**
   * Asserts that no chunk is longer than the given number of characters.
   *
   * Length is measured with mb_strlen(), i.e. in characters, not bytes.
   *
   * @param string[] $chunks
   *   The chunks returned by the splitter.
   * @param int $maxLength
   *   The maximum allowed chunk length, in characters.
   */
  private function assertChunksWithinSize(array $chunks, int $maxLength): void {
    foreach ($chunks as $index => $chunk) {
      $this->assertLessThanOrEqual(
        $maxLength,
        mb_strlen($chunk),
        sprintf('Chunk %s exceeds the maximum length of %d characters.', $index, $maxLength)
      );
    }
  }

  /**
   * Helper method to find overlap between two text chunks.
   *
   * Returns the longest string that is both a suffix of the first chunk and a
   * prefix of the second chunk.
   *
   * @param string $chunk1
   *   The first chunk.
   * @param string $chunk2
   *   The second chunk.
   *
   * @return string
   *   The overlapping text, or an empty string when there is none.
   */
  private function findOverlap(string $chunk1, string $chunk2): string {
    $maxOverlap = min(mb_strlen($chunk1), mb_strlen($chunk2));

    // Try the longest candidate first so the first match is the longest one.
    for ($i = $maxOverlap; $i > 0; $i--) {
      $suffix = mb_substr($chunk1, -$i);
      $prefix = mb_substr($chunk2, 0, $i);

      if ($suffix === $prefix) {
        return $suffix;
      }
    }

    return '';
  }

}
