<?php

declare(strict_types=1);

namespace Drupal\Tests\tmgmt_laratranslate\Unit\Service;

use Drupal\Tests\UnitTestCase;
use Drupal\tmgmt_laratranslate\Service\RecursiveCharacterTextSplitter;
use Psr\Log\LoggerInterface;

/**
 * Tests for RecursiveCharacterTextSplitter service.
 *
 * @coversDefaultClass \Drupal\tmgmt_laratranslate\Service\RecursiveCharacterTextSplitter
 * @group tmgmt_laratranslate
 */
class RecursiveCharacterTextSplitterTest extends UnitTestCase {

  /**
   * The text splitter service under test.
   *
   * A new instance is assigned in setUp(), so configuration applied by one
   * test method is not carried over into the next.
   *
   * @var \Drupal\tmgmt_laratranslate\Service\RecursiveCharacterTextSplitter
   */
  private RecursiveCharacterTextSplitter $splitter;

  /**
   * {@inheritdoc}
   */
  protected function setUp(): void {
    parent::setUp();

    // The splitter is constructed with a logger; a bare mock without any
    // expectations is enough for these tests.
    $this->splitter = new RecursiveCharacterTextSplitter(
      $this->createMock(LoggerInterface::class)
    );
  }

  /**
   * Tests basic text splitting with default separators.
   *
   * Splits three paragraphs separated by blank lines using a chunk size of
   * 30 and checks that joining the resulting chunks with a blank line
   * reproduces the input exactly.
   *
   * @covers ::splitText
   * @covers ::configure
   */
  public function testBasicSplitting(): void {
    $text = "This is a paragraph.\n\nThis is another paragraph.\n\nAnd a third one.";

    $this->splitter->configure([
      'chunk_size' => 30,
      'chunk_overlap' => 0,
    ]);

    $chunks = $this->splitter->splitText($text);

    self::assertIsArray($chunks);
    self::assertNotEmpty($chunks);

    // Verify all text is preserved. assertSame() is used so the comparison
    // is a strict string identity check rather than a loose equality check.
    $reassembled = implode("\n\n", $chunks);
    self::assertSame($text, $reassembled);
  }

  /**
   * Tests splitting with caller-supplied separators.
   *
   * Uses a pipe character as the primary separator (with the empty string as
   * a fallback entry) and expects the pipe-delimited input to yield more than
   * one chunk.
   *
   * @covers ::splitText
   * @covers ::configure
   */
  public function testCustomSeparators(): void {
    $options = [
      'separators' => ['|', ''],
      'chunk_size' => 10,
      'chunk_overlap' => 0,
      'keep_separator' => FALSE,
    ];
    $this->splitter->configure($options);

    $pieces = $this->splitter->splitText("Part1|Part2|Part3|Part4");

    self::assertIsArray($pieces);
    $pieceCount = count($pieces);
    self::assertGreaterThan(1, $pieceCount);
  }

  /**
   * Tests splitting with the boolean keep_separator option.
   *
   * Runs the same newline-delimited input through the splitter once with
   * keep_separator enabled and once with it disabled, and expects a
   * non-empty array of chunks in both cases.
   *
   * @covers ::splitText
   * @covers ::configure
   */
  public function testKeepSeparator(): void {
    $text = "Line1\nLine2\nLine3";

    // Options shared by both runs; only keep_separator varies. The complete
    // set is passed on every call so the second run does not depend on
    // whether configure() merges with or replaces earlier settings.
    $baseConfig = [
      'separators' => ["\n"],
      'chunk_size' => 10,
      'chunk_overlap' => 0,
    ];

    foreach ([TRUE, FALSE] as $keepSeparator) {
      $label = $keepSeparator ? 'TRUE' : 'FALSE';

      $this->splitter->configure($baseConfig + ['keep_separator' => $keepSeparator]);
      $chunks = $this->splitter->splitText($text);

      self::assertIsArray($chunks, "Result is not an array with keep_separator = $label.");
      // Non-empty input must never produce an empty result.
      self::assertNotEmpty($chunks, "No chunks produced with keep_separator = $label.");
    }
  }

  /**
   * Tests splitting with a non-zero chunk overlap.
   *
   * Three 100-character runs joined by newlines are split with a chunk size
   * of 120 and an overlap of 20; more than one chunk is expected.
   *
   * @covers ::splitText
   * @covers ::configure
   */
  public function testChunkOverlap(): void {
    $runs = [
      str_repeat("A", 100),
      str_repeat("B", 100),
      str_repeat("C", 100),
    ];
    $text = implode("\n", $runs);

    $this->splitter->configure([
      'chunk_size' => 120,
      'chunk_overlap' => 20,
      'keep_separator' => FALSE,
    ]);

    $chunks = $this->splitter->splitText($text);
    $chunkCount = count($chunks);

    self::assertGreaterThan(1, $chunkCount);

    // NOTE(review): the checks below only confirm that the tail of each chunk
    // and the head of the following chunk are non-empty. They do not compare
    // the two, so they do not prove that any content is actually shared
    // between neighbouring chunks.
    for ($index = 1; $index < $chunkCount; $index++) {
      $previousTail = substr($chunks[$index - 1], -10);
      $followingHead = substr($chunks[$index], 0, 10);

      self::assertNotEmpty($previousTail);
      self::assertNotEmpty($followingHead);
    }
  }

  /**
   * Tests splitting PHP code.
   *
   * Requests a splitter for the "php" language with a chunk size of 100 and
   * expects a small PHP snippet to produce a non-empty array of chunks.
   *
   * @covers ::forLanguage
   * @covers ::splitText
   */
  public function testPhpCodeSplitting(): void {
    $snippet = <<<'PHP'
<?php

class MyClass {
  public function method1() {
    return 'value1';
  }

  public function method2() {
    return 'value2';
  }
}

function myFunction() {
  return 'result';
}
PHP;

    $options = [
      'chunk_size' => 100,
      'chunk_overlap' => 0,
    ];

    $chunks = $this->splitter
      ->forLanguage('php', $options)
      ->splitText($snippet);

    self::assertIsArray($chunks);
    self::assertNotEmpty($chunks);
  }

  /**
   * Tests splitting Python code.
   *
   * Requests a splitter for the "python" language with a chunk size of 80 and
   * expects a small Python snippet to produce a non-empty array of chunks.
   *
   * @covers ::forLanguage
   * @covers ::splitText
   */
  public function testPythonCodeSplitting(): void {
    $snippet = <<<'PYTHON'
class MyClass:
    def method1(self):
        return 'value1'

    def method2(self):
        return 'value2'

def my_function():
    return 'result'
PYTHON;

    $options = [
      'chunk_size' => 80,
      'chunk_overlap' => 0,
    ];

    $chunks = $this->splitter
      ->forLanguage('python', $options)
      ->splitText($snippet);

    self::assertIsArray($chunks);
    self::assertNotEmpty($chunks);
  }

  /**
   * Tests splitting JavaScript code.
   *
   * Requests a splitter for the "javascript" language with a chunk size of 80
   * and expects a small script to produce a non-empty array of chunks.
   *
   * @covers ::forLanguage
   * @covers ::splitText
   */
  public function testJavaScriptCodeSplitting(): void {
    $snippet = <<<'JS'
function myFunction() {
  return 'result';
}

const myConst = 'value';

class MyClass {
  constructor() {
    this.value = 'test';
  }
}
JS;

    $options = [
      'chunk_size' => 80,
      'chunk_overlap' => 0,
    ];

    $chunks = $this->splitter
      ->forLanguage('javascript', $options)
      ->splitText($snippet);

    self::assertIsArray($chunks);
    self::assertNotEmpty($chunks);
  }

  /**
   * Tests splitting Markdown content.
   *
   * Requests a splitter for the "markdown" language with a chunk size of 50
   * and expects a document with three headings to produce a non-empty array
   * of chunks.
   *
   * @covers ::forLanguage
   * @covers ::splitText
   */
  public function testMarkdownSplitting(): void {
    $document = <<<'MD'
# Heading 1

This is a paragraph.

## Heading 2

Another paragraph here.

### Heading 3

Final paragraph.
MD;

    $options = [
      'chunk_size' => 50,
      'chunk_overlap' => 0,
    ];

    $chunks = $this->splitter
      ->forLanguage('markdown', $options)
      ->splitText($document);

    self::assertIsArray($chunks);
    self::assertNotEmpty($chunks);
  }

  /**
   * Tests splitting HTML content.
   *
   * Requests a splitter for the "html" language with a chunk size of 60 and
   * expects a small nested markup fragment to produce a non-empty array of
   * chunks.
   *
   * @covers ::forLanguage
   * @covers ::splitText
   */
  public function testHtmlSplitting(): void {
    $markup = <<<'HTML'
<div>
  <h1>Title</h1>
  <p>Paragraph 1</p>
  <p>Paragraph 2</p>
  <ul>
    <li>Item 1</li>
    <li>Item 2</li>
  </ul>
</div>
HTML;

    $options = [
      'chunk_size' => 60,
      'chunk_overlap' => 0,
    ];

    $chunks = $this->splitter
      ->forLanguage('html', $options)
      ->splitText($markup);

    self::assertIsArray($chunks);
    self::assertNotEmpty($chunks);
  }

  /**
   * Tests that small text is not split.
   *
   * With a chunk size of 1000 a single short sentence is expected to come
   * back as exactly one chunk whose trimmed content is identical to the
   * input.
   *
   * @covers ::splitText
   * @covers ::configure
   */
  public function testSmallTextNotSplit(): void {
    $text = "This is a small text.";

    $this->splitter->configure([
      'chunk_size' => 1000,
      'chunk_overlap' => 0,
    ]);

    $chunks = $this->splitter->splitText($text);

    self::assertCount(1, $chunks);
    // Strict string identity; surrounding whitespace on the chunk is ignored
    // via trim() as before.
    self::assertSame($text, trim($chunks[0]));
  }

  /**
   * Tests handling of empty text.
   *
   * Splitting the empty string, without any explicit configuration, is
   * expected to return an empty array.
   *
   * @covers ::splitText
   */
  public function testEmptyText(): void {
    $input = '';

    $result = $this->splitter->splitText($input);

    self::assertIsArray($result);
    self::assertEmpty($result);
  }

  /**
   * Tests a caller-supplied length function.
   *
   * Chunk size is measured in words (via str_word_count()) instead of
   * characters, with a limit of two words per chunk.
   *
   * @covers ::configure
   * @covers ::splitText
   */
  public function testCustomLengthFunction(): void {
    // Measures length as a word count rather than a character count.
    $countWords = fn($value) => str_word_count($value);

    $this->splitter->configure([
      'chunk_size' => 2,
      'chunk_overlap' => 0,
      'separators' => [' ', ''],
      'length_function' => $countWords,
      'keep_separator' => FALSE,
    ]);

    $chunks = $this->splitter->splitText("Word1 Word2 Word3 Word4 Word5");

    self::assertIsArray($chunks);
    self::assertGreaterThan(1, count($chunks));

    // No chunk may contain more than two words.
    foreach ($chunks as $piece) {
      $wordsInPiece = str_word_count($piece);
      self::assertLessThanOrEqual(2, $wordsInPiece);
    }
  }

  /**
   * Tests that an unsupported language is rejected.
   *
   * Requesting a splitter for an unknown language identifier is expected to
   * raise an \InvalidArgumentException that names the language.
   *
   * @covers ::forLanguage
   */
  public function testUnsupportedLanguageThrowsException(): void {
    $language = 'unsupported';

    // Expectations must be registered before the call that throws.
    $this->expectException(\InvalidArgumentException::class);
    $this->expectExceptionMessage('Language "unsupported" is not supported');

    $this->splitter->forLanguage($language);
  }

  /**
   * Tests regular-expression separators.
   *
   * Configures a character-class pattern matching commas and semicolons with
   * is_separator_regex enabled and expects the delimited input to yield more
   * than one chunk.
   *
   * @covers ::splitText
   * @covers ::configure
   */
  public function testRegexSeparators(): void {
    $options = [
      'separators' => ['[,;]'],
      'is_separator_regex' => TRUE,
      'chunk_size' => 20,
      'chunk_overlap' => 0,
      'keep_separator' => FALSE,
    ];
    $this->splitter->configure($options);

    $pieces = $this->splitter->splitText("Item1,Item2;Item3,Item4;Item5");

    self::assertIsArray($pieces);
    $pieceCount = count($pieces);
    self::assertGreaterThan(1, $pieceCount);
  }

  /**
   * Tests that chunks respect size limits.
   *
   * Three 50-character runs separated by blank lines are split with a chunk
   * size of 60. Every chunk's length, as reported by the splitter's own
   * getTextLength(), must stay within a small margin of that limit.
   *
   * @covers ::splitText
   * @covers ::configure
   */
  public function testChunkSizeLimits(): void {
    $text = str_repeat("A", 50) . "\n\n" . str_repeat("B", 50) . "\n\n" . str_repeat("C", 50);

    $this->splitter->configure([
      'chunk_size' => 60,
      'chunk_overlap' => 0,
    ]);

    $chunks = $this->splitter->splitText($text);

    // Guard against a vacuous pass: without this, an empty result would skip
    // the loop below and the test would perform no assertions at all.
    self::assertNotEmpty($chunks);

    foreach ($chunks as $chunk) {
      $length = $this->splitter->getTextLength($chunk);
      // Each chunk should be close to or under the limit.
      // Allow some margin for separators and overlap.
      self::assertLessThanOrEqual(70, $length, "Chunk exceeds expected size: $length");
    }
  }

  /**
   * Tests UTF-8 multibyte character handling.
   *
   * Splits Japanese, English and Ukrainian paragraphs with a chunk size of
   * 20 and checks that the trimmed chunks, joined by blank lines, are
   * identical to the trimmed input.
   *
   * @covers ::splitText
   * @covers ::configure
   */
  public function testMultibyteCharacters(): void {
    $text = "こんにちは世界\n\nHello world\n\nСлава Україні";

    $this->splitter->configure([
      'chunk_size' => 20,
      'chunk_overlap' => 0,
    ]);

    $chunks = $this->splitter->splitText($text);

    self::assertIsArray($chunks);
    self::assertNotEmpty($chunks);

    // Verify all text is preserved. assertSame() makes this a strict string
    // identity check, so an altered multibyte sequence fails the comparison.
    $reassembled = implode("\n\n", array_map('trim', $chunks));
    self::assertSame(trim($text), $reassembled);
  }

  /**
   * Tests the keep_separator "start" option.
   *
   * Splits newline-delimited input with keep_separator set to the string
   * "start" and expects a non-empty array of chunks.
   *
   * @covers ::splitText
   * @covers ::configure
   */
  public function testKeepSeparatorStart(): void {
    $options = [
      'separators' => ["\n"],
      'chunk_size' => 10,
      'chunk_overlap' => 0,
      'keep_separator' => 'start',
    ];
    $this->splitter->configure($options);

    $pieces = $this->splitter->splitText("Part1\nPart2\nPart3");

    self::assertIsArray($pieces);
    self::assertNotEmpty($pieces);
  }

  /**
   * Tests the keep_separator "end" option.
   *
   * Splits newline-delimited input with keep_separator set to the string
   * "end" and expects a non-empty array of chunks.
   *
   * @covers ::splitText
   * @covers ::configure
   */
  public function testKeepSeparatorEnd(): void {
    $options = [
      'separators' => ["\n"],
      'chunk_size' => 10,
      'chunk_overlap' => 0,
      'keep_separator' => 'end',
    ];
    $this->splitter->configure($options);

    $pieces = $this->splitter->splitText("Part1\nPart2\nPart3");

    self::assertIsArray($pieces);
    self::assertNotEmpty($pieces);
  }

}
