<?php

declare(strict_types=1);

namespace Drupal\ai_document_ocr\Plugin\AiAutomatorType;

use Drupal\ai_automators\PluginBaseClasses\RuleBase;
use Drupal\ai_automators\PluginInterfaces\AiAutomatorTypeInterface;
use Drupal\Core\Entity\ContentEntityInterface;
use Drupal\Core\Field\FieldDefinitionInterface;
use Drupal\ai\AiProviderPluginManager;
use Drupal\ai\Service\AiProviderFormHelper;
use Drupal\ai\Service\PromptJsonDecoder\PromptJsonDecoderInterface;
use Drupal\Core\Config\ConfigFactoryInterface;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\File\FileSystemInterface;
use Drupal\Core\Logger\LoggerChannelFactoryInterface;
use Drupal\ai_automators\Attribute\AiAutomatorType;
use Drupal\Core\StringTranslation\StringTranslationTrait;
use Drupal\Core\StringTranslation\TranslatableMarkup;
use Google\Cloud\DocumentAI\V1\Client\DocumentProcessorServiceClient;
use Google\Cloud\DocumentAI\V1\RawDocument;
use Google\Cloud\DocumentAI\V1\ProcessRequest;
use Symfony\Component\DependencyInjection\ContainerInterface;

/**
 * Document Processor automator type for AI Document OCR.
 *
 * For every file referenced by the configured base field, the file contents
 * are sent to Google Cloud Document AI and the extracted text is returned as
 * one value for the automated (long string) field.
 */
#[AiAutomatorType(
  id: 'document_processor',
  label: new TranslatableMarkup('Document Processor'),
  field_rule: 'string_long',
  target: '',
)]
class DocumentProcessor extends RuleBase implements AiAutomatorTypeInterface {

  use StringTranslationTrait;

  /**
   * The automator title.
   *
   * @var string
   */
  public string $title = 'Document Processor';

  /**
   * The LLM Type.
   *
   * @var string
   */
  public string $llmType = 'document_to_text';

  /**
   * Constructs a DocumentProcessor object.
   *
   * The first three services are forwarded to the parent rule; the remaining
   * ones are kept on this instance for OCR processing.
   *
   * @param \Drupal\ai\AiProviderPluginManager $pluginManager
   *   The AI provider plugin manager.
   * @param \Drupal\ai\Service\AiProviderFormHelper $formHelper
   *   The AI provider form helper.
   * @param \Drupal\ai\Service\PromptJsonDecoder\PromptJsonDecoderInterface $promptJsonDecoder
   *   The prompt JSON decoder.
   * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $loggerFactory
   *   The logger channel factory.
   * @param \Drupal\Core\Config\ConfigFactoryInterface $configFactory
   *   The config factory, used to read 'ai_document_ocr.settings'.
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entityTypeManager
   *   The entity type manager, used to load the credentials key entity.
   * @param \Drupal\Core\File\FileSystemInterface $fileSystem
   *   The file system service, used to resolve local file paths.
   */
  public function __construct(
    AiProviderPluginManager $pluginManager,
    AiProviderFormHelper $formHelper,
    PromptJsonDecoderInterface $promptJsonDecoder,
    protected LoggerChannelFactoryInterface $loggerFactory,
    protected ConfigFactoryInterface $configFactory,
    protected EntityTypeManagerInterface $entityTypeManager,
    protected FileSystemInterface $fileSystem,
  ) {
    parent::__construct($pluginManager, $formHelper, $promptJsonDecoder);
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    return new static(
      $container->get('ai.provider'),
      $container->get('ai.form_helper'),
      $container->get('ai.prompt_json_decode'),
      $container->get('logger.factory'),
      $container->get('config.factory'),
      $container->get('entity_type.manager'),
      $container->get('file_system'),
    );
  }

  /**
   * {@inheritdoc}
   */
  public function placeholderText(): string {
    return (string) $this->t('Extract text from uploaded documents using OCR');
  }

  /**
   * {@inheritdoc}
   */
  public function allowedInputs(): array {
    return [
      'image',
      'file',
    ];
  }

  /**
   * {@inheritdoc}
   *
   * Returns one extracted text string per successfully processed file of the
   * base field; files that fail OCR are skipped.
   */
  public function generate(ContentEntityInterface $entity, FieldDefinitionInterface $fieldDefinition, array $automatorConfig): array {
    // A missing or malformed base field setting means there is nothing to do.
    $base_field = $automatorConfig['base_field'] ?? NULL;
    if (!is_string($base_field) || $base_field === '') {
      return [];
    }

    if (!$entity->hasField($base_field) || $entity->get($base_field)->isEmpty()) {
      return [];
    }

    $results = [];

    // Process each uploaded file/image.
    foreach ($entity->get($base_field) as $item) {
      $file = $item->entity;
      // Skip references whose target entity can no longer be loaded.
      if (!$file) {
        continue;
      }

      $result = $this->processDocumentFile($file, $automatorConfig);
      if ($result !== NULL) {
        $results[] = $result['text'];
      }
    }

    return $results;
  }

  /**
   * Process a document file with OCR.
   *
   * @param object $file
   *   The referenced entity; expected to be a file entity, since
   *   getFileUri(), getMimeType() and getFilename() are called on it.
   * @param array $automatorConfig
   *   The automator configuration (currently not read by this method).
   *
   * @return array|null
   *   An array with the keys 'text', 'confidence', 'structured_data' and
   *   'metadata', or NULL when the file has no readable content or when
   *   processing failed (the failure is logged).
   */
  protected function processDocumentFile($file, array $automatorConfig): ?array {
    $logger = $this->loggerFactory->get('ai_document_ocr');

    try {
      // Get Document OCR Provider configuration.
      $config = $this->configFactory->get('ai_document_ocr.settings');
      $credentials_key = $config->get('general_credentials_file');
      $processor_id = $config->get('processor_id');
      if (!$credentials_key || !$processor_id) {
        throw new \Exception('Document OCR provider not properly configured. Missing credentials or processor ID.');
      }

      // The region is part of both the API endpoint and the processor name,
      // so an empty value can never produce a working request.
      $region = $config->get('default_region');
      if (!$region) {
        throw new \Exception('Document OCR provider not properly configured. Missing default region.');
      }

      // Load credentials from Key module.
      $key_entity = $this->entityTypeManager->getStorage('key')->load($credentials_key);
      if (!$key_entity) {
        throw new \Exception('Could not load credentials key: ' . $credentials_key);
      }

      $credentials_json = $key_entity->getKeyValue();
      $credentials = is_string($credentials_json) ? json_decode($credentials_json, TRUE) : NULL;
      if (!is_array($credentials) || !is_string($credentials['project_id'] ?? NULL)) {
        throw new \Exception('Invalid credentials format.');
      }

      // Normalise to strings: this file uses strict types and the values are
      // passed on to string-typed client methods.
      $project_id = $credentials['project_id'];
      $region = (string) $region;
      $processor_id = (string) $processor_id;

      // Log the configuration for debugging.
      $logger->info(
        'Processing document with config - Project: @project, Region: @region, Processor: @processor',
        [
          '@project' => $project_id,
          '@region' => $region,
          '@processor' => $processor_id,
        ]
      );

      $file_content = $this->readFileContent($file);
      if ($file_content === NULL) {
        return NULL;
      }

      // Create Google Document AI client.
      $client = new DocumentProcessorServiceClient([
        'apiEndpoint' => $region . '-documentai.googleapis.com',
        'credentials' => $credentials,
      ]);

      try {
        // Build the fully qualified processor resource name. This only
        // formats a string; it does not contact the API.
        $full_processor_name = $client->processorName($project_id, $region, $processor_id);
        $logger->info(
          'Using processor: @processor_name',
          ['@processor_name' => $full_processor_name]
        );

        // Load file contents into a RawDocument.
        $raw_document = (new RawDocument())
          ->setContent($file_content)
          ->setMimeType($file->getMimeType());

        // Send a ProcessRequest and get a ProcessResponse.
        $request = (new ProcessRequest())
          ->setName($full_processor_name)
          ->setRawDocument($raw_document);

        $document = $client->processDocument($request)->getDocument();
      }
      finally {
        // Release the transport even when the request fails.
        $client->close();
      }

      if (!$document) {
        throw new \Exception('Document AI response did not contain a document.');
      }

      // Average the per-page layout confidence. Pages that expose no layout
      // confidence still count as a page and contribute 0 to the sum.
      $page_count = 0;
      $confidence_sum = 0.0;
      foreach ($document->getPages() as $page) {
        $page_count++;
        if (method_exists($page, 'getLayout') && $page->getLayout()) {
          $layout = $page->getLayout();
          if (method_exists($layout, 'getConfidence')) {
            $confidence_sum += $layout->getConfidence();
          }
        }
      }
      $confidence = $page_count > 0 ? $confidence_sum / $page_count : 0.0;

      return [
        'text' => $document->getText(),
        'confidence' => $confidence,
        'structured_data' => [],
        'metadata' => [
          'pages' => $page_count,
          'mime_type' => $file->getMimeType(),
          'filename' => $file->getFilename(),
        ],
      ];
    }
    catch (\Exception $e) {
      $logger->error(
        'OCR processing failed for file @file: @error',
        [
          '@file' => $file->getFilename(),
          '@error' => $e->getMessage(),
        ]
      );
      return NULL;
    }
  }

  /**
   * Reads the raw contents of a file.
   *
   * URIs that validate as URLs are read directly; any other URI is first
   * resolved to a local path through the file system service.
   *
   * @param object $file
   *   The object to read from; getFileUri() is called on it.
   *
   * @return string|null
   *   The file contents, or NULL when the file could not be read (logged) or
   *   is empty.
   */
  private function readFileContent($file): ?string {
    $logger = $this->loggerFactory->get('ai_document_ocr');
    $uri = (string) $file->getFileUri();

    if (filter_var($uri, FILTER_VALIDATE_URL)) {
      // If the file is a URL, fetch the content directly from the URL.
      $document_path = $uri;
      $failure_message = 'Failed to fetch the content from URL: @path';
    }
    else {
      // realpath() returns FALSE when the URI has no local representation;
      // check before touching the file so no invalid path is ever opened.
      $document_path = $this->fileSystem->realpath($uri);
      if ($document_path === FALSE || !is_file($document_path) || !is_readable($document_path)) {
        $logger->error(
          'Failed to read the local file: @path',
          ['@path' => $uri]
        );
        return NULL;
      }
      $failure_message = 'Failed to read the local file: @path';
    }

    $content = file_get_contents($document_path);
    if ($content === FALSE) {
      $logger->error($failure_message, ['@path' => $document_path]);
      return NULL;
    }

    // An empty file has nothing to OCR.
    return $content === '' ? NULL : $content;
  }

  /**
   * {@inheritdoc}
   */
  public function verifyValue(ContentEntityInterface $entity, $value, FieldDefinitionInterface $fieldDefinition, array $automatorConfig): bool {
    // Only plain strings of extracted text are valid values.
    return is_string($value);
  }

  /**
   * {@inheritdoc}
   */
  public function storeValues(ContentEntityInterface $entity, array $values, FieldDefinitionInterface $fieldDefinition, array $automatorConfig) {
    $entity->set($fieldDefinition->getName(), $values);
    return TRUE;
  }

}
