<?php

declare(strict_types=1);

namespace Drupal\ai_document_ocr\Plugin\AiProvider;

use Drupal\ai\Attribute\AiProvider;
use Drupal\ai\Base\AiProviderClientBase;
use Drupal\ai\Exception\AiBadRequestException;
use Drupal\ai_document_ocr\OperationType\DocumentToText\DocumentToTextInput;
use Drupal\ai_document_ocr\OperationType\DocumentToText\DocumentToTextInterface;
use Drupal\ai_document_ocr\OperationType\DocumentToText\DocumentToTextOutput;
use Drupal\Component\Serialization\Json;
use Drupal\Core\Config\ImmutableConfig;
use Drupal\Core\Plugin\ContainerFactoryPluginInterface;
use Drupal\Core\StringTranslation\StringTranslationTrait;
use Drupal\Core\StringTranslation\TranslatableMarkup;
use Google\Cloud\DocumentAI\V1\Client\DocumentProcessorServiceClient;
use Google\Cloud\DocumentAI\V1\Document;
use Google\Cloud\DocumentAI\V1\ProcessRequest;
use Google\Cloud\DocumentAI\V1\RawDocument;
use Psr\Http\Client\ClientInterface;
use Symfony\Component\Yaml\Yaml;

/**
 * Plugin implementation of the 'ai_document_ocr' AI Provider.
 *
 * Sends documents to Google Cloud Document AI and turns the response into a
 * DocumentToTextOutput (plain text, confidence, structured data, raw data).
 */
#[AiProvider(
  id: 'ai_document_ocr',
  label: new TranslatableMarkup('Google Document AI OCR'),
)]
class DocumentOcrProvider extends AiProviderClientBase implements
  ContainerFactoryPluginInterface,
  DocumentToTextInterface {

  use StringTranslationTrait;

  /**
   * Flags that this provider's models are predefined.
   *
   * The model list is read from definitions/api_defaults.yml, see
   * getConfiguredModels().
   *
   * @var bool
   */
  protected bool $hasPredefinedModels = TRUE;

  /**
   * {@inheritdoc}
   */
  public function isUsable(?string $operation_type = NULL, array $capabilities = []): bool {
    // Without a specific operation type the provider is always reported as
    // usable so it stays reachable for configuration. Missing credentials are
    // reported by documentToText() when an operation is actually attempted.
    if (!$operation_type) {
      return TRUE;
    }

    // Otherwise only the operation types listed by this provider qualify.
    return in_array($operation_type, $this->getSupportedOperationTypes(), TRUE);
  }

  /**
   * Tells whether a credentials key has been selected for this provider.
   *
   * @return bool
   *   TRUE when the 'general_credentials_file' setting holds a non-empty
   *   value, FALSE otherwise.
   */
  public function isConfigured(): bool {
    $credentials_key = $this->getConfig()->get('general_credentials_file');
    return !empty($credentials_key);
  }

  /**
   * {@inheritdoc}
   */
  public function getSupportedOperationTypes(): array {
    return ['document_to_text'];
  }

  /**
   * {@inheritdoc}
   */
  public function getConfig(): ImmutableConfig {
    return $this->configFactory->get('ai_document_ocr.settings');
  }

  /**
   * {@inheritdoc}
   */
  public function getApiDefinition(): array {
    // The definition ships with the module as a YAML file.
    $module_path = $this->moduleHandler->getModule('ai_document_ocr')->getPath();
    $definition = Yaml::parseFile($module_path . '/definitions/api_defaults.yml');

    // An empty or scalar-only YAML file does not parse to an array; return an
    // empty definition instead of violating the declared return type.
    return is_array($definition) ? $definition : [];
  }

  /**
   * {@inheritdoc}
   */
  public function getConfiguredModels(?string $operation_type = NULL, array $capabilities = []): array {
    $definition = $this->getApiDefinition();

    // With no operation type given, collect the models of every supported
    // operation type; otherwise look at the requested type only.
    $types = $operation_type ? [$operation_type] : $this->getSupportedOperationTypes();

    $labels = [];
    foreach ($types as $type) {
      foreach ($definition[$type]['models'] ?? [] as $model_id => $model_config) {
        // Models without an explicit label are listed under their ID.
        $labels[$model_id] = $model_config['label'] ?? $model_id;
      }
    }

    return $labels;
  }

  /**
   * {@inheritdoc}
   */
  public function getModelSettings(string $model_id, array $generalConfig = []): array {
    // No model-specific adjustments are made; the general configuration is
    // handed back unchanged.
    return $generalConfig;
  }

  /**
   * {@inheritdoc}
   */
  public function getModelInfo(string $operation_type, string $model_id): array {
    // 1. An override stored in configuration wins.
    $plugin_id = $this->getPluginId();
    $overrides = $this->getModelsConfig();
    if (isset($overrides[$plugin_id][$operation_type][$model_id])) {
      return $overrides[$plugin_id][$operation_type][$model_id];
    }

    // 2. Next, the bundled API definition, with the model ID added.
    $definition = $this->getApiDefinition();
    if (isset($definition[$operation_type]['models'][$model_id])) {
      $model_info = $definition[$operation_type]['models'][$model_id];
      $model_info['model_id'] = $model_id;
      return $model_info;
    }

    // 3. Finally, a minimal record built from the configured model labels.
    $labels = $this->getConfiguredModels($operation_type);
    if (isset($labels[$model_id])) {
      return [
        'model_id' => $model_id,
        'label' => $labels[$model_id],
      ];
    }

    return [];
  }

  /**
   * {@inheritdoc}
   */
  public function setAuthentication(mixed $authentication): void {
    // Intentionally empty: credentials are read from the module configuration
    // in loadCredentials(), so the value passed in here is not used.
  }

  /**
   * Gets the HTTP client.
   *
   * @return \Psr\Http\Client\ClientInterface
   *   The HTTP client held by this provider.
   */
  public function getClient(): ClientInterface {
    return $this->httpClient;
  }

  /**
   * {@inheritdoc}
   */
  public function loadModelsForm(array $form, $form_state, string $operation_type, ?string $model_id = NULL): array {
    $form = parent::loadModelsForm($form, $form_state, $operation_type, $model_id);
    $model_config = $this->loadModelConfig($operation_type, $model_id);

    // The processor chosen in the provider-wide settings, if any.
    $global_processor = $this->getConfig()->get('processor_id');
    $has_global_processor = !empty($global_processor);

    if (!$has_global_processor) {
      $warning = $this->t('No processor selected in global configuration. Please configure a processor first.');
      $form['model_data']['processor_warning'] = [
        '#markup' => '<div class="messages messages--warning">' . $warning . '</div>',
      ];
    }

    // Only the document-to-text operation has extra model settings.
    if ($operation_type !== 'document_to_text') {
      return $form;
    }

    // The field is locked whenever a global processor is configured.
    $form['model_data']['processor_id'] = [
      '#type' => 'textfield',
      '#title' => $this->t('Document AI Processor ID'),
      '#description' => $this->t('The Document AI processor ID to use for OCR (from global configuration).'),
      '#default_value' => $model_config['processor_id'] ?? $global_processor,
      '#required' => TRUE,
      '#disabled' => $has_global_processor,
    ];

    if ($has_global_processor) {
      $note = $this->t('Using processor from global configuration: <strong>@processor</strong>', ['@processor' => $global_processor]);
      $form['model_data']['processor_note'] = [
        '#markup' => '<div class="description">' . $note . '</div>',
      ];
    }

    $form['model_data']['extract_structured_data'] = [
      '#type' => 'checkbox',
      '#title' => $this->t('Extract Structured Data'),
      '#description' => $this->t('Whether to extract structured data in addition to plain text.'),
      '#default_value' => $model_config['extract_structured_data'] ?? TRUE,
    ];

    $form['model_data']['confidence_threshold'] = [
      '#type' => 'number',
      '#title' => $this->t('Confidence Threshold'),
      '#description' => $this->t('Minimum confidence level for text extraction (0.0-1.0).'),
      '#default_value' => $model_config['confidence_threshold'] ?? 0.8,
      '#min' => 0.0,
      '#max' => 1.0,
      '#step' => 0.1,
      '#required' => FALSE,
    ];

    return $form;
  }

  /**
   * Creates a Document AI client authenticated with the stored credentials.
   *
   * @param string|null $location
   *   (optional) The processor location, e.g. 'us', 'eu' or a region ID. When
   *   omitted, the 'default_region' setting is used, falling back to 'us'.
   *
   * @return \Google\Cloud\DocumentAI\V1\Client\DocumentProcessorServiceClient
   *   The Document AI client.
   *
   * @throws \Drupal\ai\Exception\AiBadRequestException
   *   If the credentials cannot be loaded or client creation fails.
   */
  protected function createDocumentAiClient(?string $location = NULL): DocumentProcessorServiceClient {
    $credentials = $this->loadCredentials();
    if (!$credentials) {
      throw new AiBadRequestException((string) $this->t('Unable to load Google Cloud credentials.'));
    }

    $location = $location ?: (string) ($this->getConfig()->get('default_region') ?: 'us');

    $options = ['credentials' => $credentials];

    // Processors outside the default location must be addressed through their
    // location-specific host. The client option takes a host name, so the
    // scheme of the endpoint URL is stripped.
    $host = parse_url($this->getDocumentAiEndpoint($location), PHP_URL_HOST);
    if (is_string($host) && $host !== '') {
      $options['apiEndpoint'] = $host;
    }

    try {
      return new DocumentProcessorServiceClient($options);
    }
    catch (\Exception $e) {
      // Keep the original exception attached for debugging.
      throw new AiBadRequestException((string) $this->t('Failed to create Document AI client: @message', ['@message' => $e->getMessage()]), 0, $e);
    }
  }

  /**
   * Loads the Google Cloud credentials stored in the configured key.
   *
   * The key ID is read from the 'general_credentials_file' setting and
   * resolved through the Key module; the key value is expected to be JSON.
   *
   * @return array|null
   *   The decoded credentials, or NULL when no key is configured, the Key
   *   module is missing, the key is empty, or its value does not decode to an
   *   array.
   */
  protected function loadCredentials(): ?array {
    $key_id = $this->getConfig()->get('general_credentials_file');

    // Key-based credentials are the only supported source.
    if (!$key_id || !$this->moduleHandler->moduleExists('key')) {
      return NULL;
    }

    try {
      $key = $this->keyRepository->getKey($key_id);
      $file_content = $key ? $key->getKeyValue() : NULL;
      if (!$file_content) {
        return NULL;
      }

      // A key holding a JSON scalar (or invalid JSON) must not leak through
      // the array return type.
      $decoded = Json::decode($file_content);
      return is_array($decoded) ? $decoded : NULL;
    }
    catch (\Exception $e) {
      // Deliberately best effort: any failure is reported as "no credentials".
      return NULL;
    }
  }

  /**
   * Extracts the project ID from the Google Cloud credentials.
   *
   * @return string|null
   *   The non-empty 'project_id' string from the credentials, or NULL if the
   *   credentials are unavailable or carry no usable project ID.
   */
  protected function getProjectIdFromCredentials(): ?string {
    $project_id = $this->loadCredentials()['project_id'] ?? NULL;
    return is_string($project_id) && $project_id !== '' ? $project_id : NULL;
  }

  /**
   * Builds the Document AI endpoint URL for a location.
   *
   * @param string $location
   *   The location/region identifier.
   *
   * @return string
   *   The base URL for the Document AI API endpoint.
   */
  protected function getDocumentAiEndpoint(string $location): string {
    return match ($location) {
      // Multi-region endpoints.
      'us' => 'https://documentai.googleapis.com',
      'eu' => 'https://eu-documentai.googleapis.com',
      // Regional endpoints: https://{location-id}-documentai.googleapis.com.
      default => "https://{$location}-documentai.googleapis.com",
    };
  }

  /**
   * Keeps only the text of blocks that meet a confidence threshold.
   *
   * @param array $document
   *   The normalized document data: 'text' plus 'pages', each page holding
   *   'blocks' with 'layout' => ['confidence', 'textAnchor'].
   * @param float $confidence_threshold
   *   The minimum confidence level.
   *
   * @return string
   *   The text of all qualifying blocks joined by newlines, or the full
   *   document text when no block text qualifies.
   */
  protected function filterTextByConfidence(array $document, float $confidence_threshold): string {
    $full_text = (string) ($document['text'] ?? '');

    $kept = [];
    foreach ($document['pages'] ?? [] as $page) {
      foreach ($page['blocks'] ?? [] as $block) {
        if (($block['layout']['confidence'] ?? 0) < $confidence_threshold) {
          continue;
        }
        $kept[] = $this->getTextFromTextAnchor($full_text, $block['layout']['textAnchor'] ?? []);
      }
    }

    $filtered_text = trim(implode("\n", $kept));

    // Never hand back an empty result just because nothing passed the filter.
    return $filtered_text !== '' ? $filtered_text : $full_text;
  }

  /**
   * Extracts structured data from the normalized document.
   *
   * @param array $document
   *   The normalized document data. Paragraphs may be given either flat
   *   ('text' and 'confidence' keys) or with a 'layout' array holding
   *   'textAnchor' and 'confidence'.
   *
   * @return array
   *   An array with the keys 'pages', 'paragraphs', 'entities' and 'tables'.
   *   'tables' is always empty in this implementation.
   */
  protected function extractStructuredData(array $document): array {
    $full_text = (string) ($document['text'] ?? '');

    $structured_data = [
      'pages' => [],
      'paragraphs' => [],
      'entities' => [],
      'tables' => [],
    ];

    foreach ($document['pages'] ?? [] as $page) {
      $structured_data['pages'][] = [
        'page_number' => $page['pageNumber'] ?? 0,
        'width' => $page['dimension']['width'] ?? 0,
        'height' => $page['dimension']['height'] ?? 0,
        'confidence' => $page['layout']['confidence'] ?? 0,
      ];
    }

    foreach ($document['paragraphs'] ?? [] as $paragraph) {
      // Prefer the already resolved flat values; fall back to the layout
      // shape so both representations produce real text and confidence.
      $structured_data['paragraphs'][] = [
        'text' => $paragraph['text']
          ?? $this->getTextFromTextAnchor($full_text, $paragraph['layout']['textAnchor'] ?? []),
        'confidence' => $paragraph['confidence'] ?? $paragraph['layout']['confidence'] ?? 0,
      ];
    }

    foreach ($document['entities'] ?? [] as $entity) {
      $structured_data['entities'][] = [
        'type' => $entity['type'] ?? '',
        'mention_text' => $entity['mentionText'] ?? '',
        'confidence' => $entity['confidence'] ?? 0,
      ];
    }

    return $structured_data;
  }

  /**
   * {@inheritdoc}
   */
  public function documentToText(string|DocumentToTextInput $input, string $model_id, array $tags = []): DocumentToTextOutput {
    // Fail early with a clear message when no credentials key is configured.
    if (!$this->isConfigured()) {
      throw new AiBadRequestException((string) $this->t('AI Document OCR provider is not configured. Please configure credentials and settings first.'));
    }

    $global_config = $this->getConfig();
    $project_id = $this->getProjectIdFromCredentials();
    // An empty region setting falls back to 'us' just like a missing one.
    $region = (string) ($global_config->get('default_region') ?: 'us');

    // A processor set on the model takes precedence over the global one.
    $info = $this->getModelInfo('document_to_text', $model_id);
    $processor_id = $info['processor_id'] ?? $global_config->get('processor_id');

    if (!$processor_id) {
      throw new AiBadRequestException((string) $this->t('No processor ID configured. Please configure a processor in the provider settings.'));
    }

    if (!$project_id) {
      throw new AiBadRequestException((string) $this->t('Missing project ID configuration.'));
    }

    if ($input instanceof DocumentToTextInput) {
      $document_content = $input->getDocumentContent();
      $mime_type = $input->getMimeType();
      $filename = $input->getFilename();
    }
    else {
      // A plain string is assumed to be a (base64 encoded) PDF.
      $document_content = $input;
      $mime_type = 'application/pdf';
      $filename = NULL;
    }

    $client = NULL;
    try {
      $client = $this->createDocumentAiClient($region);

      // Strict mode is required here: the lenient mode silently drops invalid
      // characters instead of returning FALSE, which would mangle content
      // that is already binary.
      $binary_content = base64_decode($document_content, TRUE);
      if ($binary_content === FALSE) {
        $binary_content = $document_content;
      }

      $raw_document = (new RawDocument())
        ->setContent($binary_content)
        ->setMimeType($mime_type);

      $request = (new ProcessRequest())
        ->setName(DocumentProcessorServiceClient::processorName($project_id, $region, (string) $processor_id))
        ->setRawDocument($raw_document);

      $document = $client->processDocument($request)->getDocument();
      if ($document === NULL) {
        throw new \RuntimeException((string) $this->t('The Document AI response did not contain a document.'));
      }

      // Convert the Google Cloud response to the array format used by the
      // helper methods of this class.
      $document_data = $this->normalizeDocument($document);

      // Precedence: request tags, then model settings, then global default.
      $confidence_threshold = (float) ($tags['confidence_threshold']
        ?? $info['confidence_threshold']
        ?? $global_config->get('default_confidence_threshold')
        ?? 0.8);

      // A threshold of zero (or below) disables filtering.
      $extracted_text = $confidence_threshold > 0
        ? $this->filterTextByConfidence($document_data, $confidence_threshold)
        : $document_data['text'];

      $page_count = count($document_data['pages']);

      $raw_output = [
        'document_data' => $document_data,
        'processing_info' => [
          'model_id' => $model_id,
          'processor_id' => $processor_id,
          'location' => $region,
          'confidence_threshold' => $confidence_threshold,
        ],
        'metadata' => [
          'pages' => $page_count,
          'mime_type' => $mime_type,
          'filename' => $filename,
        ],
      ];

      $overall_confidence = $this->calculateOverallConfidence($document_data);

      $extract_structured = $tags['extract_structured_data']
        ?? $info['extract_structured_data']
        ?? $global_config->get('default_extract_structured_data')
        ?? TRUE;
      $structured_data = $extract_structured ? $this->extractStructuredData($document_data) : [];

      $metadata = [
        'pages' => $page_count,
        'mime_type' => $mime_type,
        'filename' => $filename,
        'processor_id' => $processor_id,
        'location' => $region,
      ];

      return new DocumentToTextOutput(
        $extracted_text,
        $overall_confidence,
        $structured_data,
        $raw_output,
        $metadata
      );
    }
    catch (\Exception $e) {
      // Keep the original exception attached for debugging.
      throw new AiBadRequestException((string) $this->t('Document OCR processing failed: @message', ['@message' => $e->getMessage()]), 0, $e);
    }
    finally {
      // Release the client's connection whether or not processing succeeded.
      $client?->close();
    }
  }

  /**
   * Converts a Document AI document into the array format used internally.
   *
   * Page and entity extraction are best effort: an exception while reading
   * either part leaves what was collected so far and does not abort the
   * operation.
   *
   * @param \Google\Cloud\DocumentAI\V1\Document $document
   *   The processed document returned by Document AI.
   *
   * @return array
   *   An array with the keys:
   *   - text: The full document text.
   *   - pages: Per page 'pageNumber', 'dimension', 'layout' and 'blocks'.
   *   - paragraphs: Flat list of ['text', 'confidence'] over all pages.
   *   - entities: List of ['type', 'mentionText', 'confidence'].
   */
  protected function normalizeDocument(Document $document): array {
    $full_text = (string) $document->getText();

    $data = [
      'text' => $full_text,
      'pages' => [],
      'entities' => [],
      'paragraphs' => [],
    ];

    try {
      foreach ($document->getPages() as $page) {
        $dimension = $page->getDimension();
        $page_layout = $page->getLayout();

        $page_data = [
          'pageNumber' => $page->getPageNumber(),
          'dimension' => $dimension
            ? ['width' => $dimension->getWidth(), 'height' => $dimension->getHeight()]
            : [],
          'layout' => [
            'confidence' => $page_layout ? $page_layout->getConfidence() : 0,
          ],
          'blocks' => [],
        ];

        // Blocks feed filterTextByConfidence() and
        // calculateOverallConfidence().
        foreach ($page->getBlocks() as $block) {
          $block_layout = $block->getLayout();
          $page_data['blocks'][] = [
            'layout' => [
              'confidence' => $block_layout ? $block_layout->getConfidence() : 0,
              'textAnchor' => $this->textAnchorToArray($block_layout?->getTextAnchor()),
            ],
          ];
        }

        $data['pages'][] = $page_data;

        foreach ($page->getParagraphs() as $paragraph) {
          $paragraph_layout = $paragraph->getLayout();
          $data['paragraphs'][] = [
            'text' => $paragraph_layout
              ? $this->getTextFromTextAnchor($full_text, $paragraph_layout->getTextAnchor())
              : '',
            'confidence' => $paragraph_layout ? $paragraph_layout->getConfidence() : 0,
          ];
        }
      }
    }
    catch (\Exception $e) {
      // Continue processing if page extraction fails.
    }

    try {
      foreach ($document->getEntities() as $entity) {
        $data['entities'][] = [
          'type' => $entity->getType(),
          'mentionText' => $entity->getMentionText(),
          'confidence' => $entity->getConfidence(),
        ];
      }
    }
    catch (\Exception $e) {
      // Continue processing if entity extraction fails.
    }

    return $data;
  }

  /**
   * Converts a text anchor object into its plain array representation.
   *
   * @param object|null $text_anchor
   *   A text anchor exposing getTextSegments(), or NULL.
   *
   * @return array
   *   An array with a 'textSegments' list of ['startIndex', 'endIndex'] pairs;
   *   the list is empty when no anchor is given.
   */
  protected function textAnchorToArray(?object $text_anchor): array {
    $segments = [];

    if ($text_anchor !== NULL && method_exists($text_anchor, 'getTextSegments')) {
      foreach ($text_anchor->getTextSegments() as $segment) {
        $segments[] = [
          'startIndex' => (int) $segment->getStartIndex(),
          'endIndex' => (int) $segment->getEndIndex(),
        ];
      }
    }

    return ['textSegments' => $segments];
  }

  /**
   * Applies provider-level default settings to options.
   *
   * @param array $options
   *   The input options.
   *
   * @return array
   *   The options, completed with provider defaults for every missing key.
   *   Values present in $options take precedence.
   */
  protected function applyProviderDefaults(array $options): array {
    $config = $this->getConfig();

    // Option name => [configuration key, fallback when the key is unset].
    $sources = [
      'confidence_threshold' => ['default_confidence_threshold', 0.8],
      'extract_structured_data' => ['default_extract_structured_data', TRUE],
      'timeout' => ['default_timeout', 300],
      'max_file_size' => ['default_max_file_size', 20],
      'cache_enabled' => ['cache_enabled', TRUE],
      'cache_duration' => ['cache_duration', 3600],
    ];

    $defaults = [];
    foreach ($sources as $option => [$config_key, $fallback]) {
      $defaults[$option] = $config->get($config_key) ?? $fallback;
    }

    return array_merge($defaults, $options);
  }

  /**
   * Gets the prompt template matching a model ID.
   *
   * Model IDs containing 'form' use the form prompt, IDs containing
   * 'handwriting' use the handwriting prompt, everything else the general
   * prompt. A configured prompt takes precedence over the built-in text.
   *
   * @param string $model_id
   *   The model ID.
   *
   * @return string
   *   The prompt template.
   */
  protected function getPromptTemplate(string $model_id): string {
    $config = $this->getConfig();

    return match (TRUE) {
      str_contains($model_id, 'form') => $config->get('form_prompt')
        ?? (string) $this->t('Extract all text and form fields from this document. Identify key-value pairs, tables, and structured data elements.'),
      str_contains($model_id, 'handwriting') => $config->get('handwriting_prompt')
        ?? (string) $this->t('Extract handwritten text from this document. Focus on accuracy and readability of the handwritten content.'),
      default => $config->get('general_prompt')
        ?? (string) $this->t('Extract all text from this document using OCR. Return only the extracted text content, preserving formatting where possible.'),
    };
  }

  /**
   * Calculates the overall confidence from the normalized document data.
   *
   * @param array $document
   *   The normalized document data, with 'pages' holding 'blocks'.
   *
   * @return float
   *   The mean of all block confidences greater than zero, or 0.0 when there
   *   are none.
   */
  protected function calculateOverallConfidence(array $document): float {
    $confidences = [];

    foreach ($document['pages'] ?? [] as $page) {
      foreach ($page['blocks'] ?? [] as $block) {
        $confidence = $block['layout']['confidence'] ?? 0;
        // Blocks without a confidence value do not count towards the mean.
        if ($confidence > 0) {
          $confidences[] = $confidence;
        }
      }
    }

    if ($confidences === []) {
      return 0.0;
    }

    return array_sum($confidences) / count($confidences);
  }

  /**
   * Extracts the text a text anchor points at.
   *
   * Segment indexes are treated as character offsets, so multi-byte UTF-8
   * characters are never split.
   *
   * @param string $full_text
   *   The full document text.
   * @param \Google\Cloud\DocumentAI\V1\Document\TextAnchor|array|null $text_anchor
   *   The text anchor, either as an object exposing getTextSegments() or as an
   *   array with a 'textSegments' list of 'startIndex'/'endIndex' pairs.
   *
   * @return string
   *   The concatenated text of all segments, or an empty string when the
   *   anchor is missing or has no segments.
   */
  protected function getTextFromTextAnchor(string $full_text, $text_anchor): string {
    if (!$text_anchor) {
      return '';
    }

    if (is_object($text_anchor) && method_exists($text_anchor, 'getTextSegments')) {
      $segments = $text_anchor->getTextSegments();
    }
    elseif (is_array($text_anchor) && !empty($text_anchor['textSegments'])) {
      $segments = $text_anchor['textSegments'];
    }
    else {
      return '';
    }

    $length = mb_strlen($full_text, 'UTF-8');
    $text = '';

    foreach ($segments as $segment) {
      // A missing start means the beginning, a missing end the end of text.
      if (is_object($segment)) {
        $start = method_exists($segment, 'getStartIndex') ? $segment->getStartIndex() : 0;
        $end = method_exists($segment, 'getEndIndex') ? $segment->getEndIndex() : $length;
      }
      else {
        $start = $segment['startIndex'] ?? 0;
        $end = $segment['endIndex'] ?? $length;
      }

      // Clamp both offsets to the text bounds and keep start <= end.
      $start = max(0, min((int) $start, $length));
      $end = max($start, min((int) $end, $length));

      if ($end > $start) {
        $text .= mb_substr($full_text, $start, $end - $start, 'UTF-8');
      }
    }

    return $text;
  }

}
