<?php

declare(strict_types=1);

namespace Drupal\ai_document_ocr\OperationType\DocumentToText;

use Drupal\ai\OperationType\OutputInterface;

/**
 * Output for Document to Text operations (OCR).
 *
 * Holds the extracted text together with an optional confidence score,
 * structured data, the provider's raw output and processing metadata. The
 * constructor stores its arguments unchanged; no validation is applied.
 */
class DocumentToTextOutput implements OutputInterface {

  /**
   * The extracted text.
   *
   * @var string
   */
  protected string $text;

  /**
   * The confidence score (0.0 to 1.0).
   *
   * The range is documented only; this class does not enforce it.
   *
   * @var float
   */
  protected float $confidence;

  /**
   * The structured data from OCR processing.
   *
   * The section getters of this class read the 'pages', 'paragraphs',
   * 'entities' and 'tables' keys; any other keys are kept untouched.
   *
   * @var array
   */
  protected array $structuredData;

  /**
   * The raw output from the provider.
   *
   * @var array
   */
  protected array $rawOutput;

  /**
   * Processing metadata.
   *
   * @var array
   */
  protected array $metadata;

  /**
   * Constructor.
   *
   * @param string $text
   *   The extracted text.
   * @param float $confidence
   *   The confidence score. Defaults to 0.0.
   * @param array $structured_data
   *   The structured data. Defaults to an empty array.
   * @param array $raw_output
   *   The raw output from the provider. Defaults to an empty array.
   * @param array $metadata
   *   Processing metadata. Defaults to an empty array.
   */
  public function __construct(
    string $text,
    float $confidence = 0.0,
    array $structured_data = [],
    array $raw_output = [],
    array $metadata = [],
  ) {
    $this->text = $text;
    $this->confidence = $confidence;
    $this->structuredData = $structured_data;
    $this->rawOutput = $raw_output;
    $this->metadata = $metadata;
  }

  /**
   * {@inheritdoc}
   */
  public function getNormalized(): string {
    // The normalized form of this output is the extracted text itself.
    return $this->text;
  }

  /**
   * {@inheritdoc}
   */
  public function getRawOutput(): array {
    return $this->rawOutput;
  }

  /**
   * Gets the extracted text.
   *
   * @return string
   *   The extracted text.
   */
  public function getText(): string {
    return $this->text;
  }

  /**
   * Gets the confidence score.
   *
   * @return float
   *   The confidence score (0.0 to 1.0), exactly as passed to the constructor.
   */
  public function getConfidence(): float {
    return $this->confidence;
  }

  /**
   * Gets the structured data.
   *
   * @return array
   *   The structured data from OCR processing.
   */
  public function getStructuredData(): array {
    return $this->structuredData;
  }

  /**
   * Gets the processing metadata.
   *
   * @return array
   *   The processing metadata.
   */
  public function getMetadata(): array {
    return $this->metadata;
  }

  /**
   * Gets pages information.
   *
   * @return array
   *   Array of page information, or an empty array when the structured data
   *   has no usable 'pages' section.
   */
  public function getPages(): array {
    return $this->getStructuredSection('pages');
  }

  /**
   * Gets paragraphs information.
   *
   * @return array
   *   Array of paragraph information, or an empty array when the structured
   *   data has no usable 'paragraphs' section.
   */
  public function getParagraphs(): array {
    return $this->getStructuredSection('paragraphs');
  }

  /**
   * Gets entities information.
   *
   * @return array
   *   Array of entity information, or an empty array when the structured data
   *   has no usable 'entities' section.
   */
  public function getEntities(): array {
    return $this->getStructuredSection('entities');
  }

  /**
   * Gets tables information.
   *
   * @return array
   *   Array of table information, or an empty array when the structured data
   *   has no usable 'tables' section.
   */
  public function getTables(): array {
    return $this->getStructuredSection('tables');
  }

  /**
   * Convert output to array format.
   *
   * @return array
   *   An associative array with the keys 'text', 'confidence',
   *   'structured_data', 'raw_output' and 'metadata', each holding the value
   *   passed to the constructor.
   */
  public function toArray(): array {
    return [
      'text' => $this->text,
      'confidence' => $this->confidence,
      'structured_data' => $this->structuredData,
      'raw_output' => $this->rawOutput,
      'metadata' => $this->metadata,
    ];
  }

  /**
   * Reads one section of the structured data as an array.
   *
   * The section getters declare an array return type, so returning a
   * non-array value stored under the key (for example a string) would raise
   * a TypeError. Such values are treated the same as a missing section.
   *
   * @param string $key
   *   The structured data key to read, e.g. 'pages'.
   *
   * @return array
   *   The value stored under $key, or an empty array when the key is absent,
   *   NULL, or not an array.
   */
  private function getStructuredSection(string $key): array {
    $section = $this->structuredData[$key] ?? [];
    return is_array($section) ? $section : [];
  }

}
