<?php

namespace Drupal\dify;

use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;

/**
 * Provides a client for the Dify API.
 *
 * @see https://docs.dify.ai/
 */
class DifyClient {

  /**
   * The maximum size of a segment.
   *
   * Sent as the "max_tokens" segmentation setting when indexing documents.
   * It is read through static:: so a subclass may override it.
   *
   * @var int
   */
  public const SEGMENT_MAX_SIZE = 1000;

  /**
   * Dataset V1 endpoint, relative to the base URL given to the constructor.
   *
   * @var string
   */
  public const DATASET_ENDPOINT = 'v1/datasets';

  /**
   * Content type of the JSON "data" part sent with multipart file uploads.
   *
   * @var string
   */
  private const FILE_OPTIONS_CONTENT_TYPE = 'text/plain';

  /**
   * API client used for JSON requests.
   *
   * @var \GuzzleHttp\Client
   */
  private Client $client;

  /**
   * File client used for multipart uploads.
   *
   * @var \GuzzleHttp\Client
   */
  private Client $fileClient;

  /**
   * Creates the Guzzle clients used to talk to the Dify API.
   *
   * @param string $base_url
   *   The base URL of the Dify API. It is used as the Guzzle "base_uri", so
   *   the relative endpoint paths of this class are resolved against it.
   * @param string $authorization_token
   *   The authorization token to use for requests (sent as a Bearer token).
   */
  public function __construct(string $base_url, string $authorization_token) {
    $authorization = 'Bearer ' . $authorization_token;

    $this->client = new Client([
      'base_uri' => $base_url,
      'headers' => [
        'Authorization' => $authorization,
        'Content-Type' => 'application/json',
      ],
    ]);

    $this->fileClient = new Client([
      'base_uri' => $base_url,
      'headers' => [
        'Authorization' => $authorization,
        'Content-Type' => 'multipart/form-data',
      ],
    ]);
  }

  /**
   * Retrieves a list of datasets.
   *
   * @param int|null $page
   *   Page number (optional).
   * @param int $limit
   *   Number of items returned, default 1, range 1-100.
   *
   * @return array
   *   The decoded response from the Dify API, or an empty array when the
   *   response body is not a JSON object/array.
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   */
  public function getKnowledgeBaseList(?int $page = NULL, int $limit = 1): array {
    $response = $this->client->request('GET', self::DATASET_ENDPOINT, [
      'query' => [
        'page' => $page,
        'limit' => $limit,
      ],
    ]);
    return $this->decodeResponse($response);
  }

  /**
   * Retrieves documents from a specific dataset.
   *
   * @param string $dataset_id
   *   Knowledge ID.
   * @param string|null $keyword
   *   Search keywords, currently only search document names (optional).
   * @param int|null $page
   *   Page number (optional).
   * @param int $limit
   *   Number of items returned, default 1, range 1-100 (optional).
   *
   * @return array
   *   The decoded response from the Dify API. The "data" key is always present
   *   and always an array; when the response does not have that shape an empty
   *   result (['data' => [], 'has_more' => FALSE, 'total' => 0]) is returned.
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   */
  public function getDocuments(string $dataset_id, ?string $keyword = NULL, ?int $page = NULL, int $limit = 1): array {
    $response = $this->client->request('GET', self::DATASET_ENDPOINT . "/{$dataset_id}/documents", [
      'query' => [
        'page' => $page,
        'limit' => $limit,
        'keyword' => $keyword,
      ],
    ]);
    $decoded = $this->decodeResponse($response);

    // Validate response structure: callers iterate over "data".
    if (!isset($decoded['data']) || !is_array($decoded['data'])) {
      return ['data' => [], 'has_more' => FALSE, 'total' => 0];
    }

    return $decoded;
  }

  /**
   * Retrieves all documents from a specific dataset.
   *
   * Pages are requested 100 documents at a time until the API stops reporting
   * more results, a page comes back empty, or $max_pages pages were fetched.
   * At least one page is always requested.
   *
   * @param string $dataset_id
   *   Knowledge ID.
   * @param int $max_pages
   *   Maximum number of pages to fetch (safety limit).
   *
   * @return array
   *   The documents of all fetched pages, merged into a single list.
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   */
  public function getAllDocuments(string $dataset_id, int $max_pages = 100): array {
    $batches = [];
    $page = 1;
    do {
      $dataset = $this->getDocuments($dataset_id, page: $page, limit: 100);

      // Be defensive about the page shape: "data" and "has_more" may be
      // missing when getDocuments() is overridden or the API answers oddly.
      $batch = $dataset['data'] ?? [];
      if (!is_array($batch)) {
        $batch = [];
      }
      $batches[] = $batch;
      $page++;

      // An empty page means there is nothing left to fetch, whatever
      // "has_more" claims.
      $has_more = $batch !== [] && !empty($dataset['has_more']);
    } while ($has_more && $page <= $max_pages);

    return array_merge([], ...$batches);
  }

  /**
   * Retrieves a document from a specific dataset by keyword.
   *
   * @param string $dataset_id
   *   Knowledge ID.
   * @param string $keyword
   *   Search keywords, currently only search document names.
   *
   * @return array|null
   *   The first matching document or NULL if no document is found.
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   */
  public function getDocumentByKeyword(string $dataset_id, string $keyword): ?array {
    $dataset = $this->getDocuments($dataset_id, $keyword, limit: 1);
    $first = $dataset['data'][0] ?? NULL;

    return is_array($first) ? $first : NULL;
  }

  /**
   * Deletes all documents from a specific dataset.
   *
   * Deletion is best effort: a failure for one document does not stop the
   * remaining documents from being deleted.
   *
   * @param string $dataset_id
   *   Knowledge ID.
   *
   * @return array
   *   Documents that could not be deleted, including listed entries that carry
   *   no usable "id".
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   *   When the list of documents cannot be retrieved.
   */
  public function deleteAllDocuments(string $dataset_id): array {
    $documents_with_errors = [];
    foreach ($this->getAllDocuments($dataset_id) as $document) {
      $document_id = is_array($document) ? ($document['id'] ?? NULL) : NULL;

      // Without a scalar ID there is nothing to address; report the entry
      // instead of letting a TypeError abort the whole run.
      if (!is_string($document_id) && !is_int($document_id)) {
        $documents_with_errors[] = $document;
        continue;
      }

      try {
        $this->deleteDocument($dataset_id, (string) $document_id);
      }
      catch (\Exception | GuzzleException $e) {
        $documents_with_errors[] = $document;
      }
    }
    return $documents_with_errors;
  }

  /**
   * Deletes a document from a specific dataset.
   *
   * @param string $dataset_id
   *   Knowledge ID.
   * @param string $document_id
   *   Document ID.
   *
   * @return array
   *   The decoded response from the Dify API, or an empty array when the
   *   response has no JSON body.
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   */
  public function deleteDocument(string $dataset_id, string $document_id): array {
    $response = $this->client->request('DELETE', self::DATASET_ENDPOINT . "/{$dataset_id}/documents/{$document_id}");
    return $this->decodeResponse($response);
  }

  /**
   * Creates a document from text.
   *
   * @param string $dataset_id
   *   The ID of the dataset.
   * @param string $doc_name
   *   The name of the document.
   * @param string $data
   *   The text content of the document.
   *
   * @return array
   *   The decoded response from the Dify API.
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   * @throws \RuntimeException
   *   When the API answers with a status code other than 200.
   */
  public function createDocumentFromText(string $dataset_id, string $doc_name, string $data): array {
    $response = $this->client->request('POST', self::DATASET_ENDPOINT . "/{$dataset_id}/document/create-by-text", [
      'json' => $this->getTextIndexingOptions($doc_name, $data),
    ]);

    $status = $response->getStatusCode();
    if ($status !== 200) {
      throw new \RuntimeException('Unexpected response status: ' . $status);
    }

    return $this->decodeResponse($response);
  }

  /**
   * Updates a document from text.
   *
   * @param string $dataset_id
   *   Knowledge ID.
   * @param string $document_id
   *   Document ID.
   * @param string $doc_name
   *   Document name.
   * @param string $data
   *   Document content.
   *
   * @return array
   *   The decoded response from the Dify API.
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   */
  public function updateDocumentFromText(string $dataset_id, string $document_id, string $doc_name, string $data): array {
    $response = $this->client->request('POST', self::DATASET_ENDPOINT . "/{$dataset_id}/documents/{$document_id}/update-by-text", [
      'json' => [
        'name' => $doc_name,
        'text' => $data,
      ],
    ]);
    return $this->decodeResponse($response);
  }

  /**
   * Creates a document from a file.
   *
   * @param string $dataset_id
   *   Knowledge ID.
   * @param string $file_path
   *   The path to the file to be uploaded.
   *
   * @return array
   *   The decoded response from the Dify API.
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   * @throws \InvalidArgumentException
   *   When the file cannot be opened for reading.
   */
  public function createDocumentFromFile(string $dataset_id, string $file_path): array {
    return $this->uploadFile(self::DATASET_ENDPOINT . "/{$dataset_id}/document/create-by-file", $file_path);
  }

  /**
   * Updates a document from a file.
   *
   * @param string $dataset_id
   *   Knowledge ID.
   * @param string $document_id
   *   Document ID.
   * @param string $file_path
   *   The path to the file to be uploaded.
   *
   * @return array
   *   The decoded response from the Dify API.
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   * @throws \InvalidArgumentException
   *   When the file cannot be opened for reading.
   */
  public function updateDocumentFromFile(string $dataset_id, string $document_id, string $file_path): array {
    return $this->uploadFile(self::DATASET_ENDPOINT . "/{$dataset_id}/documents/{$document_id}/update-by-file", $file_path);
  }

  /**
   * Get text indexing options.
   *
   * @param string $doc_name
   *   The name of the document.
   * @param string $data
   *   The data to be indexed.
   *
   * @return array
   *   Indexing options: the document name and text followed by the shared
   *   indexing technique and processing rules.
   */
  protected function getTextIndexingOptions(string $doc_name, string $data): array {
    // The union keeps "name" and "text" first, as in the request body layout.
    return [
      'name' => $doc_name,
      'text' => $data,
    ] + $this->buildIndexingOptions();
  }

  /**
   * Get file indexing options.
   *
   * @return array
   *   Indexing options.
   */
  protected function getFileIndexingOptions(): array {
    return $this->buildIndexingOptions();
  }

  /**
   * Creates a metadata field in the knowledge base.
   *
   * @param string $dataset_id
   *   Knowledge ID.
   * @param string $name
   *   The name of the metadata field.
   * @param string $type
   *   The type of the metadata field (string, number, time).
   *
   * @return array
   *   The decoded response from the Dify API.
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   */
  public function createMetadataField(string $dataset_id, string $name, string $type = 'string'): array {
    $response = $this->client->request('POST', self::DATASET_ENDPOINT . "/{$dataset_id}/metadata", [
      'json' => [
        'type' => $type,
        'name' => $name,
      ],
    ]);
    return $this->decodeResponse($response);
  }

  /**
   * Gets the list of metadata fields for a knowledge base.
   *
   * @param string $dataset_id
   *   Knowledge ID.
   *
   * @return array
   *   The decoded response from the Dify API.
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   */
  public function getMetadataFields(string $dataset_id): array {
    $response = $this->client->request('GET', self::DATASET_ENDPOINT . "/{$dataset_id}/metadata");
    return $this->decodeResponse($response);
  }

  /**
   * Assigns metadata to documents.
   *
   * @param string $dataset_id
   *   Knowledge ID.
   * @param array $operation_data
   *   Array of document metadata assignments.
   *   Format: [
   *     [
   *       'document_id' => 'doc_id',
   *       'metadata_list' => [
   *         ['id' => 'field_id', 'value' => 'value', 'name' => 'field_name']
   *       ]
   *     ]
   *   ].
   *
   * @return array
   *   The decoded response from the Dify API.
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   */
  public function assignDocumentMetadata(string $dataset_id, array $operation_data): array {
    $response = $this->client->request('POST', self::DATASET_ENDPOINT . "/{$dataset_id}/documents/metadata", [
      'json' => [
        'operation_data' => $operation_data,
      ],
    ]);
    return $this->decodeResponse($response);
  }

  /**
   * Ensures a metadata field exists, creating it if necessary.
   *
   * @param string $dataset_id
   *   Knowledge ID.
   * @param string $field_name
   *   The name of the metadata field.
   * @param string $type
   *   The type of the metadata field (string, number, time). Only used when
   *   the field has to be created.
   *
   * @return array|null
   *   The metadata field information or NULL if creation failed.
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   *   When the existing metadata fields cannot be retrieved.
   */
  public function ensureMetadataField(string $dataset_id, string $field_name, string $type = 'string'): ?array {
    // First, check if the field already exists.
    $existing_fields = $this->getMetadataFields($dataset_id)['doc_metadata'] ?? [];

    if (is_array($existing_fields)) {
      foreach ($existing_fields as $field) {
        if (is_array($field) && ($field['name'] ?? NULL) === $field_name) {
          return $field;
        }
      }
    }

    // Field doesn't exist, create it.
    try {
      $created = $this->createMetadataField($dataset_id, $field_name, $type);
    }
    catch (\Exception | GuzzleException $e) {
      // Field creation failed, return NULL.
      return NULL;
    }

    // An empty result carries no field information, so it counts as a failure.
    return $created !== [] ? $created : NULL;
  }

  /**
   * Builds the indexing options shared by text and file documents.
   *
   * @return array
   *   The indexing technique and the custom processing rules.
   */
  private function buildIndexingOptions(): array {
    return [
      'indexing_technique' => 'high_quality',
      'process_rule' => [
        'rules' => [
          'pre_processing_rules' => [
            ['id' => 'remove_extra_spaces', 'enabled' => TRUE],
            ['id' => 'remove_urls_emails', 'enabled' => FALSE],
          ],
          'segmentation' => [
            'separator' => ';',
            'max_tokens' => static::SEGMENT_MAX_SIZE,
          ],
        ],
        'mode' => 'custom',
      ],
    ];
  }

  /**
   * Uploads a file together with the file indexing options.
   *
   * @param string $endpoint
   *   The endpoint path to POST to, relative to the base URL.
   * @param string $file_path
   *   The path to the file to be uploaded.
   *
   * @return array
   *   The decoded response from the Dify API.
   *
   * @throws \GuzzleHttp\Exception\GuzzleException
   * @throws \InvalidArgumentException
   *   When the file cannot be opened for reading.
   * @throws \JsonException
   *   When the indexing options cannot be encoded as JSON.
   */
  private function uploadFile(string $endpoint, string $file_path): array {
    // Encode first so that an encoding failure cannot leave a handle open.
    $options = json_encode($this->getFileIndexingOptions(), JSON_THROW_ON_ERROR);

    // fopen() returns FALSE on failure; fail early with a clear message rather
    // than handing an invalid value to the HTTP client.
    $handle = fopen($file_path, 'rb');
    if ($handle === FALSE) {
      throw new \InvalidArgumentException(sprintf('Unable to open file "%s" for reading.', $file_path));
    }

    $response = $this->fileClient->request('POST', $endpoint, [
      'multipart' => [
        [
          'name' => 'data',
          'contents' => $options,
          'headers' => ['Content-Type' => self::FILE_OPTIONS_CONTENT_TYPE],
        ],
        [
          'name' => 'file',
          'contents' => $handle,
        ],
      ],
    ]);

    return $this->decodeResponse($response);
  }

  /**
   * Decodes the JSON body of a response into an array.
   *
   * @param \Psr\Http\Message\ResponseInterface $response
   *   The response returned by the HTTP client.
   *
   * @return array
   *   The decoded body, or an empty array when the body is empty, is not valid
   *   JSON or does not decode to an array.
   */
  private function decodeResponse(\Psr\Http\Message\ResponseInterface $response): array {
    $decoded = json_decode($response->getBody()->getContents(), TRUE);
    return is_array($decoded) ? $decoded : [];
  }

}
