<?php

namespace Drupal\ai_migration;

use Drupal\ai\AiProviderPluginManager;
use Drupal\ai\OperationType\Chat\ChatInput;
use Drupal\ai\OperationType\Chat\ChatMessage;
use Drupal\ai_migration\Service\AiMigrationCacheProviderInterface;
use Drupal\Core\Http\ClientFactory;
use Drupal\Core\Logger\LoggerChannelFactoryInterface;
use Drupal\schemata\SchemaFactory;
use Symfony\Component\Serializer\Serializer;

/**
 * Converts HTML pages into entity field values using an AI chat provider.
 *
 * The migrator builds a JSON schema for the target entity type and bundle,
 * sends that schema together with the page HTML to the default chat provider
 * and normalizes the JSON answer into a flat array of entity values.
 * Successful conversions are stored in the injected cache provider.
 */
class AiMigrator {

  /**
   * The schema format for Schemata and Drupal's core serialization.
   */
  private const SCHEMA_FORMAT = 'schema_json:api_json';

  /**
   * The AI provider plugin manager.
   *
   * @var \Drupal\ai\AiProviderPluginManager
   */
  public $aiProviderPluginManager;

  /**
   * The AI migration cache provider.
   *
   * @var \Drupal\ai_migration\Service\AiMigrationCacheProviderInterface
   */
  protected $cache;

  /**
   * The HTTP client built by the client factory in the constructor.
   *
   * It is created with "http_errors" disabled, so 4xx/5xx responses do not
   * throw and callers have to inspect the status code themselves.
   *
   * @var \GuzzleHttp\ClientInterface
   */
  protected $httpClient;

  /**
   * The logger channel for this module.
   *
   * @var \Drupal\Core\Logger\LoggerChannelInterface
   */
  protected $logger;

  /**
   * The ID of the AI provider we are using.
   *
   * @var string
   */
  private $providerId;

  /**
   * The model ID we are using.
   *
   * @var string
   */
  private $modelId;

  /**
   * The URL being processed.
   *
   * @var string
   */
  public string $url = '';

  /**
   * The item selector for the entity being processed.
   *
   * @var string
   */
  public string $itemSelector = '';

  /**
   * The serializer used to render the schema before sending it to the provider.
   *
   * @var \Symfony\Component\Serializer\Serializer
   */
  protected Serializer $serializer;

  /**
   * The schemata factory service.
   *
   * @var \Drupal\schemata\SchemaFactory
   */
  protected SchemaFactory $schemaFactory;

  /**
   * Constructs a new AiMigrator.
   *
   * @param \Drupal\ai\AiProviderPluginManager $aiProviderPluginManager
   *   The AI provider plugin manager.
   * @param \Drupal\ai_migration\Service\AiMigrationCacheProviderInterface $aiMigrationCacheProvider
   *   The cache provider used to store converted responses.
   * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $loggerChannelFactory
   *   The logger channel factory.
   * @param \Drupal\Core\Http\ClientFactory $httpClientFactory
   *   The factory used to build the HTTP client.
   * @param \Symfony\Component\Serializer\Serializer $serializer
   *   The serializer service.
   * @param \Drupal\schemata\SchemaFactory $schemaFactory
   *   The schemata factory service.
   */
  public function __construct(
    AiProviderPluginManager $aiProviderPluginManager,
    AiMigrationCacheProviderInterface $aiMigrationCacheProvider,
    LoggerChannelFactoryInterface $loggerChannelFactory,
    ClientFactory $httpClientFactory,
    Serializer $serializer,
    SchemaFactory $schemaFactory,
  ) {
    // Build an HTTP client with a timeout. HTTP error statuses do not throw,
    // so the status code is checked explicitly after each request.
    $this->httpClient = $httpClientFactory->fromOptions([
      'timeout' => 30,
      'http_errors' => FALSE,
    ]);

    $this->aiProviderPluginManager = $aiProviderPluginManager;
    $this->logger = $loggerChannelFactory->get('ai_migration');

    // @todo Set the AI migration cache provider based on config.
    $this->cache = $aiMigrationCacheProvider;

    $this->serializer = $serializer;
    $this->schemaFactory = $schemaFactory;
  }

  /**
   * Converts a URL to migratable data using the AI provider.
   *
   * @param string $url
   *   The URL for the content we want to migrate.
   * @param string $html
   *   The HTML to migrate if we already have it. When empty, the HTML is
   *   fetched from $url.
   * @param string $entity_type
   *   The entity type we're migrating into.
   * @param string $bundle
   *   The bundle we're migrating into.
   * @param string $item_selector
   *   The parent field for the entity being migrated.
   *
   * @return array|bool
   *   The converted content or FALSE if conversion fails.
   */
  public function convert(string $url = '', string $html = '', string $entity_type = '', string $bundle = '', string $item_selector = ''): array|bool {
    $this->url = $url;
    $this->itemSelector = $item_selector;

    $default_provider = $this->aiProviderPluginManager->getDefaultProviderForOperationType('chat');
    if (empty($default_provider['provider_id']) || empty($default_provider['model_id'])) {
      return FALSE;
    }

    // @todo We want to let the consumer override the default provider and model.
    // For now, we will use the default provider and model.
    $this->providerId = $default_provider['provider_id'];
    $this->modelId = $default_provider['model_id'];

    // If we don't have html, get it. Failures are logged by the helper.
    if (empty($html)) {
      $html = $this->fetchHtml($url);
      if ($html === NULL) {
        return FALSE;
      }
    }

    // Create the schema and log it.
    $schema = $this->createSchema($entity_type, $bundle);
    $this->logger->debug('AI migration schema for url @url: <pre>@schema</pre>',
      ['@url' => $url, '@schema' => $schema]);

    $system_role = 'You are an API that attempts to parse HTML content and find
      the important parts of a web page. You will return this as a JSON object with this schema: '
      . $schema . '. The response is to be returned in validated JSON format without the schema that was used.';

    $prompt = $this->createPrompt($url, $html);

    // The schema (and therefore the entity type and bundle) only lives in the
    // system role, so it has to be part of the cache key. Otherwise the same
    // page converted for a different bundle would get the wrong cached values.
    $cache_key = $system_role . "\n\n" . $prompt;

    $cached_response = $this->cache->getPromptResponse($cache_key, $this->providerId, $this->modelId);
    if ($cached_response) {
      return $cached_response;
    }

    // Only instantiate the provider once we know we have to call it.
    $provider = $this->aiProviderPluginManager->createInstance($this->providerId, [
      'model_id' => $this->modelId,
      'http_client_options' => [
        'timeout' => 300,
      ],
    ]);
    $provider->setChatSystemRole($system_role);

    $messages = new ChatInput([
      new ChatMessage('user', $prompt),
    ]);

    try {
      $response = $provider->chat($messages, $this->modelId,
        ['ai-migration'])->getNormalized()->getText();
    }
    catch (\Exception $e) {
      $this->logger->error('AI migration failed for url @url: @message',
        ['@url' => $url, '@message' => $e->getMessage()]);
      return FALSE;
    }

    // Clean up the response. A failed normalization has already been logged
    // and must not be cached.
    $clean_response = $this->normalizeResponse($response, $bundle);
    if (!is_array($clean_response)) {
      return FALSE;
    }

    // Cache the response for future use.
    $this->cache->setPromptResponse($clean_response, $cache_key, $this->providerId, $this->modelId);

    // Log for debugging purpose for now.
    // We may want to put this behind a setting later.
    $this->logger->debug('AI migration response for url @url: <pre>@response</pre>',
      ['@url' => $url, '@response' => print_r($clean_response, TRUE)]);

    return $clean_response;
  }

  /**
   * Fetches the HTML for a URL.
   *
   * @param string $url
   *   The URL to fetch.
   *
   * @return string|null
   *   The response body, or NULL if the URL is empty, the request fails, the
   *   server answers with an error status or the body is empty.
   */
  private function fetchHtml(string $url): ?string {
    if ($url === '') {
      $this->logger->error('Cannot fetch content for AI migration: neither a URL nor HTML was provided.');
      return NULL;
    }

    try {
      $response = $this->httpClient->request('GET', $url);
    }
    catch (\Exception $e) {
      $this->logger->error('Failed to fetch content from URL @url: @message',
        ['@url' => $url, '@message' => $e->getMessage()]);
      return NULL;
    }

    // The client is created with "http_errors" disabled, so error statuses
    // arrive here as regular responses and must be rejected explicitly.
    $status = $response->getStatusCode();
    if ($status >= 400) {
      $this->logger->error('Failed to fetch content from URL @url: HTTP status @status',
        ['@url' => $url, '@status' => $status]);
      return NULL;
    }

    // Casting the body reads the whole stream from the start.
    $html = (string) $response->getBody();
    if (trim($html) === '') {
      $this->logger->error('Empty response body received from URL @url',
        ['@url' => $url]);
      return NULL;
    }

    return $html;
  }

  /**
   * Creates the user prompt for a page.
   *
   * @param string $url
   *   The URL being processed. When not empty it is included in the prompt so
   *   relative URLs can be resolved against it.
   * @param string $content
   *   The HTML content to create a prompt from.
   *
   * @return string
   *   The created prompt.
   */
  public function createPrompt(string $url = '', string $content = ''): string {
    // Static instructions; nowdoc keeps the regex backslashes untouched.
    $instructions = <<<'PROMPT'
      You can use HTML within the structure that you return,
      but do not place HTML <head> or <body> tags in any element of the schema.

      For any urls that are returned for a field in the schema, make sure they
      are absolute and not relative. This would apply to links or images.
      As an example of relative to absolute conversion, if the image href is
      /image1.jpg it would become https://example.com/image1.jpg
      if hostname is https://example.com and if image href is sub/image1.jpg
      when page url is https://example.com/level the url returned would be
      https://example.com/level/sub/image1.jpg.

      For any images where path is similar to https://example.com/sites/default/files/styles/max_width_288px/public/2022-09/IET.PNG?itok=3PK-CwYZ,
      refer to /styles/max_width_288px/public/ as "Drupal style path" of the URL (regex pattern would look like "/\/styles\/[^\/]+\/public\//").
      Remove the query string and the "Drupal style path" when returning the URL. For the example,
      the returned image URL would be https://example.com/sites/default/files/2022-09/IET.PNG.

      For any Unix timestamps that are returned, make sure they are in seconds and not milliseconds.

      For "created" and "changed" dates, use the current date and time in unix timestamp format.
      PROMPT;

    $sections = [$instructions];

    // Without the page URL the relative-to-absolute conversion requested
    // above has nothing to resolve against.
    if ($url !== '') {
      $sections[] = 'The URL of the page being processed is ' . $url . "\n"
        . 'Use it as the base URL when converting relative URLs to absolute ones.';
    }

    $sections[] = "Here is the html:\n" . $content;

    return implode("\n\n", $sections);
  }

  /**
   * Creates a schema for the content type we're migrating to.
   *
   * @param string $entity_type
   *   The entity type to create the schema for.
   * @param string $bundle
   *   The name of the bundle to create the schema for.
   *
   * @return string
   *   The created schema, serialized using self::SCHEMA_FORMAT.
   */
  public function createSchema(string $entity_type, string $bundle): string {
    // Generate a Schema object for the entity type and bundle.
    $schema = $this->schemaFactory->create($entity_type, $bundle);

    // Render the schema as a string in the selected schema format.
    return $this->serializer->serialize($schema, self::SCHEMA_FORMAT);
  }

  /**
   * Cleans the response to match the expected format.
   *
   * @param string $response
   *   The raw text returned by the provider. It may be wrapped in a Markdown
   *   code fence.
   * @param string $bundle
   *   The bundle we're migrating into.
   *
   * @return array|bool
   *   The values found under data.attributes with "type" set to the bundle,
   *   or FALSE if the response cannot be decoded or has no attributes.
   */
  public function normalizeResponse(string $response, string $bundle): array|bool {
    // Strip only a code fence wrapping the whole payload. Backticks inside
    // the JSON values are content and must be preserved.
    $cleaned = preg_replace('/\A\s*`{1,3}(?:json)?\s*|\s*`{1,3}\s*\z/i', '', $response);
    $decoded = is_string($cleaned) ? json_decode($cleaned, TRUE) : NULL;

    if ($decoded === NULL) {
      $this->logger->error('Failed to decode cleaned AI JSON response: @response', ['@response' => $response]);
      return FALSE;
    }

    // The values are expected under data.attributes.
    $attributes = NULL;
    if (is_array($decoded) && is_array($decoded['data'] ?? NULL)) {
      $attributes = $decoded['data']['attributes'] ?? NULL;
    }

    if (!is_array($attributes)) {
      $this->logger->error('Missing or invalid attributes in AI response for @url', ['@url' => $this->url]);
      return FALSE;
    }

    // This generally is not set where we want it to be.
    $attributes['type'] = $bundle;

    return $attributes;
  }

}
