<?php

declare(strict_types=1);

namespace Drupal\search_api_solr_dense_vector\Plugin\search_api\processor;

use Drupal\ai\AiProviderPluginManager;
use Drupal\ai\OperationType\Embeddings\EmbeddingsInput;
use Drupal\Component\Utility\Html;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\Logger\LoggerChannelInterface;
use Drupal\Core\Plugin\PluginFormInterface;
use Drupal\search_api\Plugin\search_api\data_type\value\TextToken;
use Drupal\search_api\Processor\ProcessorPluginBase;
use Drupal\search_api\Utility\DataTypeHelperInterface;
use Symfony\Component\DependencyInjection\ContainerInterface;

/**
 * Add dense vectors to the index using AI module providers.
 *
 * @SearchApiProcessor(
 *   id = "solr_densevector",
 *   label = @Translation("Dense Vector"),
 *   description = @Translation("Generates dense vector embeddings using AI module providers for semantic search of text fields."),
 *   stages = {
 *     "preprocess_index" = 100,
 *   },
 * )
 */
class DenseVector extends ProcessorPluginBase implements PluginFormInterface {

  /**
   * The entity type manager service.
   */
  protected EntityTypeManagerInterface $entityTypeManager;

  /**
   * The AI provider plugin manager.
   */
  protected AiProviderPluginManager $aiProviderManager;

  /**
   * The logger service.
   */
  protected LoggerChannelInterface $logger;

  /**
   * The data type helper service.
   */
  protected DataTypeHelperInterface $dataTypeHelper;

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    $instance = new static($configuration, $plugin_id, $plugin_definition);
    $instance->entityTypeManager = $container->get('entity_type.manager');
    $instance->aiProviderManager = $container->get('ai.provider');
    $instance->logger = $container->get('logger.factory')->get('search_api_solr_dense_vector');
    $instance->dataTypeHelper = $container->get('search_api.data_type_helper');
    return $instance;
  }

  /**
   * {@inheritdoc}
   */
  public function defaultConfiguration() {
    return [
      'ai_provider' => '',
      'ai_model_id' => '',
      'vector_dimension' => 1024,
      'similarity_function' => 'cosine',
      'content_field' => '',
    ] + parent::defaultConfiguration();
  }

  /**
   * {@inheritdoc}
   */
  public function buildConfigurationForm(array $form, FormStateInterface $form_state) {
    $form['ai_provider'] = [
      '#type' => 'select',
      '#title' => $this->t('AI provider'),
      '#description' => $this->t('Select the AI provider to use for generating embeddings. Only providers that support embeddings are shown.'),
      '#options' => $this->getEmbeddingsProviders(),
      '#default_value' => $this->configuration['ai_provider'] ?? NULL,
      '#empty_option' => $this->t('- Select an AI provider -'),
      '#ajax' => [
        'callback' => [$this, 'updateModelOptions'],
        'wrapper' => 'ai-model-wrapper',
        'event' => 'change',
      ],
    ];

    // The model list depends on the provider that is currently selected in the
    // form, which may differ from the stored one during an AJAX rebuild.
    $form['ai_model_id'] = [
      '#type' => 'select',
      '#title' => $this->t('AI model'),
      '#description' => $this->t('Select the AI model to use for generating embeddings.'),
      '#options' => $this->getModelOptions($this->getSelectedProvider($form_state)),
      '#default_value' => $this->configuration['ai_model_id'] ?? '',
      '#empty_option' => $this->t('- Select an AI model -'),
      '#prefix' => '<div id="ai-model-wrapper">',
      '#suffix' => '</div>',
    ];

    $form['similarity_function'] = [
      '#type' => 'select',
      '#title' => $this->t('Similarity function'),
      '#description' => $this->t('Vector similarity function; used in search to return top K most similar vectors to a target vector.'),
      '#options' => [
        'cosine' => $this->t('Cosine'),
        'euclidean' => $this->t('Euclidean'),
        'dot_product' => $this->t('Dot product'),
      ],
      '#default_value' => $this->configuration['similarity_function'] ?? 'cosine',
      '#empty_option' => $this->t('- Select -'),
    ];

    // Offer every visible text or string field of the index as content field.
    $field_options = [];
    foreach ($this->index->getFields() as $name => $field) {
      if ($field->isHidden()) {
        continue;
      }
      if ($this->dataTypeHelper->isTextType($field->getType(), ['text', 'string'])) {
        $field_options[$name] = Html::escape($field->getPrefixedLabel());
      }
    }

    $form['content_field'] = [
      '#type' => 'select',
      '#title' => $this->t('Set the "content" field for AI Search tool(s).'),
      '#description' => $this->t('If the ai_search module is enabled, select which field should be returned when using the RAG/Vector Search tool in an agent. You can otherwise access this in your own custom tools or search result handling code using getExtraData() on the result.'),
      '#options' => $field_options,
      '#default_value' => $this->configuration['content_field'],
    ];

    $form['note'] = [
      '#markup' => $this->t('By changing these values, like embedding model with a different size, you may have to upload new configuration to your Solr server and reindex all items.'),
    ];

    return $form;
  }

  /**
   * {@inheritdoc}
   */
  public function validateConfigurationForm(array &$form, FormStateInterface $form_state) {}

  /**
   * {@inheritdoc}
   *
   * Besides storing the submitted values as plugin configuration, this copies
   * the embedding size of the chosen model and the similarity function onto
   * the "knn_vector_und_9_0_0" Solr field type. Failures of that second step
   * are logged and never interrupt the form submission.
   */
  public function submitConfigurationForm(array &$form, FormStateInterface $form_state) {
    $values = $form_state->getValues();
    $this->setConfiguration($values);

    // Without a provider there is no embedding size to look up, so the Solr
    // field type is left as it is instead of provoking a plugin exception.
    if (empty($values['ai_provider'])) {
      return;
    }

    try {
      // @todo we need a better way of supporting this update
      $field = $this->entityTypeManager
        ->getStorage('solr_field_type')
        ->load('knn_vector_und_9_0_0');

      // load() yields NULL for a missing entity; calling a method on it would
      // raise an \Error that the catch block below does not handle.
      if (NULL === $field) {
        $this->logger->error('Failed to save field type settings from search processor configuration: the Solr field type @id could not be loaded.', ['@id' => 'knn_vector_und_9_0_0']);
        return;
      }

      $provider = $this->aiProviderManager->createInstance($values['ai_provider']);
      $settings = $field->getFieldType();
      $settings['vectorDimension'] = $provider->embeddingsVectorSize($values['ai_model_id'] ?? '');

      // The select offers an empty option; keep the existing similarity
      // function rather than writing an empty one to the field type.
      if (!empty($values['similarity_function'])) {
        $settings['similarityFunction'] = $values['similarity_function'];
      }

      $field->setFieldType($settings);
      $field->save();
    }
    catch (\Exception $e) {
      $this->logger->error('Failed to save field type settings from search processor configuration: @message', ['@message' => $e->getMessage()]);
    }
  }

  /**
   * {@inheritdoc}
   *
   * For every field of type "solr_densevector", each value is converted to
   * text and sent to the configured AI provider; the field values are then
   * replaced by the normalized embeddings that could be generated.
   */
  public function preprocessIndexItems(array $items) {
    $provider_id = $this->configuration['ai_provider'] ?? '';
    if (empty($provider_id)) {
      return;
    }
    $model_id = $this->configuration['ai_model_id'] ?? '';

    try {
      $provider = $this->aiProviderManager->createInstance($provider_id);
    }
    catch (\Exception $e) {
      $this->logger->error('Failed to create AI provider instance: @message', ['@message' => $e->getMessage()]);
      return;
    }

    foreach ($items as $item) {
      foreach ($item->getFields() as $field) {
        if ('solr_densevector' !== $field->getType()) {
          continue;
        }

        $vectors = [];
        foreach ($field->getValues() as $value) {
          $text = $this->valueToText($value);

          // Only a truly empty string is skipped; "0" is valid text.
          if ('' === $text) {
            continue;
          }

          try {
            $vectors[] = $provider
              ->embeddings(new EmbeddingsInput($text), $model_id)
              ->getNormalized();
          }
          catch (\Exception $e) {
            // A failing value is logged and skipped so that the remaining
            // values and items are still processed.
            $this->logger->error('Failed to generate embeddings: @message', ['@message' => $e->getMessage()]);
          }
        }

        // NOTE(review): when no embedding could be generated the field keeps
        // its original (textual) values — confirm that the backend copes with
        // text in a dense vector field, or clear the values here instead.
        if ([] !== $vectors) {
          $field->setValues($vectors);
        }
      }
    }
  }

  /**
   * AJAX callback to update model options when the selected provider changes.
   *
   * @param array $form
   *   The complete form; the processor subform is expected below
   *   "settings" > plugin ID.
   * @param \Drupal\Core\Form\FormStateInterface $form_state
   *   The current form state.
   *
   * @return array
   *   The "ai_model_id" form element that replaces the AJAX wrapper.
   */
  public function updateModelOptions(array &$form, FormStateInterface $form_state) {
    // Cast defensively: getModelOptions() only accepts strings and strict
    // typing would otherwise raise a TypeError for a missing value.
    $trigger = $form_state->getTriggeringElement();
    $raw_value = $trigger['#value'] ?? '';
    $model_options = $this->getModelOptions(is_scalar($raw_value) ? (string) $raw_value : '');

    $element = &$form['settings'][$this->getPluginId()]['ai_model_id'];
    $element['#options'] = $model_options;

    // Drop a default value that the newly selected provider does not offer.
    if (!empty($element['#default_value']) && !array_key_exists($element['#default_value'], $model_options)) {
      $element['#default_value'] = '';
    }

    return $element;
  }

  /**
   * Determines the provider whose models should be offered in the form.
   *
   * The value of the triggering element is only used when that element is the
   * "ai_provider" select. Any other trigger (a submit button, an AJAX control
   * of another processor, ...) carries an unrelated value, in which case the
   * stored configuration is used.
   *
   * @param \Drupal\Core\Form\FormStateInterface $form_state
   *   The (sub)form state passed to the configuration form.
   *
   * @return string
   *   The provider plugin ID, or an empty string if none is selected.
   */
  protected function getSelectedProvider(FormStateInterface $form_state): string {
    $trigger = $form_state->getCompleteFormState()->getTriggeringElement();
    $array_parents = $trigger['#array_parents'] ?? [];

    if (is_array($array_parents) && 'ai_provider' === end($array_parents) && is_scalar($trigger['#value'] ?? NULL)) {
      return (string) $trigger['#value'];
    }

    return (string) ($this->configuration['ai_provider'] ?? '');
  }

  /**
   * Converts a single field value to the text that gets embedded.
   *
   * @param mixed $value
   *   A field value: either a list of parts (text tokens or stringable
   *   values) or a single stringable value.
   *
   * @return string
   *   The textual representation of the value.
   */
  protected function valueToText($value): string {
    if (!is_array($value)) {
      return (string) $value;
    }

    // NOTE(review): the parts are concatenated without a separator, exactly
    // as before — confirm whether tokens should be joined with a space.
    return implode('', array_map(
      static fn ($part) => $part instanceof TextToken ? $part->getText() : $part,
      $value
    ));
  }

  /**
   * Get model options for a selected provider.
   *
   * @param string $provider_id
   *   The provider ID.
   *
   * @return array
   *   The models the provider reports for the "embeddings" operation type, or
   *   an empty array if no provider is given or the lookup fails.
   */
  protected function getModelOptions(string $provider_id): array {
    if ('' === $provider_id) {
      return [];
    }

    try {
      return $this->aiProviderManager
        ->createInstance($provider_id)
        ->getConfiguredModels('embeddings');
    }
    catch (\Exception $e) {
      $this->logger->error('Failed to load models for provider @provider: @message', [
        '@provider' => $provider_id,
        '@message' => $e->getMessage(),
      ]);
    }

    return [];
  }

  /**
   * Returns a list of valid embedding AI Providers.
   *
   * @return array
   *   The provider labels, keyed by provider plugin ID. Empty if the providers
   *   could not be loaded.
   */
  protected function getEmbeddingsProviders() {
    $options = [];

    try {
      // Keep definitions and options apart so that plugin definitions never
      // end up in the select options.
      $definitions = $this->aiProviderManager->getProvidersForOperationType('embeddings');

      foreach ($definitions as $plugin_id => $definition) {
        $options[$plugin_id] = $definition['label'];
      }
    }
    catch (\Exception $e) {
      $this->logger->error('Failed to load AI providers: @message', [
        '@message' => $e->getMessage(),
      ]);
    }

    return $options;
  }

}
