<?php

namespace Drupal\ai_related_content\Plugin\views\filter;

use Drupal\ai\AiVdbProviderPluginManager;
use Drupal\ai_related_content\AiRelatedContentManager;
use Drupal\ai_related_content\Plugin\views\argument\AIRelatedContentNodeArgument;
use Drupal\Core\Entity\EntityDisplayRepositoryInterface;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\Plugin\ContainerFactoryPluginInterface;
use Drupal\Core\Routing\RouteMatchInterface;
use Drupal\node\NodeInterface;
use Drupal\search_api\IndexInterface;
use Drupal\search_api\Plugin\views\filter\SearchApiFulltext;
use Symfony\Component\DependencyInjection\ContainerInterface;
use Symfony\Component\HttpFoundation\RequestStack;

/**
 * Filters content based on relevance to the node in the current request.
 *
 * @ingroup views_filter_handlers
 *
 * @ViewsFilter("ai_related_content_node_filter")
 */
class AIRelatedContentNodeFilter extends SearchApiFulltext implements ContainerFactoryPluginInterface {

  /**
   * Constructs the AI Related Content Views filter.
   *
   * The injected services are declared as promoted constructor properties, so
   * they are available on $this without explicit assignment. Only the three
   * standard plugin arguments are forwarded to the parent constructor.
   *
   * @param array $configuration
   *   A configuration array containing information about the plugin instance.
   * @param string $plugin_id
   *   The plugin_id for the plugin instance.
   * @param mixed $plugin_definition
   *   The plugin implementation definition.
   * @param \Symfony\Component\HttpFoundation\RequestStack $requestStack
   *   The request stack.
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entityTypeManager
   *   The entity type manager.
   * @param \Drupal\Core\Entity\EntityDisplayRepositoryInterface $entityDisplayRepository
   *   The entity display repository.
   * @param \Drupal\Core\Routing\RouteMatchInterface $routeMatch
   *   The current route match.
   * @param \Drupal\ai\AiVdbProviderPluginManager $vdbProviderManager
   *   The Vector Database Provider plugin manager.
   * @param \Drupal\ai_related_content\AiRelatedContentManager $aiRelatedContentManager
   *   The AI Related Content helper.
   */
  public function __construct(
    array $configuration,
    $plugin_id,
    $plugin_definition,
    protected RequestStack $requestStack,
    protected EntityTypeManagerInterface $entityTypeManager,
    protected EntityDisplayRepositoryInterface $entityDisplayRepository,
    protected RouteMatchInterface $routeMatch,
    protected AiVdbProviderPluginManager $vdbProviderManager,
    protected AiRelatedContentManager $aiRelatedContentManager,
  ) {
    parent::__construct($configuration, $plugin_id, $plugin_definition);
  }

  /**
   * {@inheritdoc}
   *
   * Always reports 'node': this filter requires the 'node' entity type.
   */
  public function getEntityType() {
    return 'node';
  }

  /**
   * {@inheritdoc}
   *
   * Resolves the injected services from the container and hands them to the
   * constructor after the three standard plugin arguments.
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    // Listed in the exact order of the constructor's service parameters.
    $service_ids = [
      'request_stack',
      'entity_type.manager',
      'entity_display.repository',
      'current_route_match',
      'ai.vdb_provider',
      'ai_related_content.manager',
    ];

    $services = [];
    foreach ($service_ids as $service_id) {
      $services[] = $container->get($service_id);
    }

    return new static($configuration, $plugin_id, $plugin_definition, ...$services);
  }

  /**
   * {@inheritdoc}
   *
   * Adds this filter's own options on top of the parent's definitions.
   */
  public function defineOptions() {
    // Option name => default value.
    $defaults = [
      'view_mode' => 'teaser',
      'vector_source_search_api_index' => '_generate_embedding',
      'generate_embedding_on_demand_fallback' => FALSE,
      'score_threshold' => NULL,
    ];

    $definitions = parent::defineOptions();
    foreach ($defaults as $option_name => $default_value) {
      $definitions[$option_name] = ['default' => $default_value];
    }
    return $definitions;
  }

  /**
   * {@inheritdoc}
   *
   * Hides the parent's user-facing search settings and adds the settings that
   * control how the source node is turned into a vector: the view mode used
   * to render it, the index to read existing vectors from, the on-demand
   * fallback toggle and an optional score threshold override.
   */
  public function buildOptionsForm(&$form, FormStateInterface $form_state) {
    parent::buildOptionsForm($form, $form_state);

    // There is no user-facing search here, so these inherited elements do not
    // apply and are hidden rather than removed.
    $not_applicable = [
      'operator',
      'expose_button',
      'min_length',
      'value',
      'value_max_words',
    ];
    foreach (array_keys($form) as $element_key) {
      if (in_array($element_key, $not_applicable) || str_starts_with($element_key, 'parse_mode')) {
        $form[$element_key]['#access'] = FALSE;
      }
    }

    // View mode selection. Picking a lean view mode matters both for result
    // quality and for keeping potential LLM costs down.
    $available_view_modes = $this->entityDisplayRepository->getViewModeOptions('node');
    $view_mode_element = [
      '#type' => 'select',
      '#title' => $this->t('Node View Mode for Source Content'),
      '#options' => $available_view_modes,
      '#description' => $this->t('The view mode used to render the current node. It is strongly recommended to curate this to only render actual text content to avoid the vector database search finding related content based on text not key to the Node itself (e.g. such as content from this View potentially).'),
    ];
    // Only pre-select the stored view mode while it still exists.
    if (array_key_exists($this->options['view_mode'], $available_view_modes)) {
      $view_mode_element['#default_value'] = $this->options['view_mode'];
    }
    $form['view_mode'] = $view_mode_element;

    // Source of the vectors: a compatible index, or always generate.
    $vector_sources = $this->aiRelatedContentManager->findCompatibleIndexes();
    $vector_sources['_generate_embedding'] = $this->t('(Always generate new vectors, do not retrieve already generated embedding)');
    $vector_source_element = [
      '#type' => 'select',
      '#title' => $this->t('Source vectors to find related content for'),
      '#options' => $vector_sources,
      '#description' => $this->t('Rather than generating new vectors, although cacheable, vectors can be retrieved from already generated ones by querying the vector database. This does not necessarily need to be the same index so long as the dimensions are the same and the Drupal Node ID exists. If not found, new vectors will be generated only if you opt-in to on-demand generation below. It is useful to use a separate index if your index typically has many chunks per item. Then you likely want a separate index that has a giant chunk size so the entire Node is part of a single vector that represents the content as a whole to find related content for. When multiple chunks are found, an average pooling strategy is applied so the average of all chunks from the entity are used to find related content. You must verify the dimensions are the same or expect poor related content relevancy.'),
    ];
    // Only pre-select the stored source while it is still a valid choice.
    if (isset($this->options['vector_source_search_api_index']) && array_key_exists($this->options['vector_source_search_api_index'], $vector_sources)) {
      $vector_source_element['#default_value'] = $this->options['vector_source_search_api_index'];
    }
    $form['vector_source_search_api_index'] = $vector_source_element;

    // Opt-in fallback, hidden while "always generate" is the selected source.
    $form['generate_embedding_on_demand_fallback'] = [
      '#type' => 'checkbox',
      '#title' => $this->t('Generate embedding on-demand as a fallback'),
      '#description' => $this->t('If a "Source vectors" index is chosen but no vector is found, check this box to generate a new embedding on-demand. If unchecked (default), the view will show no results. When checking this box it is very important to ensure adequate caching is enabled otherwise you may incur higher LLM costs if the caching layer from AI Search is not used (see https://www.drupal.org/project/ai/issues/3552522).'),
      '#default_value' => $this->options['generate_embedding_on_demand_fallback'],
      '#states' => [
        'invisible' => [
          ':input[name="options[vector_source_search_api_index]"]' => ['value' => '_generate_embedding'],
        ],
      ],
    ];

    // Optional override of the index processor's minimum score.
    if ($this->options['score_threshold'] >= 0) {
      $threshold_default = $this->options['score_threshold'];
    }
    else {
      $threshold_default = NULL;
    }
    $form['score_threshold'] = [
      '#type' => 'number',
      '#title' => $this->t('Override minimum relevance score on the "AI Search Score Threshold" processor'),
      '#step' => 0.01,
      '#min' => 0,
      '#max' => 1,
      '#default_value' => $threshold_default,
      '#description' => $this->t('Set a minimum relevance score (0-1). This will override the "AI Search Score Threshold" processor setting on the index if configured. For example, if the index processor is set to 0.2 and you set this to 0.4, only results with a score of 0.4 or higher will be shown. If you set this to 0.1, the processor\'s 0.2 setting will still apply. Leave blank to use the score threshold defined in the Search API Index (if any). This has no effect if the processor is not set up on the index.'),
    ];
  }

  /**
   * {@inheritdoc}
   *
   * Rejects a score threshold override when the Search API index does not
   * have a valid "AI Search Score Threshold" processor to apply it.
   *
   * NOTE(review): the parent implementation is not invoked here, matching
   * the previous behaviour; confirm that skipping it is intentional.
   */
  public function validateOptionsForm(&$form, FormStateInterface $form_state): void {
    // Handler options are submitted nested under "options" (the #states
    // selector in buildOptionsForm() relies on the same "options[...]" input
    // names), so look there first. The top-level lookup is kept as a fallback
    // for callers that pass un-nested values.
    $parents = ['options', 'score_threshold'];
    $score_threshold = $form_state->getValue($parents);
    if ($score_threshold === NULL) {
      $parents = ['score_threshold'];
      $score_threshold = $form_state->getValue($parents);
    }

    // A number element left blank submits an empty string, which means
    // "no override" and therefore needs no processor.
    if ($score_threshold === NULL || $score_threshold === '') {
      return;
    }

    if (!$this->getIndex()->isValidProcessor('ai_search_score_threshold')) {
      // Error names are the element parents joined with "][".
      $form_state->setErrorByName(implode('][', $parents), $this->t('The "AI Search Score Threshold" processor is not enabled or is invalid on the Search API Index. Please leave the threshold override blank or enable the processor.'));
    }
  }

  /**
   * {@inheritdoc}
   *
   * Restricts the Search API query to content related to the current node.
   *
   * The current node is excluded from the results. The search is driven
   * either by a vector already stored for the node or, when none is available
   * and on-demand generation is permitted, by the node's rendered text used
   * as the fulltext keys. The query is aborted when there is no node, no
   * usable vector without a permitted fallback, or no text to search with.
   *
   * @throws \Exception
   *   Propagated from getVectorFromNode() when the configured source index
   *   cannot be used to retrieve vectors.
   */
  public function query() {
    if ($this->routeMatch->getRouteName() === 'entity.view.preview_form') {
      $this->addDebugHelp();
    }

    // Bail if unable to load node.
    $query = $this->getQuery();
    $current_node = $this->getNodeForRelatedContent();
    if (!$current_node) {
      $query->getSearchApiQuery()->abort();
      return;
    }

    // Exclude the current content item.
    $query->addCondition('nid', (int) $current_node->id(), '!=');

    // Override the minimum score on the "AI Search Score Threshold" processor.
    // A blank number field is stored as an empty string, so test for a
    // numeric value rather than "not NULL" to avoid pushing '' as a threshold.
    $score_threshold = $this->options['score_threshold'] ?? NULL;
    if (is_numeric($score_threshold)) {
      $query->setOption('ai_search_score_threshold_override', (float) $score_threshold);
    }

    // Prefer an already indexed vector for the node.
    $vector = $this->getVectorFromNode($current_node);
    if ($vector) {
      $query->setOption('vector_input', $vector);
      parent::query();
      return;
    }

    // No vector was found. An unset source is treated as "always generate",
    // consistent with getVectorFromNode(). When a source index was selected,
    // only generate on demand if the fallback was explicitly opted into.
    $vector_source = $this->options['vector_source_search_api_index'] ?? '_generate_embedding';
    $fallback_allowed = !empty($this->options['generate_embedding_on_demand_fallback']);
    if ($vector_source !== '_generate_embedding' && !$fallback_allowed) {
      $query->getSearchApiQuery()->abort();
      return;
    }

    // On-demand generation: the rendered node text becomes the query keys.
    $content = $this->getTextFromNode($current_node);
    if (empty($content)) {
      $query->getSearchApiQuery()->abort();
      return;
    }
    $this->value = $content;

    parent::query();
  }

  /**
   * Fetches the indexed vector for a given node.
   *
   * Queries the configured source index for every chunk stored for the node
   * (matched on its Search API item ID in the node's language) and returns
   * the raw embedding. Several chunks are merged by average pooling.
   *
   * @param \Drupal\node\NodeInterface $node
   *   The node to get the vector for.
   *
   * @return array|null
   *   The vector array if found, otherwise null. Null is also returned when
   *   the filter is configured to always generate embeddings.
   *
   * @throws \Exception
   *   When the source index or its server cannot be loaded, the backend does
   *   not support vector input, no Vector Database plugin is configured, or
   *   the backend does not include raw embedding vectors in its results.
   */
  protected function getVectorFromNode(NodeInterface $node): ?array {
    $vector_source_search_api_index = $this->options['vector_source_search_api_index'] ?? '_generate_embedding';
    if ($vector_source_search_api_index === '_generate_embedding') {
      return NULL;
    }

    // Use the View's own index unless a different source index was selected.
    $index = $this->getQuery()->getIndex();
    if ($index->id() !== $vector_source_search_api_index) {
      $source_index = $this->entityTypeManager
        ->getStorage('search_api_index')
        ->load($vector_source_search_api_index);
      if (!$source_index instanceof IndexInterface) {
        throw new \Exception('Unable to find the source index, review your selection in the "AI Related Content from Node" Views Filter Handler.');
      }
      $index = $source_index;
    }

    // It is the server, not the index, that failed to load at this point, so
    // the message says so.
    $server = $index->getServerInstance();
    if (!$server) {
      $message = (string) $this->t('Unable to load the Search API Server for the @search_api_index Search API Index in the "AI Related Content from Node" Views Filter Handler.', [
        '@search_api_index' => $vector_source_search_api_index,
      ]);
      throw new \Exception($message);
    }

    // Check that this is AI 2.0.x at least.
    // @see https://www.drupal.org/project/ai/issues/3489566.
    if (!method_exists($server->getBackend(), 'getSearchVectorInput')) {
      throw new \Exception('Getting the vector from an existing Search API Index only works from AI module 2.0.x or higher in the "AI Related Content from Node" Views Filter Handler.');
    }

    // Get the backend and VDB configuration from the server.
    $backend_config = $server->getBackendConfig();
    $vdb_plugin_id = $backend_config['database'] ?? NULL;
    if (!$vdb_plugin_id || !$this->vdbProviderManager->hasDefinition($vdb_plugin_id)) {
      $message = (string) $this->t('No Vector Database back-end configuration could be found for Plugin @vdb_plugin_id in @search_api_index Search API Index in the "AI Related Content from Node" Views Filter Handler.', [
        '@search_api_index' => $vector_source_search_api_index,
        '@vdb_plugin_id' => $vdb_plugin_id,
      ]);
      throw new \Exception($message);
    }

    // If raw embedding is not included, we cannot get vector input from this.
    if (empty($backend_config['include_raw_embedding_vector'])) {
      $message = (string) $this->t('The Search API Index for @search_api_index does not have "include_raw_embedding_vector" set to true in the "AI Related Content from Node" Views Filter Handler.', [
        '@search_api_index' => $vector_source_search_api_index,
      ]);
      throw new \Exception($message);
    }

    // Get the raw vector from the Search API Index.
    $entity_id = 'entity:node/' . $node->id() . ':' . $node->language()->getId();
    /** @var \Drupal\search_api\Query\QueryInterface $query */
    $query = $index->query();
    $query->addCondition('drupal_entity_id', $entity_id);

    // Pinecone specifically requires embeddings to be provided in order to
    // query their database. This is likely a Pinecone specific problem. We use
    // as little tokens as possible here, e.g. 1 token at ($0.02 / 1,000,000)
    // cost with OpenAI text embedding small (as of 07/10/2025).
    // @todo Consider allowing other VDB Providers to opt-in to this.
    // @see https://community.pinecone.io/t/query-with-metadata-only/2089/5
    if ($vdb_plugin_id === 'pinecone') {
      $query->keys('Title');
    }
    $results = $query->execute();

    // Retrieve all embeddings for the same entity. If the user has not used an
    // index with a high chunk size or average pool embedding strategy, each
    // entity may have multiple chunks. We want related content based on the
    // entire entity.
    $embeddings = [];
    if ($results->getResultCount()) {
      foreach ($results->getResultItems() as $result_item) {
        $vector = $result_item->getExtraData('raw_vector');
        // Anything other than a non-empty array cannot be returned as, or
        // pooled into, a vector, so it is ignored.
        if (is_array($vector) && $vector !== []) {
          $embeddings[] = $vector;
        }
      }
    }

    // If no embeddings were found, return null.
    if ($embeddings === []) {
      return NULL;
    }

    // If only one chunk was found, just return its vector directly.
    if (count($embeddings) === 1) {
      return $embeddings[0];
    }

    // If multiple chunks were found for the same source node, average them.
    return $this->averagePooling($embeddings);
  }

  /**
   * Return merged embedding via Average Pooling.
   *
   * Based on `AveragePoolEmbeddingStrategy.php` in the AI Search module, with
   * guards added for input that would otherwise trigger undefined-offset
   * errors.
   *
   * The first usable embedding fixes the expected number of dimensions.
   * Entries that are not arrays, or whose length differs from it, are left
   * out of the average because they cannot be combined component-wise.
   *
   * @param array $embeddings
   *   The embeddings, each an array of numeric components. Keys of the
   *   individual embeddings are ignored; only component order matters.
   *
   * @return array
   *   The component-wise average as a list of floats, or an empty array when
   *   no usable embedding was supplied.
   */
  protected function averagePooling(array $embeddings): array {
    $dimensions = NULL;
    $sums = [];
    $pooled = 0;

    foreach ($embeddings as $embedding) {
      if (!is_array($embedding)) {
        continue;
      }

      // Re-index so positional access is safe whatever the original keys.
      $components = array_values($embedding);
      if ($dimensions === NULL) {
        $dimensions = count($components);
        $sums = array_fill(0, $dimensions, 0.0);
      }
      elseif (count($components) !== $dimensions) {
        continue;
      }

      foreach ($components as $position => $component) {
        $sums[$position] += $component;
      }
      $pooled++;
    }

    // Nothing usable: avoid dividing by zero.
    if ($pooled === 0) {
      return [];
    }

    foreach ($sums as $position => $sum) {
      $sums[$position] = $sum / $pooled;
    }

    return $sums;
  }

  /**
   * Add warning messages when the site builder has misconfigured the View.
   *
   * Checks, in order: that a source node is available and renders to some
   * text, that this module's contextual filter is attached to the View, and
   * that the index exposes a "nid" field.
   */
  protected function addDebugHelp(): void {
    // Source node and its rendered text.
    $source_node = $this->getNodeForRelatedContent();
    if (!$source_node) {
      $this->messenger()->addWarning($this->t('When previewing Views is unaware of which Node ID you want related content for. Provide a Node ID like "123" in the "Preview with contextual filters" input.'));
    }
    elseif (empty($this->getTextFromNode($source_node))) {
      $this->messenger()->addWarning($this->t('The Node to use to find related contents was found; however, when attempting to render it in the selected View Mode (@view_mode) no content was found and therefore no related content could be found.', [
        '@view_mode' => $this->options['view_mode'],
      ]));
    }

    // Contextual filter provided by this module.
    $related_content_argument = $this->getRelatedContentArgument();
    if (!$related_content_argument) {
      $this->messenger()->addWarning($this->t('The contextual filter for AI Related Content appears to be missing. Please ensure the "Fulltext search from current node" contextual filter is added. Choose to "Hide the view" when there are no results.'));
    }

    // The "nid" field is what query() filters on to exclude the current node.
    $index_fields = $this->getIndex()->getFields();
    if (!array_key_exists('nid', $index_fields)) {
      $this->messenger()->addWarning($this->t('Your Search Index must have the Node ID indexed to exclude the current node from its own related content. The machine name for the field expected is "nid". In your search index fields page, add the Node ID and set it as a "Filterable Attribute".'));
    }
  }

  /**
   * Get the node to retrieve related content for.
   *
   * The node is supplied by this module's contextual filter; without that
   * argument on the View there is no node to work with.
   *
   * @return \Drupal\node\NodeInterface|null
   *   The node or null.
   */
  protected function getNodeForRelatedContent(): NodeInterface|null {
    // Short-circuits to NULL when the argument is not attached to the View.
    $candidate = $this->getRelatedContentArgument()?->getNode();
    return $candidate instanceof NodeInterface ? $candidate : NULL;
  }

  /**
   * Get the AI Related Content argument belonging to this module.
   *
   * @return \Drupal\ai_related_content\Plugin\views\argument\AIRelatedContentNodeArgument|null
   *   The first matching argument handler on the View, or null if none.
   */
  protected function getRelatedContentArgument(): AIRelatedContentNodeArgument|null {
    $arguments = $this->view->argument;
    if (!is_array($arguments)) {
      return NULL;
    }

    foreach ($arguments as $candidate) {
      if ($candidate instanceof AIRelatedContentNodeArgument) {
        return $candidate;
      }
    }
    return NULL;
  }

  /**
   * Get a text representation of the source to find related content for.
   *
   * Renders the node in the configured view mode and reduces the HTML to
   * text. When the optional league/html-to-markdown library is installed the
   * HTML is converted to Markdown; otherwise tags are stripped and entities
   * decoded.
   *
   * The previous implementation passed the HTML to
   * League\CommonMark\CommonMarkConverter, which converts Markdown *to* HTML.
   * With 'html_input' => 'strip' that discards HTML input and emits HTML
   * output, so it is no longer used.
   *
   * @param \Drupal\node\NodeInterface $node
   *   The node to get related content for.
   *
   * @return string
   *   The content, or an empty string when the configured view mode no longer
   *   exists or nothing was rendered.
   */
  protected function getTextFromNode(NodeInterface $node): string {

    // Render the node in the selected View Mode if it still exists.
    $view_mode = $this->options['view_mode'];
    $view_modes = $this->entityDisplayRepository->getViewModeOptions('node');
    if (empty($view_mode) || !array_key_exists($view_mode, $view_modes)) {
      return '';
    }

    // Build the rendered content.
    $build = $this->entityTypeManager->getViewBuilder('node')->view($node, $view_mode);
    if (empty($build)) {
      return '';
    }

    // Conditionally render based on the current route.
    if ($this->routeMatch->getRouteName() === 'entity.view.preview_form') {
      // In the Views UI preview, there's no parent render context, so we
      // must create one with renderRoot().
      $rendered_content = $this->getRenderer()->renderRoot($build);
    }
    else {
      // On a regular page, we are inside a render context, so we must
      // use render() to avoid breaking asset bubbling.
      $rendered_content = $this->getRenderer()->render($build);
    }

    // The renderer returns a markup object; work with a plain string.
    $html = (string) $rendered_content;
    if (trim($html) === '') {
      return '';
    }

    // Convert HTML to Markdown if the optional library is available.
    if (class_exists('League\HTMLToMarkdown\HtmlConverter')) {
      try {
        // Ignore the non-use statement loading since this dependency may
        // not exist.
        // @codingStandardsIgnoreLine
        $converter = new \League\HTMLToMarkdown\HtmlConverter([
          'strip_tags' => TRUE,
          'remove_nodes' => 'script style',
        ]);
        return trim($converter->convert($html));
      }
      catch (\Exception $exception) {
        // Best effort only: fall through to the plain-text conversion below.
      }
    }

    // Fallback to plain text, decoding entities left behind by strip_tags().
    $text_content = html_entity_decode(strip_tags($html), ENT_QUOTES | ENT_HTML5, 'UTF-8');

    // Strip extra new lines; keep the text as-is if the regex engine fails.
    $collapsed = preg_replace("/\n\n+/s", "\n", $text_content);
    return trim($collapsed ?? $text_content);
  }

  /**
   * {@inheritdoc}
   *
   * Disables caching when an embedding may have to be generated on demand
   * but there is no source text to generate it from, so that an empty
   * "no related content" result is not cached.
   */
  public function getCacheMaxAge() {
    // Vectors come exclusively from a source index, with no on-demand
    // fallback: nothing special to do, cache as the parent would.
    $stored_vectors_only = $this->options['vector_source_search_api_index'] !== '_generate_embedding'
      && empty($this->options['generate_embedding_on_demand_fallback']);
    if ($stored_vectors_only) {
      return parent::getCacheMaxAge();
    }

    // On-demand generation is possible. Without a node, or without any text
    // rendered from it, the query bails out immediately; do not cache that.
    $source_node = $this->getNodeForRelatedContent();
    $has_source_text = $source_node && !empty($this->getTextFromNode($source_node));

    return $has_source_text ? parent::getCacheMaxAge() : 0;
  }

  /**
   * {@inheritdoc}
   *
   * Summarises the view mode, the vector source index, whether the on-demand
   * fallback is active and any overridden minimum score.
   */
  public function adminSummary() {
    if (!empty($this->options['exposed'])) {
      return $this->t('exposed');
    }

    // Show the view mode's label when known, its machine name otherwise.
    $view_mode_id = $this->options['view_mode'];
    $view_mode_labels = $this->entityDisplayRepository->getViewModeOptions('node');
    $view_mode_text = $view_mode_labels[$view_mode_id] ?? $view_mode_id;

    // The fallback is only meaningful when a real source index is selected.
    $vector_source = $this->options['vector_source_search_api_index'] ?? ' (not set)';
    $fallback_enabled = !empty($this->options['generate_embedding_on_demand_fallback']);
    $fallback_text = ($vector_source !== '_generate_embedding' && $fallback_enabled)
      ? ' ' . $this->t('(with fallback)')
      : '';

    $score_text = '';
    if (!empty($this->options['score_threshold']) && $this->options['score_threshold'] >= 0) {
      $score_text = $this->t(', Overridden min score: @score', [
        '@score' => $this->options['score_threshold'],
      ]);
    }

    $replacements = [
      '@view_mode' => $view_mode_text,
      '@vector_source_search_api_index' => $vector_source,
      '@fallback' => $fallback_text,
      '@score' => $score_text,
    ];
    return $this->t('View mode: @view_mode, Source index: @vector_source_search_api_index@fallback@score', $replacements);
  }

}
