<?php

namespace Drupal\editoria11y_si;

use Drupal\Core\Config\ConfigFactoryInterface;
use Drupal\Core\Database\Connection;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Logger\LoggerChannelFactoryInterface;
use Drupal\Core\Messenger\MessengerInterface;
use Drupal\Core\Queue\QueueFactory;
use Drupal\Core\State\StateInterface;
use Drupal\key\KeyRepositoryInterface;
use GuzzleHttp\Exception\RequestException;

/**
 * SiteImprove data importer.
 *
 * Fetches broken link, misspelling and reading score reports from the
 * SiteImprove REST API, removes editoria11y_si entities whose pages are no
 * longer reported, and places the fetched rows on the
 * 'editoria11y_si_import_processor' queue.
 */
class Editoria11ySiImporter {

  /**
   * Base URL of the per-site SiteImprove API endpoints.
   */
  private const API_BASE_URL = 'https://api.siteimprove.com/v2/sites/';

  /**
   * Name of the queue that receives the fetched rows.
   */
  private const IMPORT_QUEUE = 'editoria11y_si_import_processor';

  /**
   * Logger channel used for API and configuration failures.
   */
  private const LOGGER_CHANNEL = 'editoria11y_si';

  /**
   * Email address of SiteImprove user this is tied to.
   *
   * @var string
   */
  private string $apiUser = '';

  /**
   * API key of the user.
   *
   * @var string
   */
  private string $apiKey = '';

  /**
   * SiteImprove site that you want to reference.
   *
   * @var string
   */
  private string $site = '';

  /**
   * The website group for the APIs.
   *
   * @var string
   */
  private string $group = '';

  /**
   * The website's domain that will be stripped from results.
   *
   * @var string
   */
  private string $domain = '';

  /**
   * Constructs a new SiteImprove importer.
   *
   * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $loggerFactory
   *   The logger factory instance.
   * @param \Drupal\Core\Config\ConfigFactoryInterface $configFactory
   *   The factory for configuration objects.
   * @param \Drupal\Core\Database\Connection $connection
   *   The connection for database connection objects.
   * @param \Drupal\Core\Messenger\MessengerInterface $messenger
   *   The messenger for message objects.
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entityTypeManager
   *   The entity type manager interface.
   * @param \Drupal\Core\State\StateInterface $state
   *   The State Interface.
   * @param \Drupal\key\KeyRepositoryInterface $keyRepository
   *   The Key repository interface.
   * @param \Drupal\Core\Queue\QueueFactory $queueFactory
   *   Drupal queue factory instance.
   */
  public function __construct(
    private LoggerChannelFactoryInterface $loggerFactory,
    private ConfigFactoryInterface $configFactory,
    private Connection $connection,
    private MessengerInterface $messenger,
    private EntityTypeManagerInterface $entityTypeManager,
    private StateInterface $state,
    private KeyRepositoryInterface $keyRepository,
    private readonly QueueFactory $queueFactory,
  ) {}

  /**
   * Loads the domain and the API credentials into the object.
   *
   * Reads 'domain' and 'siteimprove_api' from editoria11y_si.settings and
   * resolves the latter through the key repository.
   *
   * @return bool
   *   TRUE when the configured key could be loaded, FALSE otherwise.
   */
  private function loadConnectionInformation(): bool {
    // Load the settings from /admin/config/content/editoria11y/si.
    $config = $this->configFactory->get('editoria11y_si.settings');
    // The setting may be unset; the property is a non-nullable string.
    $this->domain = (string) $config->get('domain');

    $keyId = $config->get('siteimprove_api');
    $key = empty($keyId) ? NULL : $this->keyRepository->getKey($keyId);
    if ($key === NULL) {
      $this->loggerFactory->get(self::LOGGER_CHANNEL)
        ->error('The SiteImprove API key is not configured or could not be loaded.');
      $this->messenger->addError('The SiteImprove API key is not configured or could not be loaded. No items will be imported.');
      return FALSE;
    }

    $values = $key->getKeyValues();
    if (!is_array($values)) {
      $values = [];
    }
    $this->apiUser = (string) ($values['username'] ?? '');
    $this->apiKey = (string) ($values['api_key'] ?? '');
    $this->site = (string) ($values['site'] ?? '');
    $this->group = (string) ($values['group'] ?? '');
    return TRUE;
  }

  /**
   * Performs an authenticated GET request and decodes the JSON response.
   *
   * @param string $url
   *   The url of the api to get data from.
   *
   * @return array|null
   *   The decoded response, or NULL when the request failed, the server did
   *   not answer with a 2xx status, or the body was not a JSON object/array.
   *   Every failure is written to the module's logger channel.
   */
  private function getPages(string $url): ?array {
    $logger = $this->loggerFactory->get(self::LOGGER_CHANNEL);

    $process = curl_init($url);
    if ($process === FALSE) {
      $logger->error('Could not initialise a request to @url.', ['@url' => $url]);
      return NULL;
    }
    curl_setopt_array($process, [
      CURLOPT_HTTPHEADER => ['Accept: application/json'],
      CURLOPT_USERPWD => $this->apiUser . ':' . $this->apiKey,
      CURLOPT_RETURNTRANSFER => TRUE,
    ]);

    // cURL reports failures through return values, not exceptions, so they
    // have to be checked explicitly.
    $response = curl_exec($process);
    if ($response === FALSE) {
      $logger->error('Request to @url failed: @error', [
        '@url' => $url,
        '@error' => curl_error($process),
      ]);
      return NULL;
    }

    $status = (int) curl_getinfo($process, CURLINFO_RESPONSE_CODE);
    if ($status < 200 || $status >= 300) {
      $logger->error('Request to @url returned HTTP status @status.', [
        '@url' => $url,
        '@status' => $status,
      ]);
      return NULL;
    }

    $data = json_decode($response, TRUE);
    if (!is_array($data)) {
      $logger->error('Request to @url did not return valid JSON.', ['@url' => $url]);
      return NULL;
    }
    return $data;
  }

  /**
   * Checks that an import is enabled and loads the connection information.
   *
   * @param string $settingName
   *   Name of the boolean setting in editoria11y_si.settings.
   * @param string $disabledMessage
   *   Message shown to the user when the setting is off.
   *
   * @return bool
   *   TRUE when the import may proceed.
   */
  private function prepareImport(string $settingName, string $disabledMessage): bool {
    if (!$this->configFactory->get('editoria11y_si.settings')->get($settingName)) {
      // It is not enabled so we do not need to do anything.
      $this->messenger->addError($disabledMessage);
      return FALSE;
    }
    return $this->loadConnectionInformation();
  }

  /**
   * Builds the URL of the first result page of a list endpoint.
   *
   * @param string $path
   *   Endpoint path below the site, starting with a slash.
   *
   * @return string
   *   The URL requesting page 1 with 100 items, limited to the configured
   *   group when there is one.
   */
  private function buildListUrl(string $path): string {
    $url = self::API_BASE_URL . $this->site . $path . '?page=1&page_size=100';
    if (!empty($this->group)) {
      $url .= '&group_id=' . $this->group;
    }
    return $url;
  }

  /**
   * Returns an API item's URL with the configured domain removed.
   *
   * Internal urls often do not carry the domain, so it is stripped to make
   * them comparable.
   *
   * @param array $item
   *   An item from an API response.
   *
   * @return string
   *   The stripped URL, or an empty string when the item has no 'url'.
   */
  private function stripDomain(array $item): string {
    return str_replace($this->domain, '', (string) ($item['url'] ?? ''));
  }

  /**
   * Tells the user that an import was abandoned because the API call failed.
   *
   * @param string $type
   *   The type of SiteImprove data (broken_links, misspellings, reading_score).
   */
  private function reportFailedImport(string $type): void {
    $this->messenger->addError('SiteImprove ' . $type . ' data could not be retrieved. No items were imported or deleted.');
  }

  /**
   * Removes stale entities and replaces the queued items of one type.
   *
   * @param string $type
   *   The type of SiteImprove data (broken_links, misspellings, reading_score).
   * @param array $items
   *   The rows to queue.
   * @param array $pageUrls
   *   The page URLs currently reported by the API.
   */
  private function finishImport(string $type, array $items, array $pageUrls): void {
    // Delete all entities of this type that are no longer in the API.
    $this->deleteOldEntities($pageUrls, $type);

    $queue = $this->queueFactory->get(self::IMPORT_QUEUE);

    // Delete any left over items of this type from the queue. Items of other
    // types are remembered and released once the loop is done: leaving them
    // claimed would keep them leased, and releasing them inside the loop
    // would make claimItem() hand them out again.
    $itemsOfOtherTypes = [];
    while ($item = $queue->claimItem()) {
      $itemType = is_array($item->data) ? ($item->data['siteimprove_type'] ?? NULL) : NULL;
      if ($itemType === $type) {
        $queue->deleteItem($item);
      }
      else {
        $itemsOfOtherTypes[] = $item;
      }
    }
    foreach ($itemsOfOtherTypes as $item) {
      $queue->releaseItem($item);
    }

    // Create a queue item for each siteimprove page.
    foreach ($items as $item) {
      $queue->createItem($item);
    }
  }

  /**
   * Generate editoria11y_si import queue items for broken links.
   *
   * Nothing is deleted or queued when the list of pages cannot be fetched
   * completely.
   */
  public function generateImportQueueItemsForBrokenLinks() {
    if (!$this->prepareImport('editoria11y_si_broken_links_import_enabled', 'SiteImprove broken link checking is disabled. No items will be imported.')) {
      return;
    }

    $path = '/quality_assurance/links/pages_with_broken_links';
    $url = $this->buildListUrl($path);

    // Loop through all of the pages of siteImprove pages.
    $pagesWithBrokenLinks = [];
    $pageUrls = [];
    do {
      $data = $this->getPages($url);
      if ($data === NULL) {
        // An incomplete list would make every unlisted page look stale.
        $this->reportFailedImport('broken_links');
        return;
      }

      $pages = $data['items'] ?? [];
      foreach ($pages as $page) {
        if (!is_array($page)) {
          continue;
        }
        // Get the broken links of this page. A failed detail request leaves
        // the page listed with empty raw data.
        $details = isset($page['id'])
          ? $this->getPages(self::API_BASE_URL . $this->site . $path . '/' . $page['id'] . '/broken_links')
          : NULL;

        $pageUrl = $this->stripDomain($page);
        $pagesWithBrokenLinks[] = [
          'url' => $pageUrl,
          'siteimprove_url' => $page['_siteimprove']['page_report']['href'] ?? '',
          'raw_data' => $details['items'] ?? [],
          'siteimprove_type' => 'broken_links',
        ];
        $pageUrls[] = $pageUrl;
      }

      // Check to see if there is a next page of links. If not, we are done.
      $url = $data['links']['next']['href'] ?? FALSE;
    } while ($url);

    $this->finishImport('broken_links', $pagesWithBrokenLinks, $pageUrls);
  }

  /**
   * Generate editoria11y_si import queue items for misspellings.
   *
   * Nothing is deleted or queued when the misspellings or the pages they
   * occur on cannot be fetched completely.
   */
  public function generateImportQueueItemsForMisspellings() {
    if (!$this->prepareImport('editoria11y_si_misspellings_import_enabled', 'SiteImprove misspellings checking is disabled. No items will be imported.')) {
      return;
    }

    $path = '/quality_assurance/spelling/misspellings';
    $url = $this->buildListUrl($path);

    // Loop through all of the pages of siteImprove misspellings.
    $pagesWithMisspellings = [];
    $pageUrls = [];
    do {
      $data = $this->getPages($url);
      if ($data === NULL) {
        $this->reportFailedImport('misspellings');
        return;
      }

      $words = $data['items'] ?? [];
      foreach ($words as $word) {
        if (!is_array($word) || !isset($word['id'])) {
          continue;
        }
        // Get the pages that contain this misspelling.
        $pagesData = $this->getPages(self::API_BASE_URL . $this->site . $path . '/' . $word['id'] . '/pages');
        if ($pagesData === NULL) {
          // Without these pages the URL list would be incomplete and their
          // entities would be treated as stale.
          $this->reportFailedImport('misspellings');
          return;
        }

        $rawData = [
          'word' => $word['word'] ?? '',
          'suggestion' => $word['suggestion'] ?? '',
          'preferred' => $word['preferred'] ?? '',
        ];
        $pagesWithWord = $pagesData['items'] ?? [];
        foreach ($pagesWithWord as $pageData) {
          if (!is_array($pageData)) {
            continue;
          }
          $pageUrl = $this->stripDomain($pageData);
          $pagesWithMisspellings[] = [
            'url' => $pageUrl,
            'siteimprove_url' => $pageData['_siteimprove']['page_report']['href'] ?? '',
            'raw_data' => $rawData,
            'siteimprove_type' => 'misspellings',
          ];
          $pageUrls[] = $pageUrl;
        }
      }

      // Check to see if there is a next page of links. If not, we are done.
      $url = $data['links']['next']['href'] ?? FALSE;
    } while ($url);

    $this->finishImport('misspellings', $pagesWithMisspellings, $pageUrls);
  }

  /**
   * Generate editoria11y_si import queue items for reading scores.
   *
   * Nothing is deleted or queued when the list of pages cannot be fetched
   * completely.
   */
  public function generateImportQueueItemsForReadingScores() {
    if (!$this->prepareImport('editoria11y_si_reading_score_import_enabled', 'SiteImprove reading scores checking is disabled. No items will be imported.')) {
      return;
    }

    $url = $this->buildListUrl('/quality_assurance/readability/tests/flesch_kincaid_grade_level/pages');

    // Loop through all of the pages of siteImprove pages.
    $pagesWithReadingScores = [];
    $pageUrls = [];
    do {
      $data = $this->getPages($url);
      if ($data === NULL) {
        $this->reportFailedImport('reading_score');
        return;
      }

      $pages = $data['items'] ?? [];
      foreach ($pages as $page) {
        if (!is_array($page)) {
          continue;
        }
        $pageUrl = $this->stripDomain($page);
        $pagesWithReadingScores[] = [
          'url' => $pageUrl,
          'siteimprove_url' => $page['_siteimprove']['page_report']['href'] ?? '',
          'raw_data' => $page['flesch_kincaid_grade_level_score'] ?? '',
          'siteimprove_type' => 'reading_score',
        ];
        $pageUrls[] = $pageUrl;
      }

      // Check to see if there is a next page of links. If not, we are done.
      $url = $data['links']['next']['href'] ?? FALSE;
    } while ($url);

    $this->finishImport('reading_score', $pagesWithReadingScores, $pageUrls);
  }

  /**
   * Deletes all editoria11y_si entities that are no longer in the API.
   *
   * @param array|null $pageUrlsInApi
   *   The array of page URLs currently in the API from SiteImprove. NULL
   *   means the list is unknown, in which case nothing is deleted.
   * @param string $type
   *   The type of SiteImprove data (broken_links, misspellings, reading_score).
   */
  private function deleteOldEntities(?array $pageUrlsInApi, string $type) {
    if ($pageUrlsInApi === NULL) {
      return;
    }

    // Key the API urls by value so each lookup is a single hash access.
    $urlsInApi = [];
    foreach ($pageUrlsInApi as $pageUrl) {
      if (is_string($pageUrl) || is_int($pageUrl)) {
        $urlsInApi[$pageUrl] = TRUE;
      }
    }

    // Several rows may share a url; each url only needs to be checked once.
    $urlsInDrupal = $this->connection->select('editoria11y_si', 'es')
      ->fields('es', ['url'])
      ->condition('siteimprove_type', $type)
      ->distinct()
      ->execute()
      ->fetchCol();

    $storage = $this->entityTypeManager->getStorage('editoria11y_si');

    // Loop through all of the urls in Drupal and see if they are in the API.
    // If not, we need to delete them.
    foreach ($urlsInDrupal as $urlInDrupal) {
      if (isset($urlsInApi[$urlInDrupal])) {
        continue;
      }

      // No match found, so find the entities by the url and type.
      $ids = $storage->getQuery()
        ->accessCheck(FALSE)
        ->condition('url', $urlInDrupal)
        ->condition('siteimprove_type', $type)
        ->execute();
      if (empty($ids)) {
        continue;
      }

      $entities = $storage->loadMultiple($ids);
      $storage->delete($entities);
      // Delete cache to help with memory issues.
      $storage->resetCache(array_keys($entities));
    }
  }

}
