<?php

namespace Drupal\dkan_dataset_archiver\Service;

use Drupal\Component\Uuid\Uuid;
use Drupal\Core\Config\ConfigFactoryInterface;
use Drupal\Core\Datetime\DrupalDateTime;
use Drupal\Core\DependencyInjection\ContainerInjectionInterface;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Extension\ModuleHandler;
use Drupal\Core\File\FileExists;
use Drupal\Core\File\FileSystemInterface;
use Drupal\Core\Queue\QueueFactory;
use Drupal\datetime\Plugin\Field\FieldType\DateTimeItemInterface;
use Drupal\dkan_dataset_archiver\Entity\DdaArchiveInterface;
use Drupal\file\Entity\File;
use Drupal\file\FileInterface;
use Drupal\file\FileRepositoryInterface;
use Drupal\metastore_search\Search;
use Drupal\metastore\Storage\DataFactory;
use Drupal\metastore\Storage\NodeData;
use Drupal\node\NodeInterface;
use Psr\Log\LoggerInterface;
use Symfony\Component\DependencyInjection\ContainerInterface;

/**
 * Archive Service.
 */
class ArchiveService implements ContainerInjectionInterface {

  /**
   * The file repository service.
   *
   * Injected through the constructor.
   *
   * @var \Drupal\file\FileRepositoryInterface
   */
  protected $fileRepository;

  /**
   * The file system service.
   *
   * Injected through the constructor.
   *
   * @var \Drupal\Core\File\FileSystemInterface
   */
  protected $fileSystem;

  /**
   * The archiver settings.
   *
   * This is the 'dkan_dataset_archiver.settings' configuration object, read
   * from the config factory once, when the service is constructed.
   *
   * @var \Drupal\Core\Config\ImmutableConfig
   */
  protected $archiverSettings;

  /**
   * The entity type manager.
   *
   * Used to obtain the 'dda_archive' and 'node' entity storages.
   *
   * @var \Drupal\Core\Entity\EntityTypeManagerInterface
   */
  private $entityTypeManager;

  /**
   * The dkan_dataset_archiver logger channel.
   *
   * @var \Psr\Log\LoggerInterface
   */
  protected $logger;

  /**
   * The metastore search api service.
   *
   * @var \Drupal\metastore_search\Search
   */
  private $metastoreSearch;

  /**
   * Metastore Storage factory.
   *
   * @var \Drupal\metastore\Storage\DataFactory
   */
  protected $metastoreStorage;

  /**
   * The module handler.
   *
   * @var \Drupal\Core\Extension\ModuleHandler
   */
  protected $moduleHandler;


  /**
   * The queue factory.
   *
   * Used to obtain the 'archive_aggregation' queue.
   *
   * @var \Drupal\Core\Queue\QueueFactory
   */
  protected $queue;

  /**
   * Storages.
   *
   * Starts out empty. NOTE(review): presumably a cache of storage handlers;
   * confirm against the code that populates it.
   *
   * @var array
   */
  protected $storages = [];

  /**
   * Term map to convert or coalesce terms.
   *
   * Starts out empty.
   *
   * @var array
   */
  protected $termMap = [];

  /**
   * Utility of helper functions.
   *
   * @var \Drupal\dkan_dataset_archiver\Service\Util
   */
  protected $util;

  /**
   * Current year.
   *
   * Formatted with 'Y' from Util::date() at the moment the service is
   * constructed.
   *
   * @var string
   */
  private $year;

  /**
   * All published datasets.
   *
   * Starts out empty.
   *
   * @var array
   */
  protected $publishedDatasets = [];

  /**
   * Builds the archive service from its injected collaborators.
   *
   * @param \Drupal\Core\Config\ConfigFactoryInterface $config_factory
   *   The config factory; only used to read the archiver settings.
   * @param \Drupal\dkan_dataset_archiver\Service\Util $util
   *   Utility.
   * @param \Drupal\metastore_search\Search $metastoreSearchService
   *   Metastore Search wrapper for the SearchApi.
   * @param \Drupal\metastore\Storage\DataFactory $metastoreStorage
   *   Metastore Storage factory.
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entityTypeManager
   *   The entity type manager.
   * @param \Drupal\file\FileRepositoryInterface $file_repository
   *   The file repository service.
   * @param \Drupal\Core\File\FileSystemInterface $file_system
   *   The file system service.
   * @param \Psr\Log\LoggerInterface $logger
   *   The dkan_dataset_archiver logger channel.
   * @param \Drupal\Core\Extension\ModuleHandler $moduleHandler
   *   The module handler.
   * @param \Drupal\Core\Queue\QueueFactory $queue
   *   The queue factory.
   */
  public function __construct(
    ConfigFactoryInterface $config_factory,
    Util $util,
    Search $metastoreSearchService,
    DataFactory $metastoreStorage,
    EntityTypeManagerInterface $entityTypeManager,
    FileRepositoryInterface $file_repository,
    FileSystemInterface $file_system,
    LoggerInterface $logger,
    ModuleHandler $moduleHandler,
    QueueFactory $queue,
  ) {
    // Injected services, stored in the order the arguments arrive.
    $this->util = $util;
    $this->metastoreSearch = $metastoreSearchService;
    $this->metastoreStorage = $metastoreStorage;
    $this->entityTypeManager = $entityTypeManager;
    $this->fileRepository = $file_repository;
    $this->fileSystem = $file_system;
    $this->logger = $logger;
    $this->moduleHandler = $moduleHandler;
    $this->queue = $queue;

    // Values derived once, at construction time.
    $this->archiverSettings = $config_factory->get('dkan_dataset_archiver.settings');
    $this->year = Util::date()->format('Y');
  }

  /**
   * {@inheritDoc}
   */
  public static function create(ContainerInterface $container): self {
    // Service ids, listed in the exact order the constructor expects its
    // arguments.
    $service_ids = [
      'config.factory',
      'dkan_dataset_archiver.util',
      'dkan.metastore_search.service',
      'dkan.metastore.storage',
      'entity_type.manager',
      'file.repository',
      'file_system',
      'logger.channel.dkan_dataset_archiver',
      'module_handler',
      'queue',
    ];

    $dependencies = [];
    foreach ($service_ids as $service_id) {
      $dependencies[] = $container->get($service_id);
    }

    return new static(...$dependencies);
  }

  /**
   * Queue aggregate archiving of a theme or keyword's individual archives.
   *
   * The item is added to the 'archive_aggregation' queue. The outcome is
   * logged either way: an info entry carrying the queue id on success, an
   * error entry when the queue refuses the item.
   *
   * @param string $type
   *   Either 'keyword' or 'theme'.
   * @param string $term
   *   The term to aggregate on.
   * @param array $archive_ids
   *   The archive ids to include in the aggregation.
   * @param bool $private
   *   Whether the aggregation is private.
   * @param string|null $aggregate_of
   *   The collation type to use for the aggregation. Usually matches type.
   */
  public function addToAggregationQueue($type, $term, $archive_ids, $private = FALSE, ?string $aggregate_of = NULL): void {
    /** @var \Drupal\Core\Queue\QueueInterface $archiveQueue */
    $archiveQueue = $this->queue->get('archive_aggregation');
    $queue_id = $archiveQueue->createItem([
      'type' => $type,
      'aggregate_of' => $aggregate_of,
      'term' => $term,
      'archive_ids' => $archive_ids,
      'private' => $private,
    ]);

    $private_text = $private ? t('private') . ' ' : '';
    $log_context = [
      '@type' => "{$private_text}{$type}",
      '@term' => $term,
      '@ids' => implode(',', (array) $archive_ids),
    ];

    // createItem() reports failure by returning FALSE; do not claim the
    // archives were queued when they were not.
    if ($queue_id === FALSE) {
      $this->logger->error("@type @term failed to queue archives '@ids' for aggregation.", $log_context);
      return;
    }

    $log_context['%queueId'] = $queue_id;
    $this->logger->info("@type @term had queued archives '@ids' for aggregation queue_id: %queueId", $log_context);
  }

  /**
   * Create an aggregate archive for a theme or keyword.
   *
   * This is called by the ArchiveAggregation queue worker. It builds the zip
   * file, registers it as a file entity, and then creates a new DdaArchive
   * (or, for type 'current', updates the existing one when there is one).
   *
   * @param array $data
   *   The aggregation data containing:
   *   - type: keyword, theme, annual, annual_keyword, annual_theme, current.
   *   - aggregate_of: (optional) the collation type, or NULL.
   *   - term: the term to aggregate on.
   *   - archive_ids: the archive ids to include in the aggregation.
   *   - private: (optional) whether the aggregation is private.
   *
   * @return bool
   *   TRUE on success, FALSE on failure.
   */
  public function createAggregateArchive(array $data): bool {
    // Same "is archiving enabled" test as createIndividualArchive(), so the
    // two entry points cannot disagree on how the setting is stored.
    if (empty($this->archiverSettings->get('archive'))) {
      // Archiving is turned off, so bail out.
      return FALSE;
    }

    // Normalise the queue item once; every key is optional from here on.
    $type = $data['type'] ?? '';
    $aggregate_of = $data['aggregate_of'] ?? NULL;
    $term = $data['term'] ?? '';
    $archive_ids = $data['archive_ids'] ?? [];
    $private = (bool) ($data['private'] ?? FALSE);

    $msg_data = [
      '@type' => $type,
      '@term' => (!empty($term)) ? $term : t('unspecified'),
      '@private' => $private,
      // Use the normalised list: imploding a missing key would throw before
      // the empty check below ever ran.
      '@ids' => implode(', ', $archive_ids),
    ];

    if (empty($archive_ids)) {
      // Bail because there is nothing to work with.
      $this->logger->error("No archive IDs provided to create current aggregate archive. type: '@type' term: '@term'", $msg_data);
      return FALSE;
    }
    if (empty($type)) {
      $this->logger->error("Invalid data sent to createAggregateArchive(). type: '@type' term: '@term' with archives ids: '@ids'", $msg_data);
      return FALSE;
    }

    // Hand the normalised values to the entity builders, keeping any extra
    // keys the caller supplied.
    $data = [
      'type' => $type,
      'aggregate_of' => $aggregate_of,
      'term' => $term,
      'archive_ids' => $archive_ids,
      'private' => $private,
    ] + $data;

    $this->logger->info('Creating aggregate archive for @type: @term with archives @ids', $msg_data);

    // 'current' pulls file data from dataset nodes; anything else pulls it
    // from existing archives.
    $source_type = ($type === 'current') ? 'dataset' : 'dda_archive';
    $zip_contents = $this->aggregateArchiveFiles($archive_ids, $source_type);
    $zip_info = $this->createAggregatedZipFile($type, $aggregate_of, $term, $zip_contents, $private);

    if (empty($zip_info)) {
      // For some reason there is no zip info.
      $this->logger->error("Failed to create zip file for aggregate archive. type: '@type' term: '@term' with archives ids: '@ids'", $msg_data);
      return FALSE;
    }

    // Need to turn this into a file entity.
    $file = File::create([
      'uri' => $zip_info['drupal_file_uri'],
      'filename' => basename($zip_info['file_url']),
      'filemime' => 'application/zip',
      'status' => 1,
    ]);
    $file->save();

    // Create DdaArchive. Currents always update an existing archive if one
    // exists.
    $archive = ($type === 'current')
      ? $this->makeGetCurrentArchive($data, $term, $private, $zip_info, $file, $archive_ids)
      : $this->makeNewAggregateArchive($data, $term, $private, $zip_info, $file, $archive_ids);
    $archive->setSyncing(TRUE);
    $archive->save();
    return TRUE;
  }

  /**
   * Get or create a 'current' aggregate archive entity.
   *
   * Looks for an existing 'current' archive with the same aggregate_of, term
   * and privacy class. When one is found it is prepared as a new revision
   * pointing at the new zip file; otherwise a brand new archive is built. The
   * returned entity is not saved here.
   *
   * @param array $data
   *   The aggregation data containing:
   *   - type: keyword, theme, annual, annual_keyword, annual_theme.
   *   - aggregate_of: the collation type.
   *   - term: the term to aggregate on.
   *   - archive_ids: the archive ids to include in the aggregation.
   * @param string $term
   *   The term to aggregate on.
   * @param bool $private
   *   Whether the aggregation is private.
   * @param array $zip_info
   *   Information about the created zip file including:
   *   - file_url: the URL to the zip file.
   *   - drupal_file_uri: the Drupal file URI of the zip file.
   *   - file_size: the size of the zip file in bytes.
   * @param \Drupal\file\FileInterface $file
   *   The file entity for the zip file.
   * @param array $archive_ids
   *   An array of archive node IDs to include in the aggregation.
   *
   * @return \Drupal\dkan_dataset_archiver\Entity\DdaArchiveInterface
   *   The updated or newly created DdaArchive entity.
   */
  protected function makeGetCurrentArchive(array $data, string $term, bool $private, array $zip_info, FileInterface $file, array $archive_ids): DdaArchiveInterface {
    // Match on every access level belonging to the requested privacy class.
    $access_levels = $private
      ? Util::getAccessLevelsThatAreConsideredPrivate()
      : Util::getAccessLevelsThatAreConsideredPublic();

    $matches = $this->entityTypeManager->getStorage('dda_archive')->loadByProperties([
      'archive_type' => 'current',
      'aggregate_of' => $data['aggregate_of'],
      'aggregate_on' => $term,
      'access_level' => $access_levels,
    ]);

    if (empty($matches)) {
      // Nothing to update, so start a fresh archive.
      return $this->makeNewAggregateArchive($data, $term, $private, $zip_info, $file, $archive_ids);
    }

    // Reuse the first match and record the change as a new revision.
    /** @var \Drupal\dkan_dataset_archiver\Entity\DdaArchiveInterface $current */
    $current = reset($matches);
    $current->set('dataset_modified', Util::date()->format('Y-m-d'));
    $current->set('size', $zip_info['file_size'] ?? 0);
    $current->set('source_archives', Util::buildEntityReferenceTargets($archive_ids));
    $current->setNewRevision(TRUE);
    $current->setRevisionCreationTime(Util::date()->getTimestamp());
    $current->setRevisionLogMessage("Updated due to recent saves to datasets in the $term group.");
    // Point the archive at the freshly built zip file.
    $zip_reference = [
      'target_id' => $file->id(),
      'display' => 1,
    ];
    $current->set('resource_files', $zip_reference);

    return $current;
  }

  /**
   * Create a new aggregate archive entity.
   *
   * The entity is created through the 'dda_archive' storage but not saved.
   *
   * @param array $data
   *   The aggregation data containing:
   *   - type: keyword, theme, annual, annual_keyword, annual_theme.
   *   - aggregate_of: (optional) the collation type.
   *   - archive_ids: the archive ids to include in the aggregation.
   *   - private: whether the aggregation is private.
   * @param string $term
   *   The term to aggregate on. This is used for the name, 'aggregate_on',
   *   'themes' and 'keywords', so a missing $data['term'] cannot cause a
   *   notice.
   * @param bool $private
   *   Whether the aggregation is private.
   * @param array $zip_info
   *   Information about the created zip file including:
   *   - file_url: the URL to the zip file.
   *   - drupal_file_uri: the Drupal file URI of the zip file.
   *   - file_size: the size of the zip file in bytes.
   * @param \Drupal\file\FileInterface $file
   *   The file entity for the zip file.
   * @param array $archive_ids
   *   An array of archive node IDs to include in the aggregation.
   *
   * @return \Drupal\dkan_dataset_archiver\Entity\DdaArchiveInterface
   *   The created DdaArchive entity.
   */
  protected function makeNewAggregateArchive(array $data, string $term, bool $private, array $zip_info, FileInterface $file, array $archive_ids): DdaArchiveInterface {
    $type = $data['type'] ?? '';
    // Take one timestamp so the name and the modified date cannot straddle
    // midnight.
    $now = Util::date();

    $archive_data = [
      'name' => "{$type}: {$term} " . $now->format('Y-m-d H:i'),
      'archive_type' => $type,
      'dataset_modified' => $now->format('Y-m-d'),
      'aggregate_of' => $data['aggregate_of'] ?? NULL,
      'aggregate_on' => $term,
      'themes' => $type === 'theme' ? [$term] : [],
      'keywords' => $type === 'keyword' ? [$term] : [],
      // We only have a bool to work with, and private might be one of
      // multiple levels, so we set to either the highest private
      // 'non-public' or the lowest public.
      'access_level' => $private ? 'non-public' : 'public',
      'size' => $zip_info['file_size'] ?? 0,
      'status' => '1',
      'source_archives' => Util::buildEntityReferenceTargets($archive_ids),
      'resource_files' => [
        'target_id' => $file->id(),
        'display' => 1,
      ],
    ];

    /** @var \Drupal\dkan_dataset_archiver\Entity\DdaArchiveInterface $archive */
    $archive = $this->entityTypeManager->getStorage('dda_archive')->create($archive_data);
    return $archive;
  }

  /**
   * Aggregate files from a list of archive ids and build a manifest.
   *
   * For 'dataset' sources the nodes are loaded and their distributions are
   * inspected; for any other source type DdaArchive entities are loaded and
   * their resource file items are used. Each source contributes one manifest
   * entry listing its resources.
   *
   * @param array $archive_ids
   *   The DdaArchive entity ids or dataset node ids to aggregate files from.
   * @param string $source_type
   *   The source to use for aggregation (e.g. 'dataset', 'dda_archive').
   *
   * @return array
   *   Array with two keys:
   *   - files: de-duplicated list of file URLs or stream URIs.
   *   - manifest: the manifest as pretty-printed JSON text.
   */
  protected function aggregateArchiveFiles(array $archive_ids, string $source_type): array {
    $files = [];
    $manifest = [];
    $sources = [];

    if (empty($archive_ids)) {
      $this->logger->error('No @source_type IDs provided for aggregation.', ['@source_type' => $source_type]);
    }
    else {
      // Datasets are nodes; everything else is read from DdaArchive entities.
      $entity_type_id = ($source_type === 'dataset') ? 'node' : 'dda_archive';
      $sources = $this->entityTypeManager->getStorage($entity_type_id)->loadMultiple($archive_ids);
    }

    foreach ($sources as $source) {
      // Discovery pattern of archive source : dataset source.
      if ($source instanceof DdaArchiveInterface) {
        $manifest_item = [
          'name' => $source->getName(),
          'dataset_id' => $source->get('dataset_id')->value,
          'type' => $source->getArchiveType(),
          'modified_date' => $source->get('dataset_modified')->value,
          'access_level' => $source->get('access_level')->value,
          'private' => (bool) $source->isPrivate(),
          'resources' => [],
        ];
      }
      else {
        $access_level = Util::grabMetadata($source, 'accessLevel');
        $manifest_item = [
          'name' => $source->getTitle(),
          'dataset_id' => Util::grabMetadata($source, 'identifier'),
          'type' => 'current',
          'modified_date' => Util::grabMetadata($source, 'modified'),
          'access_level' => $access_level,
          'private' => Util::isConsideredPrivate($access_level),
          'resources' => [],
        ];
      }

      if ($source_type === 'dataset') {
        $resource_files = Util::grabMetadata($source, 'distribution');
        $private = Util::isDatasetPrivate($source);
      }
      else {
        $resource_files = $source->getResourceFileItems();
        $private = $source->isPrivate();
      }

      if (!empty($resource_files)) {
        foreach ($resource_files as $resource_file) {
          if ($resource_file instanceof FileInterface) {
            $files[] = $resource_file->createFileUrl(FALSE);
            $manifest_item['resources'][] = [
              'filename' => $resource_file->getFilename(),
              'filesize' => $resource_file->getSize(),
              'mime_type' => $resource_file->getMimeType(),
            ];
            continue;
          }

          // This is directly in a dataset.
          $file_url = $resource_file->downloadURL;
          // Reset per resource so a size or header set from a previous
          // resource can never leak into this manifest entry.
          $file_size = NULL;
          $success_by_stream = FALSE;
          // It might be a problem if this full url is not reachable.
          // Attempt local stream first.
          $deduced_stream_uri = $this->util::deduceLocaleFileStream($file_url, $private);
          if ($deduced_stream_uri) {
            $handle = fopen($deduced_stream_uri, 'r');
            if ($handle !== FALSE) {
              $stats = fstat($handle);
              $file_size = is_array($stats) ? $stats['size'] : NULL;
              fclose($handle);
              $success_by_stream = TRUE;
            }
          }

          if (!$success_by_stream) {
            // Attempt URL.
            $headers = get_headers($file_url, TRUE);
            if (!$headers) {
              $msg_vars = [
                '@dataset__node_id' => $source->id(),
                '@deduced_stream_uri' => $deduced_stream_uri,
                '@url' => $file_url,
              ];
              $this->logger->error('Could not retrieve headers for dataset node @dataset__node_id resource file URL: @url, or using stream @deduced_stream_uri', $msg_vars);
            }
            else {
              // Header names are case-insensitive, and a redirected request
              // yields one value per hop; the last one describes the file.
              $content_length = array_change_key_case($headers, CASE_LOWER)['content-length'] ?? NULL;
              if (is_array($content_length)) {
                $content_length = end($content_length);
              }
              if ($content_length !== NULL && $content_length !== FALSE) {
                $file_size = is_numeric($content_length) ? (int) $content_length : $content_length;
              }
            }
          }

          $files[] = ($success_by_stream) ? $deduced_stream_uri : $file_url;
          $entry = [
            'filename' => basename($file_url),
            'filesize' => $file_size,
            'mime_type' => $resource_file->mediaType,
          ];

          $dictionary_url = $resource_file->describedBy ?? '';
          if ($dictionary_url) {
            // Attempt local stream first. The dictionary only uses its
            // stream URI when the data file itself was readable by stream.
            $deduced_dictionary_stream_uri = $this->util::deduceLocaleFileStream($dictionary_url, $private);
            $files[] = (!empty($deduced_dictionary_stream_uri) && $success_by_stream) ? $deduced_dictionary_stream_uri : $dictionary_url;
            $entry['describedBy'] = basename($dictionary_url);
          }
          $manifest_item['resources'][] = $entry;
        }
      }
      $manifest[] = $manifest_item;
    }

    // Remove duplicates from files since dictionaries may overlap.
    $files = array_unique($files);
    if (empty($files)) {
      $this->logger->error('No files found to aggregate from @source_type IDs: @ids', [
        '@source_type' => $source_type,
        '@ids' => implode(', ', $archive_ids),
      ]);
    }
    $manifest_text = json_encode($manifest, JSON_PRETTY_PRINT);
    return ['files' => $files, 'manifest' => $manifest_text];
  }

  /**
   * Clears cache and purges archive endpoints.
   *
   * Currently a stub: the routes and cache tags are worked out per archive
   * type, but the purge call itself is still commented out.
   *
   * @param string $type
   *   The type of the archive theme, keyword, annual_theme, annual_keyword.
   * @param string $theme_or_keyword
   *   The theme or keyword aggregating the archive.
   */
  public function clearCacheAndPurge(string $type, string $theme_or_keyword): void {
    // @todo fix all the routes and cache tags..
    // Each known archive type maps to a [routes, cache tags] pair; any other
    // type maps to nothing.
    [$routes, $cache_tags] = match ($type) {
      'keyword', 'theme' => [
        [
          "api/1/archive/{$type}/{$theme_or_keyword}",
          "api/1/archive/{$type}/{$theme_or_keyword}/current-zip",
        ],
        ["{$type}_archives", "{$type}_current_zips"],
      ],
      'annual_keyword' => [
        [
          "api/1/archive/keyword/{$theme_or_keyword}",
          "api/1/archive/keyword/{$theme_or_keyword}/annual",
        ],
        ["keyword_archives", "keyword_current_zips"],
      ],
      'annual_theme' => [
        [
          "api/1/archive/theme/{$theme_or_keyword}",
          "api/1/archive/theme/{$theme_or_keyword}/annual",
        ],
        ["theme_archives", "theme_current_zips"],
      ],
      'annual' => [
        ["api/1/archive/annual/"],
        ["annual_archives"],
      ],
      default => [[], []],
    };
    // Clear relevant caches.
    // @todo rework this without purge service or optional.
    // ->archivePurgeService->clearUrls($archive_api_routes, $cache_tags);
  }

  /**
   * Create a $theme_or_keyword's aggregated zip file.
   *
   * Opens (or overwrites) the zip at the location computed by
   * archivePathAndFilename(), adds the aggregated files and the manifest,
   * closes it and confirms the file is present on disk. Every failure is
   * logged and reported as an empty array.
   *
   * @param string $type
   *   Archive type 'annual', 'individual', or 'aggregate'.
   * @param string|null $aggregation_type
   *   The aggregation type 'keyword' or 'theme'.
   * @param string $theme_or_keyword
   *   Theme or keyword.
   * @param array $zip_contents
   *   Array containing the file URLs and manifest text ['files', 'manifest'].
   * @param bool $private
   *   Whether the archive is private or not.
   *
   * @return array
   *   Empty on failure, otherwise contains the following elements:
   *   ['file_url', 'drupal_file_uri', 'file_size', 'private'].
   */
  protected function createAggregatedZipFile(string $type, ?string $aggregation_type, string $theme_or_keyword, array $zip_contents, bool $private): array {
    $location = $this->archivePathAndFilename($type, $aggregation_type, $theme_or_keyword, '', $private);
    $zip_path = "{$location['directory']}/{$location['filename']}";
    $log_context = [
      '%type' => $type,
      '%theme' => $theme_or_keyword,
      '%file_url' => $zip_path,
    ];

    $archive = new \ZipArchive();
    $open_result = $archive->open($zip_path, \ZipArchive::CREATE | \ZipArchive::OVERWRITE);
    if ($open_result !== TRUE) {
      // open() hands back an error code instead of TRUE when it fails.
      $log_context['%error_code'] = $open_result;
      $this->logger->error('Failed to initialize zip file on aggregated archive for %type: %theme - %file_url. Error: %error_code', $log_context);
      return [];
    }

    $this->addDownloadUrlFilesToZip($zip_contents, $archive, $private);
    $this->addOtherFilesToAggregateArchive($theme_or_keyword, $archive, $zip_contents['manifest'] ?? '');

    if (!$archive->close()) {
      $this->logger->error('Failed to create zip file on aggregated archive for %type: %theme - %file_url.', $log_context);
      return [];
    }

    if (!file_exists($zip_path)) {
      // The zip files for some reason is not there.
      $this->logger->error('Created initially, but could not reload zip file for aggregated archive on %type: %theme - %file_url.', $log_context);
      return [];
    }

    // Confirmed: We have a file created.
    $bytes = filesize($zip_path);
    $this->logger->notice('Created zip file for aggregated archive on %type: %theme - %file_url.', $log_context);
    return [
      'file_url' => $zip_path,
      'drupal_file_uri' => "{$location['drupal_directory']}/{$location['filename']}",
      'file_size' => $bytes,
      'private' => $private,
    ];
  }

  /**
   * Add files from a list of urls to a zip archive.
   *
   * Each entry is resolved to a local path by csvPathAndFilename() and added
   * under its bare filename. Entries that cannot be resolved are skipped, and
   * entries the zip refuses are logged.
   *
   * NOTE(review): because entries are stored by bare filename, two sources
   * with the same filename would occupy the same zip entry; confirm whether
   * that can happen in practice.
   *
   * @param array $zip_contents
   *   Contains an array of files and a json manifest ['files', 'manifest'].
   * @param \ZipArchive $zip
   *   Zip archive.
   * @param bool $private
   *   Whether the archive is private or not.
   */
  protected function addDownloadUrlFilesToZip(array $zip_contents, \ZipArchive $zip, bool $private): void {
    foreach ($zip_contents['files'] ?? [] as $file_url) {
      if (!is_string($file_url) || $file_url === '') {
        // Nothing usable to resolve (e.g. a resource without a download URL).
        continue;
      }

      $location = $this->csvPathAndFilename($file_url, $private);
      // csvPathAndFilename() returns an empty array (and logs) on failure, so
      // check before destructuring to avoid undefined-key warnings.
      if (!isset($location[0], $location[1])) {
        continue;
      }
      [$csvPath, $csvFilename] = $location;

      if (!$zip->addFile("{$csvPath}/{$csvFilename}", $csvFilename)) {
        $this->logger->warning('Could not add %file to the zip archive as %name.', [
          '%file' => "{$csvPath}/{$csvFilename}",
          '%name' => $csvFilename,
        ]);
      }
    }
  }

  /**
   * Add other files to the aggregate archive.
   *
   * At present the only extra file is 'manifest.json', written when manifest
   * text is supplied.
   *
   * @param string $theme_or_keyword
   *   Theme or keyword. Not currently used by this method.
   * @param \ZipArchive $zip
   *   Archive.
   * @param string $manifest
   *   Manifest text.
   */
  private function addOtherFilesToAggregateArchive(string $theme_or_keyword, \ZipArchive $zip, string $manifest = ''): void {
    if (empty($manifest)) {
      // No manifest text, so there is nothing extra to add.
      return;
    }
    $zip->addFromString('manifest.json', $manifest);
  }

  /**
   * Return an archive's path, drupal path, and filename based on archive type.
   *
   * The Drupal stream directory is also passed to Util::prepareDir() before
   * returning.
   *
   * @param string $type
   *   Archive type: 'keyword', 'annual_keyword', 'theme', 'annual_theme',
   *   'annual'.
   * @param string|null $aggregation_type
   *   The type of aggregation: 'keyword' or 'theme'.
   * @param string $theme_or_keyword
   *   Theme or keyword to aggregate on.
   * @param string $dataset_id
   *   Dataset id. Only needed for 'individual' type.
   * @param bool $private
   *   Whether the archive is private or not.
   *
   * @return array
   *   Array with 'directory', 'drupal_directory', and 'filename' keys.
   */
  protected function archivePathAndFilename(string $type, ?string $aggregation_type = '', string $theme_or_keyword = '', string $dataset_id = '', bool $private = FALSE): array {
    // Today's date is handed to both the path and the filename builders.
    $today = Util::date()->format('Y-m-d');

    if ($private) {
      $files_root = $this->util->getDrupalPrivateFilesDirectory();
    }
    else {
      $files_root = $this->util->getDrupalPublicFilesDirectory();
    }

    $relative_directory = Util::createArchiveFilePath($type, $aggregation_type, $theme_or_keyword, $dataset_id, $today);
    $zip_filename = Util::createArchiveFilename($type, $aggregation_type, $theme_or_keyword, $dataset_id, $today, '');

    // The same relative directory, once as a filesystem path and once as a
    // Drupal stream URI.
    $filesystem_directory = Util::adjustStorageLocation("{$files_root}/{$relative_directory}", $private);
    $stream_directory = Util::adjustStorageLocation("public://{$relative_directory}", $private);
    $this->util->prepareDir($stream_directory);

    return [
      'directory' => $filesystem_directory,
      'drupal_directory' => $stream_directory,
      'filename' => $zip_filename,
    ];
  }

  /**
   * Return a csv's path and filename.
   *
   * The download URL is mapped onto the local files directory: the part
   * after 'dataset-archives/' (or, failing that, after 'files/') is taken as
   * the path relative to the public or private files directory.
   *
   * @param string $downloadUrl
   *   Download URL from dataset either as a URL or a stream URI.
   * @param bool $private
   *   Whether the archive is private or not.
   *
   * @return array
   *   Array with csv's path and filename values, or an empty array when the
   *   URL cannot be resolved.
   */
  private function csvPathAndFilename(string $downloadUrl, bool $private): array {
    $csvFilename = basename($downloadUrl);
    // Remove stream scheme if present.
    $downloadUrl = preg_replace('/^(public|private):\/\//', '', $downloadUrl) ?? $downloadUrl;

    // Check to see if its an archive path. Splitting is limited to two
    // pieces so a later repeat of the marker (e.g. '.../files/my-files/x.csv')
    // stays part of the relative path.
    if (str_contains($downloadUrl, 'dataset-archives/')) {
      // Its already in an archive path.
      $subdir = 'dataset-archives/';
      $pieces = explode($subdir, $downloadUrl, 2);
    }
    else {
      $subdir = '';
      $pieces = explode('files/', $downloadUrl, 2);
    }
    $csvPathAndFilename = $pieces[1] ?? $downloadUrl;

    if (empty($csvPathAndFilename)) {
      $this->logger->error('Download URL %downloadUrl is not valid. Could not be added to Zip file.', ['%downloadUrl' => $downloadUrl]);
      return [];
    }

    // dirname() yields '.' for a file sitting directly in the files root;
    // treat that as "no sub-directory" rather than as part of the path.
    $csvPath = dirname($csvPathAndFilename);
    if ($csvPath === '.') {
      $csvPath = '';
    }

    // Same accessor pair as archivePathAndFilename().
    $base_path = $private
      ? $this->util->getDrupalPrivateFilesDirectory()
      : $this->util->getDrupalPublicFilesDirectory();
    // Callers append "/{$csvFilename}", so never end on a slash.
    $csvFullPath = rtrim($base_path . '/' . $subdir . $csvPath, '/');

    return [$csvFullPath, $csvFilename];
  }

  /**
   * Queue annual archives of full, keyword or theme aggregations.
   *
   * Finds every published archive of the given type modified since January
   * 1st of the current year, groups them by term and privacy, and queues one
   * 'annual' aggregation per group.
   *
   * @param string $type
   *   The type of annual archive to create: individual or aggregate.
   * @param string|null $aggregate_of
   *   The aggregation type if applicable: 'keyword' or 'theme'.
   */
  public function queueAnnualArchives(string $type = 'individual', ?string $aggregate_of = NULL): void {
    $current_year = Util::date()->format('Y');
    $archive_storage = $this->entityTypeManager->getStorage('dda_archive');

    // Load all the individual archives of the current year.
    $query = $archive_storage->getQuery();
    $query->condition('archive_type', $type, '=');
    $query->condition('dataset_modified', "{$current_year}-01-01", '>=');
    $query->condition('status', 1);
    // Doesn't matter who is running the query, we need them all.
    $query->accessCheck(FALSE);
    if ($aggregate_of) {
      $query->condition('aggregate_of', $aggregate_of, '=');
    }

    $matching_ids = $query->execute();
    if (empty($matching_ids)) {
      $this->logger->info("No archives found for creating annual @type archives.", ['@type' => $type]);
      return;
    }

    // Need to collate them if keyword or theme.
    $collated = $this->collateArchivesByType($archive_storage->loadMultiple($matching_ids), $type, $aggregate_of);

    // One queue item per term and privacy combination.
    foreach ($collated as $term => $by_privacy) {
      foreach ($by_privacy as $privacy => $archive_infos) {
        // Individual archives are not grouped under a term.
        $term_name = ($type === 'individual') ? '' : $term;
        $group_ids = array_column($archive_infos, 'id');
        $is_private = ($privacy === 'private');
        $this->addToAggregationQueue('annual', $term_name, $group_ids, $is_private, $aggregate_of);
      }
    }
  }

  /**
   * Collate archives by type.
   *
   * Archives are grouped first by bucket (the 'aggregate_on' term for
   * 'aggregate', the literal type for 'individual'), then by privacy
   * ('private' or 'public'), and finally keyed by archive id. Any other type
   * produces no entries.
   *
   * @param array $archives
   *   Array of DdaArchive entities.
   * @param string $type
   *   The type of archive: 'individual', 'aggregate', 'annual' to collate.
   * @param string|null $aggregate_of
   *   The aggregation of if applicable: 'keyword' or 'theme'.
   *
   * @return array
   *   Collated array of archives: [bucket][privacy][archive id] => data.
   */
  protected function collateArchivesByType(array $archives, string $type, ?string $aggregate_of): array {
    $collated = [];
    /** @var \Drupal\dkan_dataset_archiver\Entity\DdaArchive $archive */
    foreach ($archives as $archive) {
      $privacy = $archive->isPrivate() ? 'private' : 'public';

      if ($type === 'aggregate') {
        $bucket = $archive->get('aggregate_on')->value;
        if (empty($bucket) || empty($aggregate_of)) {
          // Without a term or a collation type there is nothing to group on.
          continue;
        }
        $archive_info = $this->createArchiveData($archive, $aggregate_of);
      }
      elseif ($type === 'individual') {
        $bucket = $type;
        $archive_info = $this->createArchiveData($archive, $type);
      }
      else {
        continue;
      }

      // Key every entry by its archive id, the first one included, so the
      // shape of a group does not depend on insertion order.
      $collated[$bucket][$privacy][$archive_info['id']] = $archive_info;
    }
    return $collated;
  }

  /**
   * Create an array of archive data from a DdaArchive entity.
   *
   * @param \Drupal\dkan_dataset_archiver\Entity\DdaArchiveInterface $archive
   *   The DdaArchive entity.
   * @param string $type
   *   The type of archive: 'individual', 'keyword', 'theme', 'annual_keyword',
   *   'annual_theme', 'annual', or 'current'.
   *
   * @return array
   *   Array of archive data.
   */
  protected function createArchiveData(DdaArchiveInterface $archive, string $type): array {
    return [
      'id' => $archive->id(),
      'type' => $type,
      // The four digit year as reported by Util::date() at call time.
      'annual_create' => Util::date()->format('Y'),
      'private' => $archive->isPrivate(),
      'dataset_id' => $archive->get('dataset_id')->value,
      'modified_date' => $archive->get('dataset_modified')->value,
      // @todo Assemble the files correctly local? remote?.
      'resource_files' => array_column($archive->getResourceFileItems(), 'uri'),
    ];
  }

  /**
   * Create an individual archive for a dataset.
   *
   * @param \Drupal\node\NodeInterface $data
   *   The data object.
   */
  public function createIndividualArchive(NodeInterface $data): void {
    if (empty($this->archiverSettings->get('archive')) || !$this->isArchiveWorthy($data)) {
      // Archiving is disabled or the dataset is not archive worthy so bail out.
      return;
    }

    // Convert the dataset's modified date into the date-only storage format.
    $datetime_string = Util::grabMetadata($data, 'modified');
    $datetime_object = new DrupalDateTime($datetime_string, 'UTC');
    $datetime_object->setTimezone(new \DateTimeZone(DateTimeItemInterface::STORAGE_TIMEZONE));
    $formatted_modified_date = $datetime_object->format(DateTimeItemInterface::DATE_STORAGE_FORMAT);

    // Cast so the translatable fallback becomes a plain string before it is
    // used in file names and handed to string-typed parameters.
    $dataset_id = (string) (Util::grabMetadata($data, 'identifier') ?? t('undefined'));
    $is_private = Util::isDatasetPrivate($data);

    // Metadata lookups may yield NULL; the prep methods require arrays.
    $themes = Util::grabMetadata($data, 'theme') ?? [];
    $keywords = Util::grabMetadata($data, 'keyword') ?? [];
    $distributions = Util::grabMetadata($data, 'distribution') ?? [];

    // The files must be prepared before the values array is built because
    // $file_size is filled in by reference while the files are written.
    $file_size = 0;
    $resource_files = $this->prepMultiValueFileFieldForStorage($formatted_modified_date, $dataset_id, $distributions, $file_size, $is_private);

    $archive_data = [
      'name' => Util::grabMetadata($data, 'title'),
      'archive_type' => 'individual',
      // NOTE(review): every other key here is snake_case; confirm that
      // 'access level' (with a space) is the intended dda_archive field name.
      'access level' => Util::grabMetadata($data, 'accessLevel'),
      'dataset_modified' => $formatted_modified_date,
      'dataset_id' => $dataset_id,
      'themes' => $this->prepMultiValueFieldForStorage($themes, 'theme'),
      'keywords' => $this->prepMultiValueFieldForStorage($keywords, 'keyword'),
      'private' => $is_private,
      'distributions' => $distributions,
      'resource_files' => $resource_files,
      'size' => $file_size,
      'status' => '1',
    ];

    $storage = $this->entityTypeManager->getStorage('dda_archive');
    /** @var \Drupal\dkan_dataset_archiver\Entity\DdaArchiveInterface $archive */
    $archive = $storage->create($archive_data);
    $archive->setSyncing(TRUE);
    $archive->save();
  }

  /**
   * Prepare a multi-value field for storage w/ look-up of entity references.
   *
   * @param array $values
   *   Array of values to prepare.
   * @param string $type
   *   The type of field, either 'theme', 'keyword', or ''.
   *
   * @return array
   *   Array formatted for storage.
   */
  public function prepMultiValueFieldForStorage(array $values, string $type = ''): array {
    // Only keywords and themes are stored as references that can be resolved.
    $is_reference_type = in_array($type, ['keyword', 'theme']);
    $prepared = [];
    foreach ($values as $raw_value) {
      $term = $raw_value;
      if (Uuid::isValid($raw_value) && $is_reference_type) {
        // A uuid of a reference type: swap it for the referenced item's data.
        $stored_json = $this->getStorage($type)->retrieve($raw_value, FALSE);
        if ($stored_json) {
          $term = json_decode($stored_json)->data ?? t('not found');
        }
      }
      // Store the mapped version of the term.
      $prepared[] = ['value' => $this->getMappedTerm($term)];
    }
    return $prepared;
  }

  /**
   * Prepare a multi-value File field for storage.
   *
   * @param string $modified_date
   *   The modified date of the dataset. Format Y-m-d.
   * @param string $dataset_id
   *   The identifier of the dataset.
   * @param array $values
   *   Array of values to prepare.
   * @param int $file_size
   *   The cumulative size of the files added, passed by reference.
   * @param bool $private
   *   TRUE if the files should be put in private storage.
   *
   * @return array
   *   Array of file field items, each a ['target_id' => managed file id] pair.
   */
  public function prepMultiValueFileFieldForStorage(string $modified_date, string $dataset_id, array $values, int &$file_size, bool $private = FALSE): array {
    $items = [];
    $archive_path_and_filename = $this->archivePathAndFilename('individual', '', '', $dataset_id, $private);
    $destination_directory = Util::adjustStorageLocation($archive_path_and_filename['drupal_directory'], $private);
    foreach ($values as $resource) {
      if (!Uuid::isValid($resource)) {
        // Only uuid references can be looked up.
        continue;
      }
      $referenced_entity = $this->getStorage('distribution')->retrieve($resource, FALSE);
      if (!$referenced_entity) {
        continue;
      }

      // A record that cannot be decoded, or that has no downloadURL, results
      // in an empty URL. That is reported through the failure handling below
      // instead of raising a TypeError when calling loadResource().
      $distribution_data = json_decode($referenced_entity)->data ?? NULL;
      $file_url = $distribution_data->downloadURL ?? '';
      // parse_url() returns NULL when the URL has no path; cast for basename().
      $filename = basename((string) parse_url($file_url, PHP_URL_PATH));
      $filename = "{$dataset_id}_{$modified_date}_{$filename}";
      $file_content = $this->loadResource($file_url, $private);
      if ($file_content === FALSE) {
        $this->logger->error('Failed to get file content from %file_url for dataset %dataset_id.', [
          '%file_url' => $file_url,
          '%dataset_id' => $dataset_id,
        ]);
      }
      $destination = "{$destination_directory}/{$filename}";

      // The data dictionary is optional and archived next to the resource.
      $dictionary_url = $distribution_data->describedBy ?? '';
      $dictionary_filename = basename((string) parse_url($dictionary_url, PHP_URL_PATH));
      $dictionary_content = !empty($dictionary_url) ? file_get_contents($dictionary_url) : '';
      if ($dictionary_content === FALSE) {
        $this->logger->error('Failed to get dictionary content from %dictionary_url for dataset %dataset_id.', [
          '%dictionary_url' => $dictionary_url,
          '%dataset_id' => $dataset_id,
        ]);
      }

      try {
        // Ensure the destination directory exists.
        $directory_exists = $this->fileSystem->prepareDirectory($destination_directory, FileSystemInterface::CREATE_DIRECTORY);
        if (!$directory_exists) {
          throw new \Exception("Failed to create the directory $destination_directory");
        }
        // Compare explicitly so content such as "0" is not seen as missing.
        if ($file_content === FALSE || $file_content === '') {
          throw new \Exception("Failed to get file content from $file_url");
        }
        // A dictionary that was referenced but could not be fetched aborts
        // the archiving of this resource.
        if ($dictionary_url && !$dictionary_content) {
          throw new \Exception("Failed to get dictionary content from $dictionary_url");
        }

        // Save the data to the file and create a managed file entity.
        $managed_file = $this->fileRepository->writeData($file_content, $destination, FileExists::Replace);
        if ($managed_file instanceof FileInterface) {
          $items[] = ['target_id' => $managed_file->id()];
          $file_size += $managed_file->getSize();
          $this->logger->info('Created dataset archive file successfully: @file', ['@file' => $managed_file->getFileUri()]);
        }
        else {
          $this->logger->error('Failed to save archive file. @file', ['@file' => $destination]);
        }

        // Save the dictionary file if it exists.
        if ($dictionary_content) {
          $dictionary_destination = "{$destination_directory}/{$dictionary_filename}";
          $managed_dictionary_file = $this->fileRepository->writeData($dictionary_content, $dictionary_destination, FileExists::Replace);
          if ($managed_dictionary_file instanceof FileInterface) {
            $items[] = ['target_id' => $managed_dictionary_file->id()];
            $file_size += $managed_dictionary_file->getSize();
            $this->logger->info('Created dataset data dictionary archive file successfully: @file', ['@file' => $managed_dictionary_file->getFileUri()]);
          }
          else {
            $this->logger->error('Failed to save archive dictionary file. @file', ['@file' => $dictionary_destination]);
          }
        }
      }
      catch (\Exception $e) {
        // One failing resource must not stop the remaining ones.
        $this->logger->error('Error saving archive file: @error', ['@error' => $e->getMessage()]);
      }
    }
    return $items;
  }

  /**
   * Load a resource from a URL, local or remote.
   *
   * @param string $file_url
   *   The URL of the file to load.
   * @param bool $private
   *   Whether the file is private or not.
   *
   * @return string|false
   *   The file content as a string, or FALSE on failure.
   */
  protected function loadResource(string $file_url, bool $private = FALSE): string|false {
    // For speed and access reasons, we are taking a two step approach.
    // Since resources could be local or remote, we are going to try to
    // find them locally first, then try the remote if that fails.
    if (empty($file_url)) {
      // Nothing to attempt to load. Bail out.
      return FALSE;
    }
    $file_content = FALSE;
    // This is an optimistic attempt to load locally first.
    $local_file_stream = Util::deduceLocaleFileStream($file_url, $private);
    if (!empty($local_file_stream)) {
      $file_content = file_get_contents($local_file_stream);
      if ($file_content === FALSE) {
        // Only report a local failure when a local load was really tried.
        $this->logger->info('Failed to load resource locally from file stream %local_file_stream.', [
          '%local_file_stream' => $local_file_stream,
          '%file_url' => $file_url,
        ]);
      }
    }
    if ($file_content === FALSE) {
      // Nothing was loaded locally, try the direct URL.
      $file_content = file_get_contents($file_url);
      if ($file_content === FALSE) {
        $this->logger->error('Failed to load resource from URL %file_url.', [
          '%file_url' => $file_url,
        ]);
      }
    }

    // Empty content counts as a failure. Compare explicitly rather than with
    // empty(), which would also reject a file whose entire content is "0".
    return ($file_content === FALSE || $file_content === '') ? FALSE : $file_content;
  }

  /**
   * Check if this data is worthy of having an archive.
   *
   * @param \Drupal\node\NodeInterface $data
   *   The data object.
   *
   * @return bool
   *   TRUE if the data is archive worthy, FALSE if not.
   */
  public function isArchiveWorthy(NodeInterface $data): bool {
    // Only published datasets are candidates.
    $is_published_dataset = Util::isDataset($data) && $data->isPublished();
    if (!$is_published_dataset) {
      return FALSE;
    }

    // Private datasets are only candidates when the settings allow it.
    if (!$this->archiverSettings->get('archive_private')) {
      if (Util::isDatasetPrivate($data)) {
        return FALSE;
      }
    }

    // A new dataset that has a resource(s) is always worthy.
    $has_distribution = $data->isNew() && !empty(Util::grabMetadata($data, 'distribution'));
    if ($has_distribution) {
      return TRUE;
    }

    // Otherwise it depends on whether the archivable data changed.
    return Util::archivableDataChanged($data);
  }

  /**
   * Get storage.
   *
   * @param string $schema_id
   *   The metastore schema id, e.g. 'keyword', 'theme' or 'distribution'.
   *
   * @return \Drupal\metastore\Storage\NodeData
   *   Entity storage.
   */
  protected function getStorage(string $schema_id): NodeData {
    // Create the storage instance on first use and keep it for later calls.
    $this->storages[$schema_id] ??= $this->metastoreStorage->getInstance($schema_id);
    return $this->storages[$schema_id];
  }

  /**
   * Get and set the term map from configuration.
   *
   * @return array
   *   Associative array of term map.
   *   Key is the original term, value is the mapped final term.
   */
  public function getMap(): array {
    if (empty($this->termMap)) {
      // The setting may not be saved yet, treat that as an empty map.
      $map = (string) ($this->archiverSettings->get('theme_keyword_map') ?? '');
      // Split on any newline style. PHP_EOL depends on the platform PHP runs
      // on, not on how the configuration text was entered.
      $raw_array = preg_split('/\r\n|\r|\n/', $map) ?: [];
      $term_map = [];
      foreach ($raw_array as $line) {
        // Each usable line has the form "original -> mapped"; anything else
        // is ignored.
        $pieces = explode('->', $line);
        if (count($pieces) === 2) {
          $term_map[trim($pieces[0])] = trim($pieces[1]);
        }
      }
      $this->termMap = $term_map;
    }
    return $this->termMap;
  }

  /**
   * Get the mapped term from configuration, or return the original if no map.
   *
   * @param string $term
   *   The original term.
   *
   * @return string
   *   The mapped term, or the original if no map exists.
   */
  public function getMappedTerm(string $term): string {
    $term_map = $this->getMap();
    // Terms without a mapping are passed through untouched.
    if (isset($term_map[$term])) {
      return $term_map[$term];
    }
    return $term;
  }

  /**
   * Check if a term is blocked from archiving.
   *
   * @param string $type
   *   Either 'keyword' or 'theme'.
   * @param string $term
   *   The term to check.
   *
   * @return bool
   *   TRUE if the term is blocked, FALSE if not.
   */
  public function isBlockedTerm($type, $term): bool {
    // Settings are named 'keywords_to_skip' and 'themes_to_skip'.
    $skip_type = "{$type}s_to_skip";
    // Fall back to an empty string: explode()/preg_split() need a string, the
    // former array fallback raised a TypeError when the setting was unset.
    $raw_skip_list = (string) ($this->archiverSettings->get($skip_type) ?? '');
    // One term per line. Split on any newline style and trim each entry so
    // that "\r\n" line endings do not leave a stray "\r" that blocks matches.
    $skip_map = array_map('trim', preg_split('/\r\n|\r|\n/', $raw_skip_list) ?: []);
    // Strict comparison avoids numeric-string juggling such as '1e3' matching
    // '1000'. The cast keeps non-string terms comparable.
    return in_array((string) $term, $skip_map, TRUE);
  }

  /**
   * Get dataset ids by theme or keyword.
   *
   * @param string $type
   *   Either 'keyword' or 'theme'.
   * @param string $theme_or_keyword
   *   The theme or keyword to search for.
   *
   * @return array
   *   Array of dataset ids for datasets that match the search. Separated
   *   by 'private' and 'public' keys.
   */
  public function getDatasetsIdsByThemeOrKeyword(string $type, string $theme_or_keyword): array {
    // Since themes or keywords can be mapped, we can not use a metastore
    // query directly. We have to load them and map them each.
    // This is not efficient or fast and the approach makes me sad.
    // @todo Rework this to be more efficient.
    $datasets = $this->getAllPublishedDatasets();
    $matched_dataset_ids = [];
    /** @var \Drupal\node\NodeInterface $dataset */
    foreach ($datasets as $dataset) {
      // A dataset without this metadata simply has no terms to match; without
      // the fallback array_map() would throw a TypeError on NULL.
      $terms = Util::grabMetadata($dataset, $type) ?? [];
      $mapped_terms = array_map([$this, 'getMappedTerm'], $terms);
      // Mapped terms are strings, so compare strictly.
      if (in_array($theme_or_keyword, $mapped_terms, TRUE)) {
        $private_text = Util::isDatasetPrivate($dataset) ? 'private' : 'public';
        // A privacy key only exists once a dataset matched for it.
        $matched_dataset_ids[$private_text][] = $dataset->id();
      }
    }

    return $matched_dataset_ids;
  }

  /**
   * Get all published datasets, cached for subsequent calls.
   *
   * @return array
   *   Array of NodeInterface objects.
   */
  protected function getAllPublishedDatasets(): array {
    // Reuse the datasets loaded by an earlier call when there are any.
    if (!empty($this->publishedDatasets)) {
      return $this->publishedDatasets;
    }

    $node_storage = $this->entityTypeManager->getStorage('node');
    $node_ids = $node_storage->getQuery()
      ->condition('type', 'data')
      ->condition('field_data_type', 'dataset')
      ->condition('status', 1)
      // Doesn't matter who is running the query, we need them all.
      ->accessCheck(FALSE)
      ->execute();

    // Essentially caching these for subsequent calls.
    $this->publishedDatasets = $node_storage->loadMultiple($node_ids);

    return $this->publishedDatasets;
  }

  /**
   * Build aggregate metadata from all published datasets.
   *
   * @return array
   *   Array with 'theme' and 'keyword' keys, each containing 'public' and
   *   'private' keys holding arrays of dataset node ids keyed by mapped term.
   */
  public function buildAggregateMetaDataFromAllPublishedDatasets(): array {
    $datasets = $this->getAllPublishedDatasets();
    $aggregated_metadata = [
      'keyword' => ['public' => [], 'private' => []],
      'theme' => ['public' => [], 'private' => []],
    ];

    // Settings do not change while looping, so read them once up front.
    $archive_private = (bool) $this->archiverSettings->get('archive_private');
    // Keep only the term types that are enabled for aggregation.
    $enabled_types = array_keys(array_filter([
      'theme' => $this->archiverSettings->get('archive_by_theme'),
      'keyword' => $this->archiverSettings->get('archive_by_keyword'),
    ]));

    /** @var \Drupal\node\NodeInterface $dataset */
    foreach ($datasets as $dataset) {
      $is_private = Util::isDatasetPrivate($dataset);
      if ($is_private && !$archive_private) {
        // We are not supposed to archive private datasets so skip it.
        continue;
      }
      $privacy = $is_private ? 'private' : 'public';
      $node_id = $dataset->id();

      foreach ($enabled_types as $type) {
        // A dataset without this metadata contributes nothing; the fallback
        // avoids iterating over NULL.
        $terms = Util::grabMetadata($dataset, $type) ?? [];
        foreach ($terms as $term) {
          $mapped_term = $this->getMappedTerm($term);
          if (!$this->isBlockedTerm($type, $mapped_term)) {
            $aggregated_metadata[$type][$privacy][$mapped_term][] = $node_id;
          }
        }
      }
    }
    return $aggregated_metadata;
  }

  /**
   * Re-queue current archiving for all published datasets.
   *
   * @return int
   *   The number of archives queued.
   */
  public function reQueueCurrentArchiving(): int {
    if (!$this->archiverSettings->get('create_current_download')) {
      // We are not creating current download archives.
      return 0;
    }

    $queued = 0;
    // Walk the metadata: aggregation type > privacy > term > dataset ids.
    foreach ($this->buildAggregateMetaDataFromAllPublishedDatasets() as $aggregate_of => $by_privacy) {
      foreach ($by_privacy as $privacy_key => $ids_by_term) {
        $is_private = ($privacy_key === 'private');
        foreach ($ids_by_term as $term => $node_ids) {
          // One 'current' queue item per term.
          $this->addToAggregationQueue('current', $term, $node_ids, $is_private, $aggregate_of);
          $queued++;
        }
      }
    }
    return $queued;
  }

}
