<?php

namespace Drupal\dkan_dataset_archiver\Service;

use Drupal\common\DatasetInfo;
use Drupal\Component\Uuid\Uuid;
use Drupal\Core\Config\ConfigFactoryInterface;
use Drupal\Core\Datetime\DrupalDateTime;
use Drupal\Core\DependencyInjection\ContainerInjectionInterface;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Extension\ModuleHandler;
use Drupal\Core\File\FileExists;
use Drupal\Core\File\FileSystemInterface;
use Drupal\Core\Queue\QueueFactory;
use Drupal\datetime\Plugin\Field\FieldType\DateTimeItemInterface;
use Drupal\dkan_dataset_archiver\AwsS3Trait;
use Drupal\dkan_dataset_archiver\HelperTrait;
use Drupal\file\Entity\File;
use Drupal\file\FileInterface;
use Drupal\file\FileRepositoryInterface;
use Drupal\metastore_search\Search;
use Drupal\metastore\Storage\DataFactory;
use Drupal\metastore\Storage\NodeData;
use Drupal\node\NodeInterface;
use Procrastinator\Result;
use Psr\Log\LoggerInterface;
use Psr\Log\LogLevel;
use Symfony\Component\DependencyInjection\ContainerInterface;
use Symfony\Component\Finder\Finder;
use Symfony\Component\Finder\SplFileInfo;

/**
 * Archive Service.
 */
class ArchiveService implements ContainerInjectionInterface {

  use AwsS3Trait;
  use HelperTrait;

  const README = 'readme.txt';

  /**
   * The file repository service.
   *
   * @var \Drupal\file\FileRepositoryInterface
   */
  protected $fileRepository;

  /**
   * The file system service.
   *
   * @var \Drupal\Core\File\FileSystemInterface
   */
  protected $fileSystem;

  /**
   * The archiver settings.
   *
   * @var \Drupal\Core\Config\ImmutableConfig
   */
  protected $archiverSettings;

  /**
   * Dkan datasetInfo.
   *
   * @var \Drupal\common\DatasetInfo
   */
  private $datasetInfo;

  /**
   * The entity type manager.
   *
   * @var \Drupal\Core\Entity\EntityTypeManagerInterface
   */
  private $entityTypeManager;

  /**
   * The dkan_dataset_archiver logger channel.
   *
   * @var \Psr\Log\LoggerInterface
   */
  protected $logger;

  /**
   * The metastore search api service.
   *
   * @var \Drupal\metastore_search\Search
   */
  private $metastoreSearch;

  /**
   * Metastore Storage factory.
   *
   * @var \Drupal\metastore\Storage\DataFactory
   */
  protected $metastoreStorage;

  /**
   * The module handler.
   *
   * @var \Drupal\Core\Extension\ModuleHandler
   */
  protected $moduleHandler;


  /**
   * The queue factory.
   *
   * @var \Drupal\Core\Queue\QueueFactory
   */
  protected $queue;

  /**
   * Storages.
   *
   * @var array
   */
  protected $storages = [];

  /**
   * Term map to convert or coalesce terms.
   *
   * @var array
   */
  protected $termMap = [];

  /**
   * Utility of helper functions.
   *
   * @var \Drupal\dkan_dataset_archiver\Service\Util
   */
  protected $util;

  /**
   * Current year.
   *
   * @var string
   */
  private $year;

  /**
   * {@inheritDoc}
   *
   * @param \Drupal\Core\Config\ConfigFactoryInterface $config_factory
   *   The config factory.
   * @param \Drupal\dkan_dataset_archiver\Service\Util $util
   *   Utility.
   * @param \Drupal\common\DatasetInfo $datasetInfo
   *   DKAN datasetInfo.
   * @param \Drupal\metastore_search\Search $metastoreSearchService
   *   Metastore Search wrapper for the SearchApi.
   * @param \Drupal\metastore\Storage\DataFactory $metastoreStorage
   *   Metastore Storage factory.
   * @param \Drupal\Core\Entity\EntityTypeManagerInterface $entityTypeManager
   *   The entity type manager.
   * @param \Drupal\file\FileRepositoryInterface $file_repository
   *   The file repository service.
   * @param \Drupal\Core\File\FileSystemInterface $file_system
   *   The file system service.
   * @param \Psr\Log\LoggerInterface $logger
   *   The dkan_dataset_archiver logger channel.
   * @param \Drupal\Core\Extension\ModuleHandler $moduleHandler
   *   The module handler.
   * @param \Drupal\Core\Queue\QueueFactory $queue
   *   The queue factory.
   */
  public function __construct(
    ConfigFactoryInterface $config_factory,
    Util $util,
    DatasetInfo $datasetInfo,
    Search $metastoreSearchService,
    DataFactory $metastoreStorage,
    EntityTypeManagerInterface $entityTypeManager,
    FileRepositoryInterface $file_repository,
    FileSystemInterface $file_system,
    LoggerInterface $logger,
    ModuleHandler $moduleHandler,
    QueueFactory $queue,
  ) {
    // Module configuration and the year captured at construction time.
    $this->archiverSettings = $config_factory->get('dkan_dataset_archiver.settings');
    $this->year = Util::date()->format('Y');

    // DKAN collaborators.
    $this->datasetInfo = $datasetInfo;
    $this->metastoreSearch = $metastoreSearchService;
    $this->metastoreStorage = $metastoreStorage;

    // Drupal core collaborators.
    $this->entityTypeManager = $entityTypeManager;
    $this->fileRepository = $file_repository;
    $this->fileSystem = $file_system;
    $this->moduleHandler = $moduleHandler;
    $this->queue = $queue;

    // Module-level helpers.
    $this->logger = $logger;
    $this->util = $util;
  }

  /**
   * {@inheritDoc}
   */
  public static function create(ContainerInterface $container): self {
    // Service ids, listed in the same order as the constructor parameters.
    $service_ids = [
      'config.factory',
      'dkan_dataset_archiver.util',
      'dkan.dataset_info',
      'dkan.metastore_search.service',
      'dkan.metastore.storage',
      'entity_type.manager',
      'file.repository',
      'file_system',
      'logger.channel.dkan_dataset_archiver',
      'module_handler',
      'queue',
    ];

    $services = [];
    foreach ($service_ids as $service_id) {
      $services[] = $container->get($service_id);
    }

    return new static(...$services);
  }

  /**
   * Bring content from S3's archive.
   */
  public function fetch(): void {
    /** @var \Procrastinator\Result $result */
    $result = $this->retrieveYearsFromS3Bucket($this->archiverSettings->get('remote_address'));

    // Report the failure reason when the retrieval ended in error.
    if ($result->getStatus() == Result::ERROR) {
      $this->logger->log(LogLevel::ERROR, $result->getError(), []);
      return;
    }

    $this->logger->log(LogLevel::INFO, 'All files were retrieved', []);
  }

  /**
   * Retrieve years from S3 bucket.
   *
   * @param string $bucketName
   *   The S3 bucket name.
   *
   * @return mixed
   *   The result of the operation. @todo Investigate and make more specific.
   */
  private function retrieveYearsFromS3Bucket($bucketName) {
    $this->s3Bucket = $bucketName;

    $fileSystem = $this->getAwsS3Filesystem();

    $contents = $fileSystem->listContents('', TRUE);
    $previous_year = (int) Util::date('-1 year')->format('Y');
    // The setting is cast because archiveYearAllowed() requires an int and
    // arithmetic on a non-numeric string throws in PHP 8.
    $oldest_year = $previous_year - (int) $this->archiverSettings->get('local_years_retained');

    // Keep only files whose directory ends in a year that is still retained.
    $filePaths = [];
    foreach ($contents as $thing) {
      if ($thing['type'] == 'file' && $this->archiveYearAllowed($thing['dirname'], $oldest_year)) {
        $filePaths[] = [
          'path' => "s3://{$bucketName}/" . $thing['path'],
          'directory' => $thing['dirname'],
        ];
      }
    }

    foreach ($filePaths as $info) {
      $directory = $this->util->getDrupalPublicFilesDir() . '/archive/' . $info['directory'];
      // The trailing is_dir() covers another process creating the directory
      // between the first check and mkdir().
      if (!is_dir($directory) && !mkdir($directory, 0777, TRUE) && !is_dir($directory)) {
        $failure = new Result();
        $failure->setStatus(Result::ERROR);
        $failure->setError("Unable to create directory {$directory}");
        return $failure;
      }
      $fileFetcher = $this->getFileFetcher($info['path'], $directory);
      $result = $fileFetcher->run();
      // Stop at the first failed download and hand its result to the caller.
      if ($result->getStatus() == Result::ERROR) {
        return $result;
      }
    }

    $result = new Result();
    $result->setStatus(Result::DONE);
    return $result;
  }

  /**
   * Ensure archive year is allowed.
   *
   * @param string $directory
   *   Directory containing the archive.
   * @param int $oldest_year
   *   Oldest year allowed.
   *
   * @return bool
   *   TRUE if the year is within the allowed range.
   */
  private function archiveYearAllowed(string $directory, int $oldest_year): bool {
    if ($directory === '') {
      return FALSE;
    }
    // explode() always returns at least one element, so no emptiness check
    // is needed before reading the last path segment.
    $pieces = explode('/', $directory);
    $year = end($pieces);
    // Only a numeric final segment strictly newer than the cut-off qualifies.
    return is_numeric($year) && (int) $year > $oldest_year;
  }

  /**
   * Queue aggregate archiving of a theme's or keyword's individual archives.
   *
   * @param string $type
   *   Either 'keyword' or 'theme'.
   * @param string $term
   *   The term to aggregate on.
   * @param array $archive_ids
   *   The archive ids to include in the aggregation.
   */
  public function addToAggregationQueue($type, $term, $archive_ids): void {
    // Payload consumed later by the aggregation queue worker.
    $item = [
      'type' => $type,
      'term' => $term,
      'archive_ids' => $archive_ids,
    ];

    /** @var \Drupal\Core\Queue\QueueInterface $aggregation_queue */
    $aggregation_queue = $this->queue->get('archive_aggregation');
    $item_id = $aggregation_queue->createItem($item);

    $context = [
      '@type' => $type,
      '@term' => $term,
      '@ids' => implode(',', $archive_ids),
      '%queueId' => $item_id,
    ];
    $this->logger->info("@type @term had queued archives '@ids' for aggregation queue_id: %queueId", $context);
  }

  /**
   * Create an aggregate archive for a theme or keyword.
   *
   * This is called by the ArchiveAggregation queue worker.
   *
   * @param array $data
   *   The aggregation data containing:
   *   - type: 'keyword' or 'theme'.
   *   - term: the term to aggregate on.
   *   - archive_ids: the archive ids to include in the aggregation.
   *
   * @return bool
   *   TRUE when the archive was created or the data was invalid (so the queue
   *   item can be removed), FALSE otherwise.
   */
  public function createAggregateArchive(array $data): bool {
    // Same truthiness test that createIndividualArchive() applies to this
    // setting, so both archive paths agree on when archiving is enabled.
    if (empty($this->archiverSettings->get('archive'))) {
      // Archiving is turned off, so bail out.
      return FALSE;
    }
    $type = $data['type'] ?? '';
    $term = $data['term'] ?? '';
    $archive_ids = (array) ($data['archive_ids'] ?? []);
    // @todo private is missing and needs to be added to the queue data.
    $private = (bool) ($data['private'] ?? FALSE);

    $msg_data = [
      '@type' => $type,
      '@term' => (!empty($term)) ? $term : t('unspecified'),
      '@private' => $private,
      // Built from the normalised ids: reading $data['archive_ids'] directly
      // fails when the key is absent, before the validation below can run.
      '@ids' => implode(', ', $archive_ids),
    ];

    if (empty($type) || empty($term) || empty($archive_ids)) {
      $this->logger->error("Invalid data sent to createAggregateArchive(). type: '@type' term: '@term' with archives ids: '@ids'", $msg_data);
      // In the case of bad data, we do not want to keep this in the queue,
      // so return TRUE to indicate it can be removed from the queue.
      return TRUE;
    }

    $this->logger->info('Creating aggregate archive for @type: @term with archives @ids', $msg_data);
    $zip_info = $this->createAggregatedZipFile($type, $term, $this->aggregateArchiveFiles($archive_ids), $private);
    // createAggregatedZipFile() returns an empty array when the zip could not
    // be built; do not save a file entity or archive that points at nothing.
    if (empty($zip_info['drupal_file_uri']) || empty($zip_info['file_url'])) {
      $this->logger->error('Aggregate archive for @type: @term was not saved because its zip file could not be created.', $msg_data);
      return FALSE;
    }

    // Register the zip as a file entity so the archive can reference it.
    $file = File::create([
      'uri' => $zip_info['drupal_file_uri'],
      'filename' => basename($zip_info['file_url']),
      'filemime' => 'application/zip',
      'status' => 1,
    ]);
    $file->save();

    $archive_data = [
      'name' => "{$type}: {$term} " . Util::date()->format('Y-m-d H:i'),
      'archive_type' => $type,
      'dataset_modified' => Util::date()->format('Y-m-d'),
      'aggregate_on' => $term,
      'themes' => $type === 'theme' ? [$term] : [],
      'keywords' => $type === 'keyword' ? [$term] : [],
      'private' => $private,
      'size' => $zip_info['file_size'] ?? 0,
      'status' => '1',
      'source_archives' => Util::buildEntityReferenceTargets($archive_ids),
      // @todo Workout if it is private.
      'local_archive' => [
        'target_id' => $file->id(),
        'display' => 1,
      ],
      // @todo should be the s3 url if remotes are allowed but maybe adding
      // it should wait until after the mover queue moves it.
      // 'remote_url' = ??
    ];

    $storage = $this->entityTypeManager->getStorage('dda_archive');
    $archive = $storage->create($archive_data);
    $archive->save();
    return TRUE;
  }

  /**
   * Aggregate files from a list of archive ids and build a manifest.
   *
   * @param array $archive_ids
   *   The DdaArchive node ids to aggregate files from.
   *
   * @return array
   *   Array containing the file URLs and manifest text ['files', 'manifest'].
   */
  protected function aggregateArchiveFiles(array $archive_ids): array {
    $file_urls = [];
    $entries = [];

    // Nothing is loaded when no ids were supplied.
    $archives = [];
    if (!empty($archive_ids)) {
      $archives = $this->entityTypeManager->getStorage('dda_archive')->loadMultiple($archive_ids);
    }

    /** @var \Drupal\dkan_dataset_archiver\Entity\DdaArchive $archive */
    foreach ($archives as $archive) {
      // One manifest entry per source archive.
      $entry = [
        'name' => $archive->getName(),
        'dataset_id' => $archive->get('dataset_id')->value,
        'modified_date' => $archive->get('dataset_modified')->value,
        'resources' => [],
      ];

      $items = $archive->getResourceFileItems();
      if (!empty($items)) {
        foreach ($items as $item) {
          // Anything that is not a file entity is ignored.
          if (!($item instanceof FileInterface)) {
            continue;
          }
          $file_urls[] = $item->createFileUrl(FALSE);
          $entry['resources'][] = [
            'filename' => $item->getFilename(),
            'filesize' => $item->getSize(),
            'mime_type' => $item->getMimeType(),
          ];
        }
      }

      $entries[] = $entry;
    }

    return [
      'files' => $file_urls,
      'manifest' => json_encode($entries, JSON_PRETTY_PRINT),
    ];
  }

  /**
   * Clears cache and purges archive endpoints.
   *
   * @param string $type
   *   The type of the archive theme, keyword, annual_theme, annual_keyword.
   * @param string $theme_or_keyword
   *   The theme or keyword aggregating the archive.
   */
  public function clearCacheAndPurge(string $type, string $theme_or_keyword): void {
    // @todo fix all the routes and cache tags..
    // Routes and cache tags are resolved per archive type. Nothing consumes
    // them yet because the purge call below is still disabled.
    [$routes, $cache_tags] = match ($type) {
      'keyword', 'theme' => [
        [
          "api/1/archive/{$type}/{$theme_or_keyword}",
          "api/1/archive/{$type}/{$theme_or_keyword}/current-zip",
        ],
        ["{$type}_archives", "{$type}_current_zips"],
      ],
      'annual_keyword' => [
        [
          "api/1/archive/keyword/{$theme_or_keyword}",
          "api/1/archive/keyword/{$theme_or_keyword}/annual",
        ],
        ['keyword_archives', 'keyword_current_zips'],
      ],
      'annual_theme' => [
        [
          "api/1/archive/theme/{$theme_or_keyword}",
          "api/1/archive/theme/{$theme_or_keyword}/annual",
        ],
        ['theme_archives', 'theme_current_zips'],
      ],
      'annual' => [
        ['api/1/archive/annual/'],
        ['annual_archives'],
      ],
      // Unknown types resolve to nothing.
      default => [[], []],
    };
    // Clear relevant caches.
    // @todo rework this without purge service or optional.
    // ->archivePurgeService->clearUrls($archive_api_routes, $cache_tags);
  }

  /**
   * Create an individual zip for a given url. @todo consider deprecate.
   *
   * @param string $url
   *   The url to create a zip for.
   */
  public function createIndividualZip($url): void {
    if (!$url) {
      return;
    }

    // Only URLs that point inside sites/default/files can be mapped back to
    // a local path; anything else is reported and skipped.
    $pieces = explode('sites/default/files/', (string) $url, 2);
    if (!isset($pieces[1]) || $pieces[1] === '') {
      $this->logger->warning('Zip not created: %url is not inside the public files directory.', [
        '%url' => $url,
      ]);
      return;
    }

    $cleanUrl = $pieces[1];
    $fileSrc = $this->util->getDrupalPublicFilesDir() . "/{$cleanUrl}";
    if (!is_file($fileSrc)) {
      $this->logger->warning('Zip not created: source file %file does not exist.', [
        '%file' => $fileSrc,
      ]);
      return;
    }

    // The zip sits next to the source file and holds it under its bare name.
    $fileDest = $fileSrc . '.zip';
    $filename = basename($cleanUrl);

    $zip = new \ZipArchive();
    if ($zip->open($fileDest, \ZipArchive::CREATE | \ZipArchive::OVERWRITE) !== TRUE) {
      $this->logger->error('Zip not created: unable to open %file for writing.', [
        '%file' => $fileDest,
      ]);
      return;
    }
    $added = $zip->addFile($fileSrc, $filename);
    $closed = $zip->close();
    if (!$added || !$closed) {
      $this->logger->error('Zip not created: unable to write %file.', [
        '%file' => $fileDest,
      ]);
      return;
    }

    $this->logger->notice(
      "Zip for %fileDest created.", [
        '%fileDest' => $cleanUrl,
      ]
    );
  }

  /**
   * Create an individual zip by uuid.
   *
   * @param string $uuid
   *   The uuid of the dataset to zip.
   */
  public function createIndividualZipByUuid(string $uuid): void {
    $datasetInfo = $this->datasetInfo->gather($uuid);
    // A 'notice' entry is treated as "dataset not found".
    if (isset($datasetInfo['notice'])) {
      $this->logger->notice("Uuid not found.");
      return;
    }

    // Prefer the published revision and fall back to the latest one.
    $revision = $datasetInfo['published_revision'] ?? $datasetInfo['latest_revision'] ?? [];
    $distributions = $revision['distributions'] ?? [];
    if (!is_array($distributions)) {
      return;
    }

    foreach ($distributions as $distribution) {
      // Skip entries that carry no local file path (for example a
      // placeholder value instead of a distribution array).
      if (is_array($distribution) && !empty($distribution['file_path'])) {
        $this->createIndividualZip($distribution['file_path']);
      }
    }
  }

  /**
   * Create a $theme_or_keyword's aggregated zip file.
   *
   * @param string $type
   *   Either 'keyword' or 'theme'.
   * @param string $theme_or_keyword
   *   Theme or keyword.
   * @param array $zip_contents
   *   Array containing the file URLs and manifest text ['files', 'manifest'].
   * @param bool $private
   *   Whether the archive is private or not.
   *
   * @return array
   *   Contains the following elements:
   *   ['file_url', 'drupal_file_uri', 'file_size', 'private'].
   */
  protected function createAggregatedZipFile(string $type, string $theme_or_keyword, array $zip_contents, bool $private): array {
    $zip_info = $this->archivePathAndFilename($type, $theme_or_keyword, $private);
    $file_url = "{$zip_info['directory']}/{$zip_info['filename']}";
    $log_context = [
      '%type' => $type,
      '%theme' => $theme_or_keyword,
      '%file_url' => $file_url,
    ];
    $failure_message = 'Failed to create aggregated archive on %type: %theme - %file_url.';

    // archivePathAndFilename() yields an empty filename for unknown types, so
    // there is no valid target to write to.
    if (empty($zip_info['filename'])) {
      $this->logger->error($failure_message, $log_context);
      return [];
    }

    $zip = new \ZipArchive();
    // open() returns TRUE on success and an error code otherwise; adding
    // files to an archive that failed to open must be avoided.
    if ($zip->open($file_url, \ZipArchive::CREATE | \ZipArchive::OVERWRITE) !== TRUE) {
      $this->logger->error($failure_message, $log_context);
      return [];
    }
    $this->addDownloadUrlFilesToZip($zip_contents, $zip);
    $this->addOtherFilesToMonthlyArchive($theme_or_keyword, $zip);

    // Closing must succeed and the file must actually be on disk.
    if (!$zip->close() || !file_exists($file_url)) {
      $this->logger->error($failure_message, $log_context);
      return [];
    }

    $file_size = filesize($file_url);
    $this->logger->notice('Created aggregated archive on %type: %theme - %file_url.', $log_context);

    $this->queueBackupToS3($theme_or_keyword, (int) $this->year, $file_url);

    return [
      'file_url' => $file_url,
      'drupal_file_uri' => "{$zip_info['drupal_directory']}/{$zip_info['filename']}",
      'file_size' => $file_size,
      'private' => $private,
    ];
  }

  /**
   * Create a theme_or_keyword's download-all zip.
   *
   * @param string $theme_or_keyword
   *   Theme or keyword. @todo Rework this to be about Theme or keyword.
   */
  public function createDownloadAll(string $theme_or_keyword): void {
    // Target directory: <public files>/archive/<theme_or_keyword>/current.
    $zipPath = $this->util->getDrupalPublicFilesDir() . "/archive/{$theme_or_keyword}/current";
    $this->util->prepareDir($zipPath);

    // File name is the lower-cased, underscore-separated theme or keyword.
    $zipFilename = strtolower(str_replace(' ', '_', $theme_or_keyword)) . '_current_data.zip';

    $zip = new \ZipArchive();
    $zip->open("{$zipPath}/{$zipFilename}", \ZipArchive::CREATE | \ZipArchive::OVERWRITE);

    // @todo work out the manifest
    // NOTE(review): the calls that would add files are still disabled below,
    // so nothing is written into the zip even though success is logged.
    $zip_contents = [
      'files' => $this->getThemeDownloadUrls($theme_or_keyword),
      'manifest' => "I need to be worked out.",
    ];
    // $this->addDownloadUrlFilesToZip($zip_contents, $zip);
    // $this->addOtherFilesToMonthlyArchive($theme_or_keyword, $zip);
    $log_context = [
      '%theme' => $theme_or_keyword,
      '%path' => $zipPath,
    ];
    $this->logger->notice('Created download-all zip for %theme in %path.', $log_context);
  }

  /**
   * Add files from a list of urls to a zip archive.
   *
   * @param array $zip_contents
   *   Contains an array of files and a json manifest ['files', 'manifest'].
   * @param \ZipArchive $zip
   *   Zip archive.
   */
  protected function addDownloadUrlFilesToZip(array $zip_contents, \ZipArchive $zip): void {
    // The manifest, when present, is stored as manifest.json.
    $manifest = $zip_contents['manifest'] ?? '';
    if (!empty($manifest)) {
      $zip->addFromString('manifest.json', $manifest);
    }

    foreach ($zip_contents['files'] ?? [] as $file_url) {
      // csvPathAndFilename() returns an empty array for URLs it cannot map to
      // a local file; check the shape before destructuring so that case does
      // not raise undefined-key warnings.
      $location = is_string($file_url) ? $this->csvPathAndFilename($file_url) : [];
      if (count($location) !== 2) {
        continue;
      }
      [$csvPath, $csvFilename] = $location;
      $source = "{$csvPath}/{$csvFilename}";
      // Report files that are missing or rejected instead of silently
      // producing an incomplete archive.
      if (!is_file($source) || !$zip->addFile($source, $csvFilename)) {
        $this->logger->warning('Skipped %file: it could not be added to the archive.', [
          '%file' => $source,
        ]);
      }
    }
  }

  /**
   * Create a theme_or_keyword's annual archive.
   *
   * @param string $theme_or_keyword
   *   Theme or keyword.
   */
  private function createAnnualArchive(string $theme_or_keyword, string $type = 'theme'): void {
    // @todo rework this.
    $theme = $theme_or_keyword;
    $theme_machine_name = strtolower(str_replace(' ', '_', $theme));
    // archivePathAndFilename() takes the archive type as its first argument
    // and returns an associative array, so the path is under 'directory'.
    // $type is optional and defaults to 'theme' to keep the one-argument call
    // form working.
    $archivePath = $this->archivePathAndFilename($type, $theme)['directory'];
    $archiveName = "{$theme_machine_name}_{$this->year}.zip";
    $archiveFile = "{$archivePath}/{$archiveName}";

    // Finder throws when asked to scan a directory that does not exist.
    if (!is_dir($archivePath)) {
      $this->logger->warning('No %year annual archive created for %theme: %path does not exist.', [
        '%year' => $this->year,
        '%theme' => $theme,
        '%path' => $archivePath,
      ]);
      return;
    }

    // NOTE(review): this pattern expects names ending in NN_NNNN.zip, while
    // archivePathAndFilename() builds names as <machine_name>_<Y-m-d>.zip.
    // Confirm which naming the source archives actually use.
    $monthly_archives = (new Finder())
      ->files()
      ->name('/.*\d{2}_\d{4}.zip/')
      ->in($archivePath)
      ->depth('== 0');

    $zip = new \ZipArchive();
    if ($zip->open($archiveFile, \ZipArchive::CREATE | \ZipArchive::OVERWRITE) !== TRUE) {
      $this->logger->error('Unable to open %file for the %year annual archive of %theme.', [
        '%file' => $archiveFile,
        '%year' => $this->year,
        '%theme' => $theme,
      ]);
      return;
    }
    foreach ($monthly_archives as $monthly_archive) {
      $zip->addFile($monthly_archive->getRealPath(), $monthly_archive->getBasename());
    }
    // Only report success and queue the backup when a file was written.
    if (!$zip->close() || !file_exists($archiveFile)) {
      $this->logger->error('Unable to write %file for the %year annual archive of %theme.', [
        '%file' => $archiveFile,
        '%year' => $this->year,
        '%theme' => $theme,
      ]);
      return;
    }

    $this->logger->notice(
      'Created %year annual archive for %theme.', [
        '%year' => $this->year,
        '%theme' => $theme,
      ]
    );

    $this->queueBackupToS3($theme_or_keyword, (int) $this->year, $archiveFile);
  }

  /**
   * Queue the copying of a new archive to its theme_or_keyword's folder in S3.
   *
   * @param string $theme_or_keyword
   *   Theme or keyword.
   * @param int $year
   *   Year.
   * @param string $filePath
   *   Filename including full path.
   */
  private function queueBackupToS3(string $theme_or_keyword, int $year, string $filePath): void {
    // @todo rework this.
    // This needs to be optional based on settings.
    if ($this->skipBackup($theme_or_keyword)) {
      return;
    }

    /** @var \Drupal\Core\Queue\QueueInterface $backupQueue */
    $backupQueue = $this->queue->get('backup_to_s3');
    $backupQueue->createItem([
      // backupToS3() reads the item under 'theme_or_keyword'; 'provider' is
      // kept as well so anything still reading the older key keeps working.
      'theme_or_keyword' => $theme_or_keyword,
      'provider' => $theme_or_keyword,
      'year' => $year,
      'filePath' => $filePath,
    ]);
  }

  /**
   * Copy an archive to S3.
   *
   * @param array $data
   *   Queue item data; the 'theme_or_keyword', 'year' and 'filePath' keys are
   *   read from it.
   */
  public function backupToS3(array $data): void {
    // Items are queued by queueBackupToS3() under the 'provider' key, so
    // accept that key as well as 'theme_or_keyword'.
    $theme_or_keyword = (string) ($data['theme_or_keyword'] ?? $data['provider'] ?? '');
    $year = $data['year'] ?? NULL;
    $filePath = (string) ($data['filePath'] ?? '');
    if ($theme_or_keyword === '' || $year === NULL || $filePath === '') {
      $this->logger->error('Backup to S3 skipped: the queue item is missing its theme/keyword, year or file path.');
      return;
    }

    // Re-check the exemption at processing time, since the item may have been
    // queued under different conditions than the ones it is processed in.
    if (static::skipBackup($theme_or_keyword)) {
      return;
    }

    if (!is_readable($filePath)) {
      $this->logger->error('Backup to S3 skipped: %file is not readable.', [
        '%file' => $filePath,
      ]);
      return;
    }

    // NOTE(review): fetch() passes 'remote_address' on as a bucket name and
    // builds "s3://<bucket>/..." paths from it - confirm that this setting
    // already carries the stream scheme needed here.
    $destination = $this->archiverSettings->get('remote_address') . "/{$theme_or_keyword}/{$year}/" . basename($filePath);

    // Register the stream wrapper, if not already set.
    $this->getAwsS3Filesystem();

    $source = fopen($filePath, 'rb');
    if ($source === FALSE) {
      $this->logger->error('Backup to S3 failed: unable to open %file.', [
        '%file' => $filePath,
      ]);
      return;
    }
    $stream = fopen($destination, 'w');
    if ($stream === FALSE) {
      fclose($source);
      $this->logger->error('Backup to S3 failed: unable to open %destination.', [
        '%destination' => $destination,
      ]);
      return;
    }

    // Copy through the handle that carries the chunk size, and close both
    // handles so no second, empty write to the destination is left pending.
    stream_set_chunk_size($stream, 10_000_000);
    $copied = stream_copy_to_stream($source, $stream);
    fclose($source);
    $closed = fclose($stream);

    if ($copied === FALSE || !$closed) {
      $this->logger->error('Backup to S3 failed while copying %file to %destination.', [
        '%file' => $filePath,
        '%destination' => $destination,
      ]);
    }
  }

  /**
   * Check to see if this is exempt from backup.
   *
   * @param string $theme_or_keyword
   *   Theme or keyword @todo rework this.
   *
   * @return bool
   *   TRUE to skip, FALSE not to.
   */
  public static function skipBackup(string $theme_or_keyword) {
    // @todo Get the skip from config.
    // Backups run on the 'prod' environment, and for 'test_minimal' on any
    // environment; every other combination is skipped.
    $on_production = getenv('AH_SITE_ENVIRONMENT') === 'prod';
    $is_test_minimal = $theme_or_keyword === 'test_minimal';

    return !$on_production && !$is_test_minimal;
  }

  /**
   * Add other files to the monthly archive.
   *
   * @param string $theme_or_keyword
   *   Theme or keyword.
   * @param \ZipArchive $zip
   *   Archive.
   */
  private function addOtherFilesToMonthlyArchive(string $theme_or_keyword, \ZipArchive $zip): void {
    // Delegates to the readme and dictionary helpers, in that order.
    // The manifest is not handled here: it is added by
    // addDownloadUrlFilesToZip(), which receives the zip contents.
    $this->addReadmeTxtToMonthlyArchive($zip);
    $this->addDictionariesToMonthlyArchive($theme_or_keyword, $zip);
  }

  /**
   * Add the readme.txt to the monthly archive.
   *
   * @param \ZipArchive $zip
   *   Archive.
   */
  private function addReadmeTxtToMonthlyArchive(\ZipArchive $zip): void {
    // @todo do we even need the Readme anymore? Consider deprecating this.
    // The readme path is resolved inside this module's files directory, but
    // the call that would add it to the archive is currently disabled.
    $readmePath = sprintf(
      '%s/files/%s',
      $this->moduleHandler->getModule('dkan_dataset_archiver')->getPath(),
      self::README
    );
    // $zip->addFile($readmePath, self::README);
  }

  /**
   * Add a provider's dictionaries to its monthly archive.
   *
   * @param string $theme_or_keyword
   *   Theme or keyword.
   * @param \ZipArchive $zip
   *   Archive.
   */
  private function addDictionariesToMonthlyArchive(string $theme_or_keyword, \ZipArchive $zip): void {
    // Placeholder: the dictionary list is always empty and the addFile() call
    // is commented out, so this method currently adds nothing to $zip.
    // @todo This needs major reworking because dictionary must be extendable.
    // Maybe default check for existence of *dictionary.pdf and if not grab from
    // dkan in some way.
    $dictionaries = [];

    foreach ($dictionaries as $dictionary) {
      // $zip->addFile($dictionary['realPath'], $dictionary['filename']);
    }
  }

  /**
   * Return an archive's path, drupal path, and filename based on archive type.
   *
   * @param string $type
   *   Archive type: 'keyword', 'annual_keyword', 'theme', 'annual_theme',
   *   'annual'.
   * @param string $theme_or_keyword
   *   Theme or keyword to aggregate on.
   * @param bool $private
   *   Whether the archive is private or not.
   *
   * @return array
   *   Array with 'directory', 'drupal_directory', and 'filename' keys.
   */
  protected function archivePathAndFilename(string $type, string $theme_or_keyword = '', bool $private = FALSE): array {
    $date = Util::date()->format('Y-m-d');

    // Private archives use the private stream and base directory; all others
    // use the public ones.
    if ($private) {
      $file_stream = 'private://';
      $base_path = $this->util->getDrupalPrivateFilesDirectory();
    }
    else {
      $file_stream = 'public://';
      $base_path = $this->util->getDrupalPublicFilesDirectory();
    }
    $machine_name = Util::fileNameIze($theme_or_keyword);

    if (in_array($type, ['keyword', 'theme'], TRUE)) {
      // Dated archive for a single theme or keyword.
      $directory = "dataset-archives/{$type}/{$machine_name}";
      $filename = "{$machine_name}_{$date}.zip";
    }
    elseif (in_array($type, ['annual_keyword', 'annual_theme'], TRUE)) {
      // Annual archive for a single theme or keyword: named by year only.
      $directory = "dataset-archives/{$type}/{$machine_name}";
      $date = Util::date()->format('Y');
      $filename = "{$machine_name}_annual_{$date}.zip";
    }
    elseif ($type === 'annual') {
      // Site-wide annual archive: named after the site instead of a term.
      $directory = "dataset-archives/{$type}";
      $site_name = \Drupal::config('system.site')->get('name');
      $machine_name = Util::fileNameIze($site_name);
      $date = Util::date()->format('Y');
      $filename = "{$machine_name}_annual_{$date}.zip";
    }
    else {
      // Unknown type: log it and return an empty directory and filename.
      $this->logger->error('Not enough information to create an archive for type: @type and theme_or_keyword: @theme_or_keyword.', [
        '@type' => $type,
        '@theme_or_keyword' => $theme_or_keyword,
      ]);
      $directory = '';
      $filename = '';
    }

    $drupal_directory_path = "{$file_stream}{$directory}";
    // The directory is only prepared when a type was recognised.
    if (!empty($directory)) {
      $this->util->prepareDir($drupal_directory_path);
    }

    return [
      'directory' => "{$base_path}/{$directory}",
      'drupal_directory' => $drupal_directory_path,
      'filename' => $filename,
    ];
  }

  /**
   * Return all downloadURLs from a single theme.
   *
   * @param string $theme_or_keyword
   *   Theme or keyword.
   * @param bool $respectArchiveExclude
   *   Respect metadata archiveExclude if true. Defaults to false.
   *
   * @return array
   *   Array of downloadURLs.
   */
  private function getThemeDownloadUrls(string $theme_or_keyword, bool $respectArchiveExclude = FALSE) {
    // @todo This function may not be needed since we are not getting all
    // datasets for the theme, only the ones that recently changed.
    $archive_root = $this->util->getDrupalPublicFilesDir() . '/archive';
    $matches = [];
    // @todo Rework this to be about themes, not providers.
    // The parameter is a string, so the cast yields a single-element list.
    foreach ((array) $theme_or_keyword as $theme) {
      // Pass human readable theme as search parameter.
      $response = $this->metastoreSearch->search([
        'theme' => $theme,
        'page' => 1,
        'page-size' => 300,
      ]);
      // Array union: keys already collected are kept.
      $matches += $response->results;
      // @todo May not be the best place for this, but works for now.
      $this->util->prepareDir("{$archive_root}/{$theme}");
    }

    // @todo Rework this to get exclusion from config rather than provider.
    if ($respectArchiveExclude) {
      // Drop datasets whose archiveExclude property is set and truthy.
      $matches = array_filter($matches, static fn ($dataset) => empty($dataset->archiveExclude));
    }

    if (empty($matches)) {
      return [];
    }

    // Gather and flatten all distributions, to get their downloadURL.
    $distribution_lists = array_column($matches, 'distribution');
    $distributions = array_merge(...$distribution_lists);
    return array_column($distributions, 'downloadURL');
  }

  /**
   * Return a csv's path and filename.
   *
   * @param string $downloadUrl
   *   Download URL from dataset.
   *
   * @return array
   *   Array with csv's path and filename values.
   */
  private function csvPathAndFilename(string $downloadUrl): array {
    // Work on the decoded path component only: a query string or
    // percent-encoded characters (e.g. %20 for a space) would otherwise end
    // up in the local file name and never match a file on disk.
    $url_path = parse_url($downloadUrl, PHP_URL_PATH);
    $path = rawurldecode(is_string($url_path) ? $url_path : $downloadUrl);

    // Everything after the public files marker is the path relative to the
    // public files directory.
    $pieces = explode('sites/default/files', $path, 2);
    if (!isset($pieces[1])) {
      return [];
    }

    $relative = $pieces[1];
    $csvFilename = basename($relative);
    if ($csvFilename === '') {
      return [];
    }

    // dirname() removes only the final segment, so a directory that happens
    // to share the file's name is left intact.
    $csvPath = dirname($relative);
    if ($csvPath === '.' || $csvPath === '/' || $csvPath === '\\') {
      $csvPath = '';
    }

    return [$this->util->getDrupalPublicFilesDir() . $csvPath, $csvFilename];
  }

  /**
   * Create annual archives for every theme directory and year.
   */
  public function createAnnualArchives(): void {
    $archive_root = DRUPAL_ROOT . '/../src/site/files/archive';
    // Finder throws when asked to scan a directory that does not exist, so
    // report and stop instead.
    if (!is_dir($archive_root)) {
      $this->logger->notice('No annual archives created: %path does not exist.', [
        '%path' => $archive_root,
      ]);
      return;
    }

    // First level: one directory per theme.
    $themes = (new Finder())
      ->directories()
      ->in($archive_root)
      ->depth('== 0');

    foreach ($themes as $theme) {
      // Second level: one directory per year inside the theme directory.
      $years = (new Finder())
        ->directories()
        ->in($archive_root . '/' . $theme->getFilename())
        ->depth('== 0');

      foreach ($years as $year) {
        $this->createAnnualThemeArchive($theme->getBasename(), $year);
      }
    }
  }

  /**
   * Helper to create an annual archive for a theme.
   *
   * @param string $theme
   *   Theme.
   * @param \Symfony\Component\Finder\SplFileInfo $year
   *   A year directory within a theme directory.
   */
  private function createAnnualThemeArchive(string $theme, SplFileInfo $year): void {
    $year_path = $year->getRealPath();
    if ($year_path === FALSE) {
      // The year directory can no longer be resolved.
      return;
    }

    $theme_machine_name = strtolower(str_replace(' ', '_', $theme));
    $zipFilename = "{$year_path}/{$theme_machine_name}_{$year->getBasename()}.zip";

    $monthly_archives = (new Finder())
      ->files()
      ->name('/.*\d{2}_\d{4}.zip/')
      ->in($year_path)
      ->depth('== 0');

    // Collect the sources before the target is opened. The target lives in
    // the scanned directory and could itself match the pattern, so it is
    // excluded explicitly.
    $sources = [];
    foreach ($monthly_archives as $monthly_archive) {
      $real_path = $monthly_archive->getRealPath();
      if ($real_path !== FALSE && $real_path !== $zipFilename) {
        $sources[$monthly_archive->getBasename()] = $real_path;
      }
    }
    if (empty($sources)) {
      // Nothing to bundle; leave any existing annual archive untouched.
      return;
    }

    $zip = new \ZipArchive();
    if ($zip->open($zipFilename, \ZipArchive::CREATE | \ZipArchive::OVERWRITE) !== TRUE) {
      $this->logger->error('Unable to open annual archive %file for writing.', [
        '%file' => $zipFilename,
      ]);
      return;
    }
    foreach ($sources as $entry_name => $real_path) {
      $zip->addFile($real_path, $entry_name);
    }
    if (!$zip->close()) {
      $this->logger->error('Unable to write annual archive %file.', [
        '%file' => $zipFilename,
      ]);
    }
  }

  /**
   * Create an individual archive for a dataset.
   *
   * @param \Drupal\node\NodeInterface $data
   *   The data object.
   */
  public function createIndividualArchive(NodeInterface $data): void {
    if (empty($this->archiverSettings->get('archive')) || !$this->isArchiveWorthy($data)) {
      // Archiving is disabled or the dataset is not archive worthy so bail out.
      return;
    }

    // Convert the dataset's modified value (read as UTC) to the storage
    // timezone and date-only storage format.
    $datetime_string = Util::grabMetadata($data, 'modified');
    $datetime_object = new DrupalDateTime($datetime_string, 'UTC');
    $datetime_object->setTimezone(new \DateTimeZone(DateTimeItemInterface::STORAGE_TIMEZONE));
    $formatted_modified_date = $datetime_object->format(DateTimeItemInterface::DATE_STORAGE_FORMAT);
    $dataset_id = Util::grabMetadata($data, 'identifier') ?? t('undefined');

    // Metadata lookups are treated as possibly NULL. Default the multi-value
    // ones to empty arrays because the prep helpers declare array parameters
    // and would otherwise throw a TypeError.
    $themes = Util::grabMetadata($data, 'theme') ?? [];
    $keywords = Util::grabMetadata($data, 'keyword') ?? [];
    $distributions = Util::grabMetadata($data, 'distribution') ?? [];
    $is_private = Util::isDatasetPrivate($data);

    // The files go into exactly one of the two file fields, chosen by the
    // dataset's privacy. $file_size is filled in by reference, so the files
    // are prepared before the values array that reads it is built.
    $file_size = 0;
    $files = $this->prepMultiValueFileFieldForStorage($formatted_modified_date, $dataset_id, $distributions, $file_size, (bool) $is_private);

    $archive_data = [
      'name' => Util::grabMetadata($data, 'title'),
      'archive_type' => 'individual',
      'dataset_modified' => $formatted_modified_date,
      'dataset_id' => $dataset_id,
      'themes' => $this->prepMultiValueFieldForStorage($themes, 'theme'),
      'keywords' => $this->prepMultiValueFieldForStorage($keywords, 'keyword'),
      'private' => $is_private,
      'distributions' => $distributions,
      'resource_files' => $is_private ? [] : $files,
      'resource_files_private' => $is_private ? $files : [],
      'size' => $file_size,
      'status' => '1',
    ];

    $storage = $this->entityTypeManager->getStorage('dda_archive');
    $archive = $storage->create($archive_data);
    $archive->save();
  }

  /**
   * Prepare a multi-value field for storage w/ look-up of entity references.
   *
   * @param array $values
   *   Array of values to prepare.
   * @param string $type
   *   The type of field, either 'theme', 'keyword', or ''.
   *
   * @return array
   *   Array formatted for storage.
   */
  public function prepMultiValueFieldForStorage(array $values, string $type = ''): array {
    // Only these field types are looked up in metastore storage.
    $is_reference_type = in_array($type, ['keyword', 'theme']);
    $prepared = [];

    foreach ($values as $raw_value) {
      $term = $raw_value;
      if (Uuid::isValid($raw_value) && $is_reference_type) {
        // The value is a uuid of a referenced item: swap in its stored data
        // when the item can be retrieved, otherwise keep the uuid as is.
        $stored_json = $this->getStorage($type)->retrieve($raw_value, FALSE);
        if ($stored_json) {
          $term = json_decode($stored_json)->data ?? t('not found');
        }
      }
      // Apply the configured term mapping before storing.
      $prepared[] = ['value' => $this->getMappedTerm($term)];
    }

    return $prepared;
  }

  /**
   * Prepare a multi-value File field for storage.
   *
   * @param string $modified_date
   *   The modified date of the dataset. Format Y-m-d.
   * @param string $dataset_id
   *   The identifier of the dataset.
   * @param array $values
   *   Array of values to prepare.
   * @param int $file_size
   *   The cumulative size of the files added, passed by reference.
   * @param bool $private
   *   TRUE if the files should be put in private storage.
   *
   * @return array
   *   Array of file entities.
   */
  public function prepMultiValueFileFieldForStorage(string $modified_date, string $dataset_id, array $values, int &$file_size, bool $private = FALSE): array {
    $items = [];
    // Archived files are grouped in a directory per year of modification.
    $dateTime = new \DateTime($modified_date);
    $year = $dateTime->format('Y');
    // @todo Need to check if private is turned on at the system level.
    $location = $private ? 'private' : 'public';
    $destination_directory = "{$location}://dataset-archives/individual/{$year}";

    foreach ($values as $resource) {
      // Only distributions referenced by uuid can be looked up; skip the rest.
      if (!Uuid::isValid($resource)) {
        continue;
      }
      $referenced_entity = $this->getStorage('distribution')->retrieve($resource, FALSE);
      if (!$referenced_entity) {
        continue;
      }

      // A distribution without a usable downloadURL has no file to archive.
      $file_url = json_decode($referenced_entity)->data->downloadURL ?? NULL;
      if (!is_string($file_url) || $file_url === '') {
        $this->logger->warning('Skipping distribution @id: it has no downloadURL to archive.', ['@id' => $resource]);
        continue;
      }

      // Prefix the original filename so archives of different datasets and
      // dates do not overwrite each other.
      $filename = basename((string) parse_url($file_url, PHP_URL_PATH));
      $filename = "{$modified_date}_{$dataset_id}_{$filename}";
      $destination = "{$destination_directory}/{$filename}";

      try {
        // Ensure the destination directory exists before downloading anything.
        $directory_exists = $this->fileSystem->prepareDirectory($destination_directory, FileSystemInterface::CREATE_DIRECTORY);
        if (!$directory_exists) {
          throw new \Exception("Failed to create the directory $destination_directory");
        }

        // Would be faster to trim the domain if it is a local file, but it
        // might not be local, so we have to use the slow but sure way.
        $file_content = file_get_contents($file_url);
        // Compare explicitly: a loose falsy check would also reject a file
        // whose entire content is "0".
        if ($file_content === FALSE || $file_content === '') {
          throw new \Exception("Failed to get file content from $file_url");
        }

        // Save the data to the file and create a managed file entity.
        $managed_file = $this->fileRepository->writeData($file_content, $destination, FileExists::Replace);
        if ($managed_file instanceof FileInterface) {
          $items[] = ['target_id' => $managed_file->id()];
          $file_size += $managed_file->getSize();
          $this->logger->info('Created dataset archive file successfully: @file', ['@file' => $managed_file->getFileUri()]);
        }
        else {
          $this->logger->error('Failed to save archive file. @file', ['@file' => $destination]);
        }
      }
      catch (\Exception $e) {
        // One failing file is logged and skipped so the remaining resources
        // are still processed.
        $this->logger->error('Error saving archive file: @error', ['@error' => $e->getMessage()]);
      }
    }
    return $items;
  }

  /**
   * Check if this data is worthy of having an archive.
   *
   * @param \Drupal\node\NodeInterface $data
   *   The data object.
   *
   * @return bool
   *   TRUE if an archive should be created for the data, FALSE otherwise.
   */
  public function isArchiveWorthy(NodeInterface $data): bool {
    // Only published datasets are candidates for archiving.
    if (!Util::isDataset($data) || !$data->isPublished()) {
      return FALSE;
    }

    // Private datasets qualify only when the archive_private setting is on.
    $private_allowed = (bool) $this->archiverSettings->get('archive_private');
    if (!$private_allowed && Util::isDatasetPrivate($data)) {
      return FALSE;
    }

    // A new dataset qualifies as soon as it has at least one resource.
    $is_new_with_resources = $data->isNew() && !empty(Util::grabMetadata($data, 'distribution'));
    if ($is_new_with_resources) {
      return TRUE;
    }

    // Otherwise it qualifies only if its archivable data changed.
    return Util::archivableDataChanged($data);
  }

  /**
   * Get storage.
   *
   * @param string $schema_id
   *   The metastore schema ID, e.g. 'theme', 'keyword' or 'distribution'.
   *
   * @return \Drupal\metastore\Storage\NodeData
   *   Entity storage.
   */
  protected function getStorage(string $schema_id): NodeData {
    // Build the storage for this schema on first use, then reuse it.
    $this->storages[$schema_id] ??= $this->metastoreStorage->getInstance($schema_id);

    return $this->storages[$schema_id];
  }

  /**
   * Get and set the term map from configuration.
   *
   * @return array
   *   Associative array of term map.
   *   Key is the original term, value is the mapped final term.
   */
  public function getMap(): array {
    if (empty($this->termMap)) {
      $map = $this->archiverSettings->get('theme_keyword_map');

      // The setting may be unset, so only parse it when it is a string.
      // Split on any line ending instead of PHP_EOL: the stored text does not
      // necessarily use the line ending of the platform PHP runs on.
      $lines = is_string($map) ? preg_split('/\r\n|\r|\n/', $map) : [];

      $term_map = [];
      foreach ($lines ?: [] as $line) {
        // Each usable line has the form "original -> mapped". Lines that do
        // not split into exactly two parts are ignored.
        $pieces = explode('->', $line);
        if (count($pieces) === 2) {
          $term_map[trim($pieces[0])] = trim($pieces[1]);
        }
      }
      $this->termMap = $term_map;
    }
    return $this->termMap;
  }

  /**
   * Get the mapped term from configuration, or return the original if no map.
   *
   * @param string $term
   *   The original term.
   *
   * @return string
   *   The mapped term, or the original if no map exists.
   */
  public function getMappedTerm(string $term): string {
    // Fall back to the term itself when the map has no entry for it.
    return $this->getMap()[$term] ?? $term;
  }

  /**
   * Check if a term is blocked from archiving.
   *
   * @param string $type
   *   Either 'keyword' or 'theme'.
   * @param string $term
   *   The term to check.
   *
   * @return bool
   *   TRUE if the term is blocked, FALSE if not.
   */
  public function isBlockedTerm($type, $term): bool {
    // The setting name is derived from the type, e.g. "keywords_to_skip".
    $skip_type = "{$type}s_to_skip";
    $configured = $this->archiverSettings->get($skip_type) ?? '';

    // Accept either a list or a string with one term per line. Splitting on
    // any line ending and trimming keeps a stray "\r" or padding from
    // preventing a match. An unset setting yields an empty list.
    $entries = is_array($configured)
      ? $configured
      : preg_split('/\r\n|\r|\n/', (string) $configured);

    $blocked = array_filter(
      array_map(static fn ($entry): string => trim((string) $entry), $entries ?: []),
      static fn (string $entry): bool => $entry !== '',
    );

    // Strict comparison so numeric-looking terms such as "1" and "01" are
    // not treated as equal.
    return in_array(trim((string) $term), $blocked, TRUE);
  }

}
