<?php

namespace Drupal\dkan_dataset_archiver\Service;

use Drupal\Core\Datetime\DrupalDateTime;
use Drupal\Core\Field\FieldItemListInterface;
use Drupal\Core\File\FileSystem;
use Drupal\node\NodeInterface;
use Drupal\taxonomy\Entity\Term;

/**
 * A collection of utility functions.
 */
class Util {

  /**
   * Check if any of the archivable data changed.
   *
   * An archive tracks three dataset properties: title, modified and
   * distribution. A change to any one of them counts as a change.
   *
   * @param \Drupal\node\NodeInterface $data
   *   The data object.
   *
   * @return bool
   *   TRUE if archivable data changed, FALSE if not (including when the node
   *   is not a dataset).
   */
  public static function archivableDataChanged(NodeInterface $data): bool {
    if (!self::isDataset($data)) {
      return FALSE;
    }
    return self::datasetFieldChanged($data, 'title')
      || self::datasetFieldChanged($data, 'modified')
      || self::datasetFieldChanged($data, 'distribution');
  }

  /**
   * Build an array of entity reference targets from an array of ids.
   *
   * @param array $ids
   *   The entity ids.
   *
   * @return array
   *   A list of ['target_id' => id] items that can be handed to an entity
   *   reference field. Keys of $ids are not preserved.
   */
  public static function buildEntityReferenceTargets(array $ids): array {
    $targets = [];
    foreach ($ids as $id) {
      $targets[] = ['target_id' => $id];
    }
    return $targets;
  }

  /**
   * Check if a specific metadata property changed between original and current.
   *
   * @param \Drupal\node\NodeInterface $data
   *   The data object.
   * @param string $field_name
   *   The name of the json_metadata property to check.
   *
   * @return bool
   *   TRUE if the property changed, FALSE if not. If there is no original,
   *   there is no change.
   */
  public static function datasetFieldChanged(NodeInterface $data, string $field_name): bool {
    $original = $data->original ?? NULL;
    if (!$original) {
      // There is no original, so nothing changed.
      return FALSE;
    }
    $old_value = self::grabMetadata($original, $field_name);
    // @todo work out that time is different on save than on harvest so it
    // looks like it changed when it didn't (one has h:m:s the other doesn't).
    $new_value = self::grabMetadata($data, $field_name);
    // Compare the JSON encodings instead of using !== on the decoded values.
    // json_decode() yields stdClass objects, and !== compares objects (and
    // objects nested in arrays) by identity, so two separately decoded but
    // equal values would always look different.
    return json_encode($old_value) !== json_encode($new_value);
  }

  /**
   * Use instead of PHP's date() to get date in site default time zone.
   *
   * @param string $date
   *   String date to set or modify, defaults to 'now'.
   *
   * @return \Drupal\Core\Datetime\DrupalDateTime
   *   The requested date time in the site default time zone, or in UTC when
   *   no default time zone is configured.
   */
  public static function date($date = 'now'): DrupalDateTime {
    $timezone = \Drupal::config('system.date')->get('timezone')['default'] ?? 'UTC';
    return new DrupalDateTime($date, $timezone);
  }

  /**
   * Get the path to the public files directory.
   *
   * @return string
   *   Real path of the public:// stream.
   */
  public static function getDrupalPublicFilesDirectory(): string {
    return \Drupal::service('file_system')->realpath('public://');
  }

  /**
   * Get the path to the directory that holds private archive files.
   *
   * @return string
   *   Real path of the private:// stream when the 'archive_private' setting is
   *   'in_private'. For any other setting the real path of public:// is
   *   returned.
   */
  public static function getDrupalPrivateFilesDirectory(): string {
    $where_private = \Drupal::config('dkan_dataset_archiver.settings')->get('archive_private') ?? '0';
    if ($where_private === 'in_private') {
      return \Drupal::service('file_system')->realpath('private://');
    }
    return \Drupal::service('file_system')->realpath('public://');
  }

  /**
   * Get the file scheme for stream.
   *
   * @param bool $private
   *   Whether the item should be treated as private or not.
   *
   * @return string
   *   The file scheme. private:// only when the item is private AND the
   *   'archive_private' setting is 'in_private', otherwise public://.
   */
  public static function getFileScheme(bool $private): string {
    $where_private = \Drupal::config('dkan_dataset_archiver.settings')->get('archive_private') ?? '0';
    if ($private && $where_private === 'in_private') {
      return 'private://';
    }
    return 'public://';
  }

  /**
   * Deduce a local file stream from a file URL.
   *
   * @param string $file_url
   *   The file URL.
   * @param bool $private
   *   Whether the item should be treated as private or not.
   *
   * @return string|null
   *   The local file stream or NULL if it could not be deduced.
   */
  public static function deduceLocaleFileStream(string $file_url, bool $private = FALSE): ?string {
    // Split on /files/ to get the relative path.
    // There is a risk of the assumption that the file url may contain a path
    // that looks like Drupal either because it is a different site that is
    // also Drupal or because the path contains /files/ for some other reason.
    // Checks for '/site' because in some cases it is /sites/ or /site/.
    // Split on the first occurrence only, so a relative path that itself
    // contains '/files/' is kept whole rather than truncated.
    $pieces = explode('/files/', $file_url, 2);
    if (!empty($pieces[1]) && str_contains($file_url, '/site')) {
      $relative_path = $pieces[1];
      $scheme = self::getFileScheme($private);
      return "{$scheme}{$relative_path}";
    }
    return NULL;
  }

  /**
   * A wrapper around the static getDrupalPublicFilesDirectory function.
   *
   * @return string
   *   Path to the public files directory.
   */
  public function getDrupalPublicFilesDir() {
    return self::getDrupalPublicFilesDirectory();
  }

  /**
   * Grab metadata property from a dataset node json_metadata field.
   *
   * @param \Drupal\node\NodeInterface $data
   *   The data node.
   * @param string $property_name
   *   The name of the property to look up.
   *
   * @return mixed
   *   The content of the property as decoded by json_decode() (objects are
   *   stdClass), or NULL if not found or the field does not hold an object.
   */
  public static function grabMetadata(NodeInterface $data, string $property_name): mixed {
    $metadata = json_decode($data->get('field_json_metadata')->getString());
    return $metadata->$property_name ?? NULL;
  }

  /**
   * Check if this data is a dataset.
   *
   * @param \Drupal\node\NodeInterface $data
   *   The data object.
   *
   * @return bool
   *   TRUE if the node bundle is 'data', FALSE otherwise.
   */
  public static function isDataset(NodeInterface $data): bool {
    return $data->getType() === 'data';
  }

  /**
   * Check if a dataset is private.
   *
   * @param \Drupal\node\NodeInterface $dataset
   *   The data object.
   *
   * @return bool
   *   TRUE if the dataset is private, FALSE if not.
   */
  public static function isDatasetPrivate(NodeInterface $dataset): bool {
    $accessLevel = self::grabMetadata($dataset, 'accessLevel');
    // A dataset without an accessLevel yields NULL, which would be a TypeError
    // for isConsideredPrivate(). Treat a missing or non-string level as an
    // unknown level (''), which is only private when everything is private.
    return self::isConsideredPrivate(is_string($accessLevel) ? $accessLevel : '');
  }

  /**
   * Check if an access level means the dataset should be treated as private.
   *
   * The 'treat_as_private' setting acts as a threshold: the configured level
   * and every more restrictive level are private.
   *
   * @param string $access_level
   *   The access level of a dataset or archive.
   *
   * @return bool
   *   TRUE if the dataset or archive is private, FALSE if not.
   */
  public static function isConsideredPrivate(string $access_level): bool {
    $treat_as_private = \Drupal::config('dkan_dataset_archiver.settings')->get('treat_as_private') ?? '0';
    $private = FALSE;
    switch ($treat_as_private) {
      case 'public':
        // Everything is private, whatever the access level.
        $private = TRUE;
        break;

      case 'restricted public':
        if (in_array($access_level, ['restricted public', 'non-public'], TRUE)) {
          $private = TRUE;
        }
        break;

      case 'non-public':
        if ($access_level === 'non-public') {
          $private = TRUE;
        }
        break;

      case '0':
      default:
        $private = FALSE;
    }

    return $private;
  }

  /**
   * Get the access levels that are considered private based on settings.
   *
   * Kept in agreement with isConsideredPrivate(): the configured level and
   * every more restrictive level are private.
   *
   * @return array
   *   An array of access levels that are considered private.
   *   Possible values: 'public', 'restricted public', 'non-public'.
   */
  public static function getAccessLevelsThatAreConsideredPrivate(): array {
    $treat_as_private = \Drupal::config('dkan_dataset_archiver.settings')->get('treat_as_private') ?? '0';
    $private = [];
    switch ($treat_as_private) {
      case 'public':
        // isConsideredPrivate() treats every level as private for this
        // setting, so all three levels belong in the list.
        $private[] = 'public';
        $private[] = 'restricted public';
        $private[] = 'non-public';
        break;

      case 'restricted public':
        $private[] = 'restricted public';
        $private[] = 'non-public';
        break;

      case 'non-public':
        $private[] = 'non-public';
        break;

      case '0':
      default:
        $private = [];
    }

    return $private;
  }

  /**
   * Get the access levels that are considered Public based on settings.
   *
   * @return array
   *   A zero-indexed list of access levels that are considered public.
   *   Possible values: 'public', 'restricted public', 'non-public'.
   */
  public static function getAccessLevelsThatAreConsideredPublic(): array {
    $private_levels = self::getAccessLevelsThatAreConsideredPrivate();
    $all_levels = ['public', 'restricted public', 'non-public'];
    // array_diff() preserves keys; reindex so callers always get a list.
    return array_values(array_diff($all_levels, $private_levels));
  }

  /**
   * Adjust storage location based on access level and storage settings.
   *
   * The incoming URL is first stripped of any known stream wrapper, any
   * leading path up to and including '/files/' or '/dataset-archives/', and
   * the 'dataset-archives/' directory itself. The remaining relative path is
   * then re-rooted under the location that matches the privacy of the item
   * and the 'archive_private' setting.
   *
   * @param string $url
   *   Schemed or schemeless URL to be adjusted.
   * @param bool $is_private
   *   Whether the item should be treated as private or not.
   *
   * @return string
   *   The adjusted URL with stream wrapper if it came in with one. A URL that
   *   came in without a scheme gets its original leading path put back in
   *   front of the adjusted relative path instead.
   */
  public static function adjustStorageLocation(string $url, bool $is_private): string {
    // 1 if the URL starts with any scheme (public://, private://, https://).
    $contained_scheme = preg_match('/^[a-zA-Z]+:\/\//', $url);
    // Greedy match: everything up to and including the last '/files/'.
    $files_matches = [];
    preg_match('/^(.*\/files\/)/', $url, $files_matches);
    $anything_in_front_of_files = $files_matches[1] ?? '';
    // Greedy match: everything up to and including the last
    // '/dataset-archives/'.
    $dataset_archives_matches = [];
    preg_match('/^(.*\/dataset-archives\/)/', $url, $dataset_archives_matches);
    $anything_in_front_of_dataset_archives = $dataset_archives_matches[1] ?? '';
    $storage_location_for_private = \Drupal::config('dkan_dataset_archiver.settings')->get('archive_private') ?? '0';
    // Strip all the possible stuff first, then put it back after.
    // str_replace() applies these in order against the progressively
    // stripped string, so the order of this list matters.
    $strings_to_remove = [
      // In order from most specific to least.
      'private://dataset-archives/private',
      'public://dataset-archives/private',
      'private://dataset-archives',
      'public://dataset-archives',
      'private://',
      'public://',
      $anything_in_front_of_files,
      $anything_in_front_of_dataset_archives,
      'dataset-archives/',
    ];
    $url = str_replace($strings_to_remove, '', $url);
    $path = trim($url, '/');
    switch (TRUE) {
      case ($is_private && $storage_location_for_private === 'in_private'):
        // Force the private stream and private location.
        if (!$contained_scheme) {
          // Schemeless: reuse whatever preceded 'dataset-archives/' as the
          // prefix that is put back in front of the adjusted path.
          $anything_in_front_of_dataset_archives = str_replace('dataset-archives/', '', $anything_in_front_of_dataset_archives);
          $anything_in_front_of_files = $anything_in_front_of_dataset_archives;
        }

        $path = "dataset-archives/{$path}";
        $scheme = 'private://';
        break;

      case ($is_private && $storage_location_for_private === 'in_public'):
        // Use the public stream but a private sub-directory.
        $path = "dataset-archives/private/{$path}";
        $scheme = 'public://';
        break;

      case (!$is_private):
      default:
        // Make sure we are using the public stream.
        $path = "dataset-archives/{$path}";
        $scheme = 'public://';

    }
    $new_url = ($contained_scheme) ? "{$scheme}{$path}" : "{$anything_in_front_of_files}{$path}";

    return $new_url;
  }

  /**
   * Make a string machine friendly.
   *
   * Removes every character that is not an ASCII letter, digit or space and
   * limits the result to 40 characters. When the cleaned string is longer
   * than that, the LAST 40 characters are the ones kept.
   *
   * @param string $string
   *   String to machinize. NULL is treated as an empty string.
   *
   * @return string
   *   Machinized string.
   */
  public static function machinize($string): string {
    // Cast so a NULL from a nullable caller does not trigger the PHP 8.1+
    // "passing null to preg_replace()" deprecation. preg_replace() returns
    // NULL on a regex error, which must not leak through the string return.
    $new = preg_replace('/[^A-Za-z0-9 ]/', '', (string) $string) ?? '';
    // NOTE(review): this keeps the tail, not the head, of an over-long string;
    // existing cache tags depend on it, so confirm before changing.
    return substr($new, -40);
  }

  /**
   * Create cache tags for an aggregation type and name, used for clearing.
   *
   * @param string $type
   *   The type of aggregation: aggregate, annual, current, individual.
   * @param string|null $aggregate_of
   *   What is being aggregated on: 'keyword' or 'theme'.
   * @param string $theme_or_keyword
   *   The theme or keyword to aggregate on. Not needed for 'annual' type.
   *
   * @return array
   *   A zero-indexed list of unique cache tags.
   */
  public static function getAggregationTagsToClear(string $type, ?string $aggregate_of = NULL, string $theme_or_keyword = ''): array {
    // Get the direct tags; an unknown type yields '' which is filtered out.
    $tags = array_filter([
      self::getAggregationTag($type, $aggregate_of, $theme_or_keyword, TRUE),
      self::getAggregationTag($type, $aggregate_of, $theme_or_keyword, FALSE),
    ]);
    // Add adjacent tags.
    $theme_or_keyword = strtolower(self::machinize($theme_or_keyword));
    switch ($type) {
      case 'annual':
        // Annuals are also included with keyword results so need both.
        $tags[] = "archive:api:{$type}:{$aggregate_of}:{$theme_or_keyword}:absolute";
        $tags[] = "archive:api:{$type}:{$aggregate_of}:{$theme_or_keyword}:relative";
        $tags[] = "archive:api:{$type}:{$aggregate_of}:all:absolute";
        $tags[] = "archive:api:{$type}:{$aggregate_of}:all:relative";
        break;

      case 'current':
        // Current archives do not need any adjacent tags cleared.
        // @todo confirm this.
        break;

      case 'individual':
        // Individual archives do not have correct direct tags, they are odd.
        // Remove the direct tag.
        $tags = [];
        $tags[] = "archive:api:{$type}:{$aggregate_of}:{$theme_or_keyword}:absolute";
        $tags[] = "archive:api:{$type}:{$aggregate_of}:{$theme_or_keyword}:relative";
        $tags[] = "archive:api:{$type}:{$aggregate_of}:all:absolute";
        $tags[] = "archive:api:{$type}:{$aggregate_of}:all:relative";
        break;

      default:
        // No additional tags needed.
        break;
    }
    // Direct and adjacent tags can coincide; return each tag only once.
    return array_values(array_unique($tags));
  }

  /**
   * Create the primary cache tag for an aggregation, used for setting.
   *
   * @param string $type
   *   The type of aggregation: aggregate, annual, current, individual.
   * @param string|null $aggregate_of
   *   What is being aggregated on: 'keyword' or 'theme'.
   * @param string|null $theme_or_keyword
   *   The theme or keyword to aggregate. Or a dataset_id if no aggregate_of.
   *   It is run through machinize() and lower-cased before use.
   * @param bool $absolute
   *   Whether the tag is for the absolute or the relative URL variant.
   *
   * @return string
   *   The cache tag, or an empty string for an unknown type.
   */
  public static function getAggregationTag(string $type, ?string $aggregate_of = NULL, ?string $theme_or_keyword = NULL, bool $absolute = TRUE): string {
    $absolute_string = $absolute ? 'absolute' : 'relative';
    $theme_or_keyword = strtolower(self::machinize($theme_or_keyword ?? ''));
    switch ($type) {
      case 'aggregate':
      case 'annual':
      case 'current':
        if (empty($aggregate_of)) {
          return "archive:api:{$type}:{$absolute_string}";
        }
        return "archive:api:{$type}:{$aggregate_of}:{$theme_or_keyword}:{$absolute_string}";

      case 'individual':
        if (empty($theme_or_keyword)) {
          // Nothing to identify the archive by, so use the generic tag.
          return "archive:api:{$type}:{$absolute_string}";
        }
        if (empty($aggregate_of)) {
          // Without an aggregate the identifier is a dataset id.
          $dataset_id = $theme_or_keyword;
          return "archive:api:{$type}:{$dataset_id}:{$absolute_string}";
        }
        return "archive:api:{$type}:{$aggregate_of}:{$theme_or_keyword}:{$absolute_string}";

      default:
        // No tag for unknown type.
        return '';
    }
  }

  /**
   * Make a string file name friendly.
   *
   * Every character that is not an ASCII letter, digit, underscore or hyphen
   * is replaced by a hyphen and the result is lower-cased.
   *
   * @param string $string
   *   String to file name-ize. NULL is treated as an empty string.
   *
   * @return string
   *   File name friendly string.
   */
  public static function fileNameIze($string): string {
    // Cast so a NULL from a nullable caller does not trigger the PHP 8.1+
    // "passing null to preg_replace()" deprecation.
    $new = preg_replace('/[^A-Za-z0-9_\-]/', '-', (string) $string) ?? '';
    return strtolower($new);
  }

  /**
   * Get a list of values from a multi-value field.
   *
   * @param \Drupal\Core\Field\FieldItemListInterface|null $field
   *   The field to extract values from.
   * @param string $attribute
   *   The attribute to extract from each item. Defaults to 'value'.
   *
   * @return array
   *   An array of values. Items whose attribute is empty are left out.
   */
  public static function getListOfMultiValues(?FieldItemListInterface $field, string $attribute = 'value') : array {
    $values = [];
    if ($field && !$field->isEmpty()) {
      foreach ($field as $item) {
        if (!empty($item->$attribute)) {
          $values[] = $item->$attribute;
        }
      }
    }
    return $values;
  }

  /**
   * Get a list of Urls from a multi-value file field.
   *
   * @param \Drupal\Core\Field\FieldItemListInterface|null $field
   *   The field to extract urls from.
   * @param bool $absolute
   *   Whether to create absolute URLs.
   *
   * @return array
   *   An array of URLs. Items whose file entity cannot be loaded are skipped.
   */
  public static function getListOfMultiValueUris(?FieldItemListInterface $field, bool $absolute = TRUE) : array {
    $values = [];
    if ($field && !$field->isEmpty()) {
      foreach ($field as $item) {
        /** @var \Drupal\file\Plugin\Field\FieldType\FileItem $item */
        $file_entity = $item->entity;
        if (!$file_entity) {
          // The referenced file is gone; there is no URL to build.
          continue;
        }
        // createFileUrl() takes a "relative" flag, hence the negation.
        $url = $file_entity->createFileUrl(!$absolute);
        if (!empty($url)) {
          $values[] = $url;
        }
      }
    }
    return $values;
  }

  /**
   * Create an archive's directory path based on archive type.
   *
   * @param string $type
   *   Archive type: 'aggregate', 'current', 'annual' or 'individual'.
   * @param string|null $aggregate_of
   *   What is being aggregated on: 'keyword' or 'theme'.
   * @param string|null $theme_or_keyword
   *   Theme or keyword to aggregate on.
   * @param string $dataset_id
   *   Dataset id. Only needed for 'individual' type.
   * @param string|null $date
   *   Not used when building the directory. Kept so the signature stays
   *   parallel to createArchiveFilename().
   *
   * @return string
   *   Path of a directory based on args without leading or trailing slashes.
   *   Example: dataset-archives/keyword/health
   */
  public static function createArchiveFilePath(
    string $type,
    ?string $aggregate_of = NULL,
    ?string $theme_or_keyword = NULL,
    string $dataset_id = '',
    ?string $date = NULL,
  ): string {
    $machine_name = self::fileNameIze($theme_or_keyword ?? '');
    switch (TRUE) {
      case ($type === 'individual' && !empty($dataset_id)):
        $directory = "dataset-archives/individual/{$dataset_id}";
        break;

      case ($type === 'aggregate' && !empty($aggregate_of) && !empty($theme_or_keyword)):
      case ($type === 'annual' && !empty($aggregate_of) && !empty($theme_or_keyword)):
        $directory = "dataset-archives/{$aggregate_of}/{$machine_name}";
        break;

      case ($type === 'current' && !empty($aggregate_of) && !empty($theme_or_keyword)):
        $directory = "dataset-archives/{$type}/{$aggregate_of}";
        break;

      case ($type === 'annual'):
      case ($type === 'current'):
        $directory = "dataset-archives/{$type}";
        break;

      default:
        $directory = "dataset-archives/unknown-type";
        break;
    }
    return $directory;
  }

  /**
   * Create an archive's filename.
   *
   * @param string $type
   *   Archive type: 'aggregate', 'annual', 'current' or 'individual'.
   * @param string|null $aggregate_of
   *   What is being aggregated on: 'keyword' or 'theme'.
   * @param string $theme_or_keyword
   *   Theme or keyword to aggregate on.
   * @param string $dataset_id
   *   Dataset id. Only needed for 'individual' type.
   * @param string|null $date
   *   Date to use in the filename. Must be Y-m-d, Y-m or Y. Defaults to today.
   * @param string $original_filename
   *   Original filename w/ ext to use in the individual archive filename.
   *
   * @return string
   *   The filename of an archive file including its extension.
   *   Example: dataset-id_2025-10-05_filename.zip
   */
  public static function createArchiveFilename(
    string $type,
    ?string $aggregate_of = NULL,
    string $theme_or_keyword = '',
    string $dataset_id = '',
    ?string $date = '',
    string $original_filename = '',
  ): string {
    $year_match = [];
    if (empty($date)) {
      $today = self::date();
      $annual_year = $today->format('Y');
      $date = $today->format('Y-m-d');
    }
    elseif (preg_match('/^\d{4}(?=-|$)/', $date, $year_match) === 1) {
      // Take the year straight from the string. A bare 'YYYY' handed to the
      // date parser is read as a time of day (HH MM), which would silently
      // turn the annual year into the current year.
      $annual_year = $year_match[0];
    }
    else {
      $annual_year = self::date($date)->format('Y');
    }
    $machine_name = self::fileNameIze($theme_or_keyword);

    switch (TRUE) {
      case ($type === 'individual' && !empty($dataset_id) && !empty($original_filename)):
        $filename = "{$dataset_id}_{$date}_{$original_filename}";
        break;

      case ($type === 'aggregate' && !empty($aggregate_of) && !empty($theme_or_keyword)):
        $filename = "{$machine_name}_{$date}.zip";
        break;

      case ($type === 'annual' && !empty($aggregate_of) && !empty($theme_or_keyword)):
        $filename = "{$machine_name}_annual_{$annual_year}.zip";
        break;

      case ($type === 'annual'):
        // A site-wide annual archive is named after the site.
        $site_name = \Drupal::config('system.site')->get('name');
        $machine_name = self::fileNameIze($site_name);
        $filename = "{$machine_name}_annual_{$annual_year}.zip";
        break;

      case ($type === 'current'):
        $filename = "{$aggregate_of}_{$machine_name}_current.zip";
        break;

      default:
        // We don't know what this was so pass it through.
        $filename = $original_filename;
        break;
    }
    return $filename;
  }

  /**
   * Verify a directory exists, or create it.
   *
   * @param string $directory
   *   Directory.
   *
   * @return bool
   *   TRUE if it exists (or was created) and is writable. FALSE otherwise.
   */
  public static function prepareDirectory(string $directory) : bool {
    /** @var \Drupal\Core\File\FileSystem $fs */
    $fs = \Drupal::service('file_system');
    $flags = FileSystem::CREATE_DIRECTORY | FileSystem::MODIFY_PERMISSIONS;
    return $fs->prepareDirectory($directory, $flags);
  }

  /**
   * A wrapper around the static prepareDirectory function.
   *
   * @param string $directory
   *   Path to directory to prepare.
   *
   * @return bool
   *   TRUE if it exists (or was created) and is writable. FALSE otherwise.
   */
  public function prepareDir(string $directory) {
    return self::prepareDirectory($directory);
  }

  /**
   * Wraps static Term creation function so it can be mocked in phpunit.
   *
   * @param string $termName
   *   The name of the term.
   * @param string $taxonomyName
   *   The vocabulary the term belongs to.
   *
   * @return \Drupal\taxonomy\Entity\Term
   *   The term built by Term::create(). This method does not save it.
   */
  public function createTaxonomyTerm(string $termName, string $taxonomyName): Term {
    return Term::create([
      'name' => $termName,
      'vid' => $taxonomyName,
    ]);
  }

}
