<?php

namespace Drupal\bibcite_import_oai\Commands;

use Drush\Commands\DrushCommands;

/**
 * Drush commands.
 */
class OaiImportCommands extends DrushCommands {

  /**
   * Number of entities loaded and deleted per storage call.
   */
  private const DELETE_BATCH_SIZE = 500;

  /**
   * Number of duplicate contributor IDs re-pointed and deleted per pass.
   */
  private const MERGE_BATCH_SIZE = 1000;

  /**
   * Field table that is always rewritten when contributors are merged.
   */
  private const REQUIRED_CONTRIBUTOR_COLUMNS = [
    'bibcite_reference__author' => 'author_target_id',
  ];

  /**
   * Field tables rewritten on merge only when they exist in the schema.
   *
   * Keys are table names, values are the column holding the contributor ID.
   */
  private const OPTIONAL_CONTRIBUTOR_COLUMNS = [
    'bibcite_reference_revision__author' => 'author_target_id',
    'bibcite_reference__editor' => 'editor_target_id',
    'bibcite_reference_revision__editor' => 'editor_target_id',
    'user__field_author' => 'field_author_target_id',
  ];

  /**
   * Imports data from an OAI URL.
   *
   * @param array $options
   *   Command options.
   *
   * @command bibcite_import_oai:import
   *
   * @option url URL for OAI XML data
   * @aliases oai,oai-import
   *
   * @throws \Exception
   *   When the URL is missing or invalid, or when the importer fails. An
   *   importer failure is chained as the previous exception so its stack
   *   trace is not lost.
   */
  public function import(array $options = ['url' => NULL]) {
    $url = $options['url'] ?? NULL;
    if (empty($url)) {
      throw new \Exception('Error: Please specify URL.');
    }

    // filter_var() returns the URL unchanged when valid, FALSE otherwise.
    $url = filter_var($url, FILTER_VALIDATE_URL);
    if ($url === FALSE) {
      throw new \Exception('Error: URL is invalid.');
    }

    $importer = \Drupal::service('bibcite_import_oai.importer');
    try {
      $imported = $importer->importDataFromUrl($url);
    }
    catch (\Exception $e) {
      // Keep the message and code 0 as before, but preserve the cause.
      throw new \Exception($e->getMessage(), 0, $e);
    }

    $this->output->writeln(sprintf('Imported %s references', $imported));
  }

  /**
   * Deletes all references from bibcite.
   *
   * @command bibcite_import_orcid:delete_refs
   * @aliases oudr
   * @usage bibcite_import_orcid:delete_refs
   */
  public function deleteRefs() {
    $this->deleteAllEntities('bibcite_reference', 'References');
  }

  /**
   * Deletes all contributors from bibcite.
   *
   * @command bibcite_import_orcid:delete_contribs
   * @aliases oudc
   * @usage bibcite_import_orcid:delete_contribs
   */
  public function deleteContribs() {
    $this->deleteAllEntities('bibcite_contributor', 'Contributors');
  }

  /**
   * Deletes all entities from bibcite.
   *
   * Runs the reference deletion first, then the contributor deletion.
   *
   * @command bibcite_import_orcid:delete_all
   * @aliases ouda
   * @usage bibcite_import_orcid:delete_all
   */
  public function deleteAll() {
    $this->deleteRefs();
    $this->deleteContribs();
  }

  /**
   * Removes duplicate contributors by merging them.
   *
   * Contributors sharing the same first and last name form a group. The
   * lowest ID in each group is kept; every field-table row pointing at one of
   * the other IDs is re-pointed to the kept ID, and the other contributor
   * entities are then deleted.
   *
   * @command bibcite_import_oai:deduplicate_contributors
   * @aliases oai-dedup
   * @usage bibcite_import_oai:deduplicate_contributors
   */
  public function deduplicateContributors() {
    $this->output->writeln('Starting contributor deduplication...');

    $connection = \Drupal::database();
    $entity_type_manager = \Drupal::entityTypeManager();
    $contributor_storage = $entity_type_manager->getStorage('bibcite_contributor');

    // Resolve once which field tables exist instead of probing with a failing
    // query (and swallowing every exception) on each batch.
    $target_columns = $this->contributorReferenceColumns($connection);

    // Fetch all groups up front: the loop below modifies the same table, so
    // the result set must not still be open while iterating.
    $groups = $connection->query("
      SELECT first_name, last_name, COUNT(*) as count, MIN(id) as keep_id
      FROM {bibcite_contributor}
      GROUP BY first_name, last_name
      HAVING COUNT(*) > 1
      ORDER BY count DESC
    ")->fetchAll();

    $total_merged = 0;
    $groups_processed = 0;

    foreach ($groups as $group) {
      $groups_processed++;
      $first_name = $group->first_name;
      $last_name = $group->last_name;
      $keep_id = $group->keep_id;

      $duplicate_ids = $this->findDuplicateContributorIds($connection, $first_name, $last_name, $keep_id);
      if (empty($duplicate_ids)) {
        continue;
      }

      // Work in batches to bound the size of IN() lists and loaded entities.
      foreach (array_chunk($duplicate_ids, self::MERGE_BATCH_SIZE) as $batch) {
        // Re-point first; a database error here now aborts before anything
        // is deleted rather than being silently ignored.
        foreach ($target_columns as $table => $column) {
          $connection->update($table)
            ->fields([$column => $keep_id])
            ->condition($column, $batch, 'IN')
            ->execute();
        }

        $contributor_storage->delete($contributor_storage->loadMultiple($batch));
        $total_merged += count($batch);
      }

      if ($groups_processed % 10 == 0) {
        $name = trim("$first_name $last_name");
        $this->output->writeln("Processed {$groups_processed} groups ({$total_merged} duplicates removed). Last: '{$name}'");
      }
    }

    if ($total_merged > 0) {
      // The field tables were rewritten with direct SQL, bypassing the entity
      // API, so cached copies of the owning entities may be stale.
      $entity_type_manager->getStorage('bibcite_reference')->resetCache();
      if (isset($target_columns['user__field_author'])) {
        $entity_type_manager->getStorage('user')->resetCache();
      }
    }

    $this->output->writeln("Deduplication complete!");
    $this->output->writeln("Groups processed: {$groups_processed}");
    $this->output->writeln("Total duplicate contributors removed: {$total_merged}");
  }

  /**
   * Deletes every entity of one type, in chunks, reporting the total first.
   *
   * @param string $entity_type_id
   *   Entity type ID to purge.
   * @param string $label
   *   Plural label used in the "Deleting N <label>." message.
   */
  private function deleteAllEntities(string $entity_type_id, string $label): void {
    $ids = \Drupal::entityQuery($entity_type_id)
      ->accessCheck(FALSE)
      ->execute();
    $this->output->writeln('Deleting ' . count($ids) . ' ' . $label . '.');

    $storage = \Drupal::entityTypeManager()->getStorage($entity_type_id);
    // Chunk so that not every entity has to be loaded into memory at once.
    foreach (array_chunk($ids, self::DELETE_BATCH_SIZE) as $chunk) {
      $storage->delete($storage->loadMultiple($chunk));
    }
  }

  /**
   * Builds the table => column map to rewrite when merging contributors.
   *
   * @param \Drupal\Core\Database\Connection $connection
   *   Active database connection.
   *
   * @return string[]
   *   Column names keyed by table name. The required author table is always
   *   present; optional tables are included only when they exist.
   */
  private function contributorReferenceColumns($connection): array {
    $columns = self::REQUIRED_CONTRIBUTOR_COLUMNS;
    $schema = $connection->schema();
    foreach (self::OPTIONAL_CONTRIBUTOR_COLUMNS as $table => $column) {
      if ($schema->tableExists($table)) {
        $columns[$table] = $column;
      }
    }
    return $columns;
  }

  /**
   * Finds contributor IDs matching a name, excluding the ID being kept.
   *
   * @param \Drupal\Core\Database\Connection $connection
   *   Active database connection.
   * @param string|null $first_name
   *   First name to match; NULL matches rows where the column is NULL.
   * @param string|null $last_name
   *   Last name to match; NULL matches rows where the column is NULL.
   * @param int|string $keep_id
   *   Contributor ID to exclude from the result.
   *
   * @return array
   *   Matching contributor IDs.
   */
  private function findDuplicateContributorIds($connection, $first_name, $last_name, $keep_id): array {
    $select = $connection->select('bibcite_contributor', 'bc')
      ->fields('bc', ['id'])
      ->condition('id', $keep_id, '!=');

    foreach (['first_name' => $first_name, 'last_name' => $last_name] as $field => $value) {
      if ($value === NULL) {
        $select->isNull($field);
      }
      else {
        $select->condition($field, $value);
      }
    }

    return $select->execute()->fetchCol();
  }

}
