<?php

namespace Drupal\fast_revision_purge\Service;

use Drupal\Core\Database\Connection;
use Psr\Log\LoggerInterface;

/**
 * Truncates non-current Paragraph revisions (meta + field revision rows).
 *
 * Safety/approach:
 * - Detects the paragraph base/revision tables via the entity definition,
 *   falling back to the well-known Paragraphs table names (paragraphs_item*).
 * - Only deletes rows whose revision_id is NOT the current one recorded on the
 *   base/data table (whichever exists with revision_id).
 * - Operates in chunks using a temporary table of candidate revision IDs.
 * - Wraps the deletes of each chunk in a DB transaction that is rolled back
 *   when any statement of the chunk fails.
 * - Entity tables are referenced through Drupal "{table}" placeholders so the
 *   configured table prefix is honoured; tables discovered through
 *   information_schema are referenced by their physical (prefixed) name.
 * - The SQL is MySQL/MariaDB specific (multi-table DELETE, ENGINE=MEMORY,
 *   information_schema, SHOW TABLE STATUS).
 */
class ParagraphRevisionTruncator {

  /**
   * Name of the session-scoped temporary table holding candidate IDs.
   */
  private const TEMP_TABLE = 'tmp_par_rev_ids';

  /**
   * Lower bound applied to the chunk size passed to execute().
   */
  private const MIN_CHUNK = 1000;

  /**
   * Pattern a table name must match before it is interpolated into raw SQL.
   */
  private const SIMPLE_NAME = '/\A[A-Za-z0-9_]+\z/';

  /**
   * @param \Drupal\Core\Database\Connection $db
   *   Drupal DB connection.
   * @param \Psr\Log\LoggerInterface $logger
   *   Logger channel.
   */
  public function __construct(
    private Connection $db,
    private LoggerInterface $logger,
  ) {}

  /**
   * Build the detected paragraph base/revision table names (if available).
   *
   * Prefers entity-definition tables; falls back to the fixed table names.
   * All returned names are logical (unprefixed) Drupal table names.
   *
   * @return array{
   *   base: string|null,
   *   base_alt: string|null,
   *   rev: string|null
   * }
   *   base: primary table that has (id, revision_id) for "current" pointer.
   *   base_alt: optional alternate table with (id, revision_id) if base missing.
   *   rev: revision meta table (holds (id, revision_id, ...)).
   */
  protected function detectTables(): array {
    $schema = $this->db->schema();

    $base = $baseAlt = $rev = NULL;

    // Try the entity definition first.
    try {
      $def = \Drupal::entityTypeManager()->getDefinition('paragraph', FALSE);
      if ($def) {
        $candBase = (string) ($def->get('base_table') ?? '');
        $candData = (string) ($def->get('data_table') ?? '');
        $candRev  = (string) ($def->get('revision_table') ?? '');

        if ($candRev !== '' && $schema->tableExists($candRev)) {
          $rev = $candRev;
        }

        // Choose the first table that exists and carries revision_id.
        if ($this->hasRevisionPointer($candBase)) {
          $base = $candBase;
        }
        elseif ($this->hasRevisionPointer($candData)) {
          $base = $candData;
        }

        // The data table becomes the alternate pointer when the base table
        // was chosen as primary and the data table is a different, usable
        // table. (The base table can never be the alternate: whenever it is
        // usable it is already the primary.)
        if ($base !== NULL && $candData !== $base && $this->hasRevisionPointer($candData)) {
          $baseAlt = $candData;
        }
      }
    }
    catch (\Throwable $e) {
      // Detection through the entity definition is best-effort; the fixed
      // table names below still apply. Record why it failed instead of
      // hiding it.
      $this->logger->warning('Paragraph table detection via entity definition failed: @message', [
        '@message' => $e->getMessage(),
      ]);
    }

    // Fallback to the fixed table names if needed.
    if (!$rev) {
      if ($schema->tableExists('paragraphs_item_revision')) {
        $rev = 'paragraphs_item_revision';
      }
      elseif ($schema->tableExists('paragraph_revision')) {
        $rev = 'paragraph_revision';
      }
    }

    if (!$base) {
      if ($this->hasRevisionPointer('paragraphs_item')) {
        $base = 'paragraphs_item';
      }
      elseif ($this->hasRevisionPointer('paragraphs_item_field_data')) {
        $base = 'paragraphs_item_field_data';
      }
    }

    // Optional alternate base for safety if both exist.
    if (!$baseAlt) {
      if ($base !== 'paragraphs_item' && $this->hasRevisionPointer('paragraphs_item')) {
        $baseAlt = 'paragraphs_item';
      }
      elseif ($base !== 'paragraphs_item_field_data' && $this->hasRevisionPointer('paragraphs_item_field_data')) {
        $baseAlt = 'paragraphs_item_field_data';
      }
    }

    return ['base' => $base, 'base_alt' => $baseAlt, 'rev' => $rev];
  }

  /**
   * PLAN: Estimate rows/bytes that would be deleted.
   *
   * Only the primary pointer table is consulted here; the alternate pointer
   * table that execute() may fall back to is not part of the estimate.
   *
   * @return array{rows:int, approx_bytes:int}
   *   rows: number of non-current rows in the paragraph revision table.
   *   approx_bytes: estimate via AVG_ROW_LENGTH * rows (0 if unknown).
   */
  public function plan(): array {
    $tables = $this->detectTables();
    $base = $tables['base'] ?? NULL;
    $rev  = $tables['rev'] ?? NULL;

    if (!$base || !$rev) {
      return ['rows' => 0, 'approx_bytes' => 0];
    }

    // DISTINCT: a pointer table with several rows per paragraph (one per
    // translation) would otherwise count the same revision more than once.
    $rows = (int) $this->db->query(sprintf(
      'SELECT COUNT(DISTINCT pr.revision_id)
       FROM {%s} pr
       JOIN {%s} p ON p.id = pr.id
       WHERE pr.revision_id <> p.revision_id',
      $rev,
      $base
    ))->fetchField();

    return ['rows' => $rows, 'approx_bytes' => $rows * $this->averageRowLength($rev)];
  }

  /**
   * EXECUTE: Keep only current paragraph revision rows; delete others in chunks.
   *
   * Deletes from the revision meta table and all relevant paragraph revision
   * field tables in each chunk.
   *
   * Included field tables:
   *  - `${rev}__*`                (if present)
   *  - `paragraphs_item_revision_field_data`
   *  - `paragraph_field_revision`
   *  - `paragraph_revision__*`
   *  - `paragraph_r__*`           (hashed names of long field names)
   *
   * @param int $chunk
   *   Rows to delete per loop (default 5000; min 1000).
   *
   * @return array{deleted:int}
   *   deleted: number of rows deleted from the *meta* revision table. Field
   *   tables will delete the same set of revision_ids across all tables.
   *
   * @throws \Throwable
   *   Any database error of a chunk is rethrown after that chunk's
   *   transaction was rolled back; previously completed chunks stay deleted.
   */
  public function execute(int $chunk = 5000): array {
    $tables = $this->detectTables();
    $base = $tables['base'] ?? NULL;
    $baseAlt = $tables['base_alt'] ?? NULL;
    $rev  = $tables['rev'] ?? NULL;

    if (!$base || !$rev) {
      return ['deleted' => 0];
    }

    $chunk = max(self::MIN_CHUNK, $chunk);

    // The set of field tables does not change between chunks: look it up once.
    $fieldTables = $this->discoverFieldRevisionTables($rev);

    $deleted = 0;
    do {
      $round = $this->purgeChunk($rev, $base, $baseAlt ?: NULL, $fieldTables, $chunk);
      $deleted += $round;
    } while ($round > 0);

    $this->logger->notice('Paragraph revision cleanup deleted @count revision rows (meta + fields).', [
      '@count' => number_format($deleted),
    ]);

    return ['deleted' => $deleted];
  }

  /**
   * Collects and deletes one chunk of non-current revisions.
   *
   * @param string $rev
   *   Logical name of the revision meta table.
   * @param string $base
   *   Logical name of the primary "current revision" pointer table.
   * @param string|null $baseAlt
   *   Logical name of the alternate pointer table, if any.
   * @param string[] $fieldTables
   *   Physical names of the field revision tables to clean.
   * @param int $chunk
   *   Maximum number of revision IDs handled by this call.
   *
   * @return int
   *   Number of revision IDs handled; 0 when nothing is left to delete.
   */
  private function purgeChunk(string $rev, string $base, ?string $baseAlt, array $fieldTables, int $chunk): int {
    $this->dropTemp(self::TEMP_TABLE);
    $this->createTempTable(self::TEMP_TABLE, '(id INT UNSIGNED PRIMARY KEY)');

    try {
      $round = $this->collectCandidates($rev, $base, NULL, $chunk);

      // Nothing found: some sites store the pointers on the alternate table.
      // A revision the primary table records as current is never selected.
      if ($round === 0 && $baseAlt !== NULL) {
        $round = $this->collectCandidates($rev, $baseAlt, $base, $chunk);
      }

      if ($round === 0) {
        return 0;
      }

      $txn = $this->db->startTransaction();
      try {
        // Delete from the meta revision table.
        $this->db->query(sprintf(
          'DELETE pr
           FROM {%s} pr
           JOIN %s t ON t.id = pr.revision_id',
          $rev,
          self::TEMP_TABLE
        ));

        // Delete from field-revision tables tied to paragraph revisions.
        foreach ($fieldTables as $tbl) {
          $this->db->query(sprintf(
            'DELETE r
             FROM `%s` r
             JOIN %s t ON t.id = r.revision_id',
            $tbl,
            self::TEMP_TABLE
          ));
        }
      }
      catch (\Throwable $e) {
        // Without an explicit rollback the transaction object would commit
        // the partially executed chunk when it goes out of scope.
        $txn->rollBack();
        throw $e;
      }

      // Releasing the transaction object commits the chunk.
      unset($txn);

      return $round;
    }
    finally {
      $this->dropTemp(self::TEMP_TABLE);
    }
  }

  /**
   * Fills the temporary table with up to $chunk non-current revision IDs.
   *
   * @param string $rev
   *   Logical name of the revision meta table.
   * @param string $pointer
   *   Logical name of the table whose revision_id marks the current revision.
   * @param string|null $guard
   *   Optional second pointer table; revisions it records as current are
   *   excluded as well.
   * @param int $chunk
   *   Maximum number of IDs to collect.
   *
   * @return int
   *   Number of IDs now stored in the temporary table.
   */
  private function collectCandidates(string $rev, string $pointer, ?string $guard, int $chunk): int {
    $guardSql = '';
    if ($guard !== NULL) {
      $guardSql = sprintf(
        ' AND NOT EXISTS (SELECT 1 FROM {%s} g WHERE g.revision_id = pr.revision_id)',
        $guard
      );
    }

    // DISTINCT: a pointer table holding several rows per paragraph would
    // otherwise yield the same revision_id repeatedly and violate the
    // temporary table's primary key.
    $this->db->query(sprintf(
      'INSERT INTO %s (id)
       SELECT DISTINCT pr.revision_id
       FROM {%s} pr
       JOIN {%s} p ON p.id = pr.id
       WHERE pr.revision_id <> p.revision_id%s
       LIMIT %d',
      self::TEMP_TABLE,
      $rev,
      $pointer,
      $guardSql,
      $chunk
    ));

    return (int) $this->db->query('SELECT COUNT(*) FROM ' . self::TEMP_TABLE)->fetchField();
  }

  /**
   * Looks up the average row length of a table (0 when it cannot be read).
   *
   * Tries information_schema first and falls back to SHOW TABLE STATUS.
   *
   * @param string $table
   *   Logical (unprefixed) table name.
   *
   * @return int
   *   Average row length in bytes as reported by the server, or 0.
   */
  private function averageRowLength(string $table): int {
    $physical = $this->physicalName($table);
    $avg = 0;

    try {
      $schemaName = (string) ($this->db->getConnectionOptions()['database'] ?? '');
      if ($schemaName !== '') {
        $avg = (int) ($this->db->query(
          "SELECT AVG_ROW_LENGTH
           FROM information_schema.TABLES
           WHERE TABLE_SCHEMA = :s AND TABLE_NAME = :t",
          [':s' => $schemaName, ':t' => $physical]
        )->fetchField() ?: 0);
      }
    }
    catch (\Throwable $e) {
      // The size estimate is optional: fall through to the next strategy.
    }

    // The name is interpolated below, so only plain identifiers are allowed.
    if ($avg === 0 && preg_match(self::SIMPLE_NAME, $physical) === 1) {
      try {
        // Escape LIKE wildcards so only the exact table matches.
        $like = $this->db->escapeLike($physical);
        $row = $this->db->query("SHOW TABLE STATUS LIKE '{$like}'")->fetchObject();
        $avg = $row ? (int) ($row->Avg_row_length ?? 0) : 0;
      }
      catch (\Throwable $e) {
        // The size estimate is optional: report 0 when it cannot be read.
      }
    }

    return $avg;
  }

  /**
   * Tells whether a table exists and has a revision_id column.
   *
   * @param string $table
   *   Logical (unprefixed) table name; an empty name yields FALSE.
   */
  private function hasRevisionPointer(string $table): bool {
    if ($table === '') {
      return FALSE;
    }
    $schema = $this->db->schema();
    return $schema->tableExists($table) && $schema->fieldExists($table, 'revision_id');
  }

  /**
   * Returns the physical name of a table: connection prefix + logical name.
   *
   * The prefix accessor differs between Drupal versions, so whichever method
   * the connection object offers is used.
   *
   * @param string $table
   *   Logical (unprefixed) table name.
   */
  private function physicalName(string $table): string {
    if (method_exists($this->db, 'getPrefix')) {
      return ((string) $this->db->getPrefix()) . $table;
    }
    if (method_exists($this->db, 'tablePrefix')) {
      return ((string) $this->db->tablePrefix($table)) . $table;
    }
    return $table;
  }

  /**
   * Create a TEMPORARY table, prefer ENGINE=MEMORY, fall back gracefully.
   *
   * @param string $name
   *   Temporary table name (interpolated verbatim).
   * @param string $columnsSql
   *   Parenthesised column definition list (interpolated verbatim).
   */
  private function createTempTable(string $name, string $columnsSql): void {
    try {
      $this->db->query("CREATE TEMPORARY TABLE {$name} {$columnsSql} ENGINE=MEMORY");
    }
    catch (\Throwable $e) {
      // Retry with the server's default engine; a second failure propagates.
      $this->db->query("CREATE TEMPORARY TABLE {$name} {$columnsSql}");
    }
  }

  /**
   * Drop a TEMPORARY table if it exists.
   *
   * @param string $name
   *   Temporary table name (interpolated verbatim).
   */
  private function dropTemp(string $name): void {
    try {
      $this->db->query("DROP TEMPORARY TABLE IF EXISTS {$name}");
    }
    catch (\Throwable $e) {
      // Cleanup is best-effort and must not mask the caller's own outcome.
    }
  }

  /**
   * Discover all paragraph revision field tables we should clean against.
   *
   * @param string $rev
   *   The paragraph revision meta table (e.g., 'paragraphs_item_revision' or 'paragraph_revision').
   *
   * @return string[]
   *   Physical (prefixed) table names consisting only of [A-Za-z0-9_], safe
   *   to wrap in backticks.
   */
  private function discoverFieldRevisionTables(string $rev): array {
    $dbName = (string) ($this->db->getConnectionOptions()['database'] ?? '');
    if ($dbName === '') {
      $this->logger->warning('Paragraph field revision tables cannot be discovered: the connection reports no database name.');
      return [];
    }

    $tables = [];

    // Prefix patterns. The whole literal part is escaped, so every
    // underscore matches literally instead of acting as a LIKE wildcard.
    $prefixes = [
      // ${rev}__* (works if such tables exist).
      $rev . '__',
      // Per-field revision tables: "paragraph_revision__*".
      'paragraph_revision__',
      // Per-field revision tables whose name was shortened to a hash because
      // the regular name would be too long: "paragraph_r__*".
      'paragraph_r__',
    ];
    foreach ($prefixes as $prefix) {
      $like = $this->db->escapeLike($this->physicalName($prefix)) . '%';
      $tables = array_merge($tables, $this->listTablesLike($dbName, $like));
    }

    // Fixed names: "paragraphs_item_revision_field_data" and
    // "paragraph_field_revision". A table without a revision_id column
    // cannot be cleaned by revision ID and is skipped.
    foreach (['paragraphs_item_revision_field_data', 'paragraph_field_revision'] as $name) {
      if ($this->hasRevisionPointer($name)) {
        $tables[] = $this->physicalName($name);
      }
    }

    // Whitelist: only simple names.
    $tables = array_filter($tables, static function ($t): bool {
      return is_string($t) && preg_match(self::SIMPLE_NAME, $t) === 1;
    });

    // Ensure uniqueness.
    return array_values(array_unique($tables));
  }

  /**
   * List tables in the current schema matching a LIKE pattern (ESCAPE '\\').
   *
   * @param string $schemaName
   *   Database (schema) name to search in.
   * @param string $like
   *   LIKE pattern matched against physical table names; backslash is the
   *   escape character.
   *
   * @return string[]
   *   Matching physical table names; empty when the lookup fails.
   */
  private function listTablesLike(string $schemaName, string $like): array {
    try {
      $result = $this->db->query("
        SELECT TABLE_NAME
        FROM information_schema.TABLES
        WHERE TABLE_SCHEMA = :s AND TABLE_NAME LIKE :p ESCAPE '\\\\'
      ", [':s' => $schemaName, ':p' => $like])->fetchCol();
      return array_map(static fn ($name): string => (string) $name, $result ?: []);
    }
    catch (\Throwable $e) {
      $this->logger->warning('Listing tables like "@pattern" failed: @message', [
        '@pattern' => $like,
        '@message' => $e->getMessage(),
      ]);
      return [];
    }
  }

}
