<?php

namespace Drupal\fast_revision_purge\Service;

use DateTimeImmutable;
use Drupal\Core\Database\Connection;
use Psr\Log\LoggerInterface;
use Drupal\Component\Datetime\TimeInterface;

/**
 * Computes KEEP/DELETE sets for node and paragraph revisions,
 * and records a dry-run estimate of reclaimable space.
 *
 * Design:
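 * - Populates four working tables:
 *   - fastrev_node_keep(vid)
 *   - fastrev_node_delete(vid)
 *   - fastrev_par_in_use(rid)
 *   - fastrev_par_delete(rid)
 * - When the optional Layout Builder working tables exist, also populates:
 *   - fastrev_lb_keep(rid)
 *   - fastrev_lb_delete(rid)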
 * - Paragraphs logic supports both legacy (D10) and newer (D11) schemas by
 *   introspecting the paragraph revision table/columns at runtime.
 * - After planning (dry-run), we estimate "potential_claimable_space" by
 *   sampling average row size of affected revision tables and multiplying by
 *   the number of rows that would be deleted, then persist into fastrev_stats.
 */
final class Planner {

  /**
   * Constructs the planner; every collaborator is stored as a private property.
   *
   * @param \Drupal\Core\Database\Connection $db
   *   Database connection used for all planning queries.
   * @param \Psr\Log\LoggerInterface $logger
   *   Logger for diagnostic messages; plan() uses it to report whether the
   *   dry-run estimate could be persisted.
   * @param \Drupal\fast_revision_purge\Service\RevisionTableMap $map
   *   Table discovery helper; supplies the node and paragraph ERR revision
   *   field tables walked by computeParagraphInUse().
   * @param \Drupal\fast_revision_purge\Service\DbPlatform $platform
   *   DB platform wrapper (future feature flags, driver branching). No method
   *   of this class reads it yet.
   * @param \Drupal\fast_revision_purge\Service\StatsStorage $stats
   *   Stats storage; receives the dry-run estimate via updateAfterDryRun().
   */
  public function __construct(
    private Connection $db,
    private LoggerInterface $logger,
    private RevisionTableMap $map,
    private DbPlatform $platform,
    private StatsStorage $stats,
  ) {}

  /**
   * Empties every planning table so a new dry-run starts from a clean slate.
   *
   * The four core working tables are truncated unconditionally. The two Layout
   * Builder tables are optional (created by .install), so each of them is
   * truncated only when it actually exists.
   */
  private function resetWorkingTables(): void {
    $coreTables = [
      'fastrev_node_keep',
      'fastrev_node_delete',
      'fastrev_par_in_use',
      'fastrev_par_delete',
    ];
    foreach ($coreTables as $table) {
      $this->db->truncate($table)->execute();
    }

    // Optional LB planning tables: skip the ones that are not installed.
    $schema = $this->db->schema();
    foreach (['fastrev_lb_keep', 'fastrev_lb_delete'] as $lbTable) {
      if (!$schema->tableExists($lbTable)) {
        continue;
      }
      $this->db->truncate($lbTable)->execute();
    }
  }

  /**
   * Entry point: compute keep/delete sets for nodes and paragraphs (dry-run).
   *
   * Resets the working tables, then runs the node, paragraph and Layout
   * Builder planning steps in that order. After computing the working sets,
   * this method also:
   * - Estimates potential reclaimable space (bytes) by sampling avg row sizes
   *   of affected revision tables and multiplying by the number of rows that
   *   would be deleted.
   * - Hands the estimate to StatsStorage::updateAfterDryRun().
   *
   * The estimate step is best-effort: any failure in it is logged as a warning
   * and does not undo the working sets that were already planned.
   *
   * @param int $keepLast
   *   Keep latest N non-default node revisions per node (or per language).
   * @param string|null $since
   *   Keep revisions since YYYY-MM-DD. The date is interpreted as 00:00:00 of
   *   that day in PHP's default timezone and converted to a UNIX timestamp.
   *   NULL or an empty string disables the cutoff.
   * @param bool $protectPublished
   *   Whether to protect the latest published node revision per node.
   * @param bool $perLanguage
   *   If TRUE, partition keepLast by (nid, langcode) instead of by nid only.
   * @param int $keepParagraphLast
   *   Keep last M paragraph revisions per paragraph entity.
   *
   * @throws \InvalidArgumentException
   *   If $since is provided and is not a valid YYYY-MM-DD date.
   */
  public function plan(
    int $keepLast,
    ?string $since,
    bool $protectPublished,
    bool $perLanguage,
    int $keepParagraphLast
  ): void {
    $this->resetWorkingTables();

    $cutoff = NULL;
    if ($since !== NULL && $since !== '') {
      // The leading '!' resets every field that the format does not parse, so
      // the cutoff is midnight of the given day. Without it the time-of-day is
      // taken from the current clock and the cutoff would depend on when the
      // command happens to run.
      $d = DateTimeImmutable::createFromFormat('!Y-m-d', $since);
      // createFromFormat() silently rolls over impossible dates (2024-02-31
      // becomes 2024-03-02) and accepts unpadded parts; the round-trip
      // comparison rejects anything that is not an exact YYYY-MM-DD date.
      if ($d === FALSE || $d->format('Y-m-d') !== $since) {
        throw new \InvalidArgumentException('Invalid --since date. Expected YYYY-MM-DD.');
      }
      $cutoff = $d->getTimestamp();
    }

    // Order matters: deletes are derived from the keep set, and paragraph
    // planning reads the in-use set computed from kept node revisions.
    $this->planNodeKeep($keepLast, $cutoff, $protectPublished, $perLanguage);
    $this->planNodeDelete();
    $this->computeParagraphInUse();
    $this->planParagraphDelete($keepParagraphLast);
    $this->planLayoutBuilderKeepDelete();

    try {
      $estimatedBytes = $this->estimatePotentialClaimableBytes();
      $this->stats->updateAfterDryRun($estimatedBytes);
      $this->logger->info('Dry-run estimate persisted: {bytes} bytes.', ['bytes' => $estimatedBytes]);
    }
    catch (\Throwable $e) {
      // The estimate is informational only; never fail the plan because of it.
      $this->logger->warning('Failed to persist dry-run estimate: @msg', ['@msg' => $e->getMessage()]);
    }
  }

  /**
   * Populate fastrev_node_keep according to policy flags.
   *
   * Steps:
   *  1) Keep default node revisions.
   *  2) Keep revisions since cutoff (if provided).
   *  3) Keep latest published per node (optional).
   *  4) Keep latest N per node (or per (node, langcode)).
   *
   * Every step uses INSERT IGNORE, and most also anti-join against
   * fastrev_node_keep (LEFT JOIN ... k.vid IS NULL) so vids that are already
   * kept are not selected again. NOTE(review): INSERT IGNORE presumably relies
   * on a unique key on fastrev_node_keep.vid - the table schema is not visible
   * here; confirm.
   *
   * @param int $keepLast
   *   Number of newest revisions to keep per partition; step 4 is skipped
   *   when this is not greater than zero.
   * @param int|null $cutoff
   *   UNIX timestamp; revisions with revision_timestamp >= this value are
   *   kept. NULL skips step 2.
   * @param bool $protectPublished
   *   When TRUE, runs step 3.
   * @param bool $perLanguage
   *   When TRUE, step 4 ranks revisions within (nid, langcode) instead of
   *   within nid.
   */
  private function planNodeKeep(int $keepLast, ?int $cutoff, bool $protectPublished, bool $perLanguage): void {
    // Step 1: seed with every vid referenced by node_field_data, i.e. the
    // current revision of each node (one row per nid/langcode, hence DISTINCT).
    $this->db->query("
      INSERT IGNORE INTO fastrev_node_keep (vid)
      SELECT DISTINCT nfd.vid
      FROM node_field_data nfd
      LEFT JOIN fastrev_node_keep k ON k.vid = nfd.vid
      WHERE nfd.vid IS NOT NULL AND k.vid IS NULL
    ");

    // Step 2: keep everything created at or after the cutoff timestamp.
    if ($cutoff !== null) {
      $this->db->query("
        INSERT IGNORE INTO fastrev_node_keep (vid)
        SELECT r.vid
        FROM node_revision r
        LEFT JOIN fastrev_node_keep k ON k.vid = r.vid
        WHERE r.revision_timestamp >= :cutoff
          AND k.vid IS NULL
      ", [':cutoff' => $cutoff]);
    }

    // Step 3: keep the newest revision per nid that has a published
    // (status = 1) row in node_field_revision. Ranking is by
    // revision_timestamp, ties broken by the higher vid.
    if ($protectPublished) {
      // MySQL 8 (Aurora) friendly: derived table with ROW_NUMBER(), not CTE.
      $this->db->query("
        INSERT IGNORE INTO fastrev_node_keep (vid)
        SELECT lp.vid
        FROM (
          SELECT
            r.vid,
            ROW_NUMBER() OVER (
              PARTITION BY r.nid
              ORDER BY r.revision_timestamp DESC, r.vid DESC
            ) AS rn
          FROM node_revision r
          JOIN node_field_revision nfr ON nfr.vid = r.vid
          WHERE nfr.status = 1
        ) AS lp
        WHERE lp.rn = 1
      ");
    }

    // Step 4: keep a revision when fewer than :n revisions in the same
    // partition are newer than it (newer = later revision_timestamp, or the
    // same timestamp with a higher vid). The correlated count looks at ALL
    // revisions of the partition, including ones already kept by earlier
    // steps, so N is "the newest N overall", not "N on top of those".
    if ($keepLast > 0) {
      if ($perLanguage) {
        // Partition = (nid, langcode); langcode comes from node_field_revision.
        $this->db->query("
          INSERT IGNORE INTO fastrev_node_keep (vid)
          SELECT r1.vid
          FROM node_revision r1
          JOIN node_field_revision nfr1 ON nfr1.vid = r1.vid
          LEFT JOIN fastrev_node_keep k ON k.vid = r1.vid
          WHERE k.vid IS NULL
            AND (
              SELECT COUNT(*)
              FROM node_revision r2
              JOIN node_field_revision nfr2 ON nfr2.vid = r2.vid
              WHERE r2.nid = r1.nid
                AND nfr2.langcode = nfr1.langcode
                AND (
                  r2.revision_timestamp > r1.revision_timestamp OR
                  (r2.revision_timestamp = r1.revision_timestamp AND r2.vid > r1.vid)
                )
            ) < :n
        ", [':n' => $keepLast]);
      }
      else {
        // Partition = nid only.
        $this->db->query("
          INSERT IGNORE INTO fastrev_node_keep (vid)
          SELECT r1.vid
          FROM node_revision r1
          LEFT JOIN fastrev_node_keep k ON k.vid = r1.vid
          WHERE k.vid IS NULL
            AND (
              SELECT COUNT(*)
              FROM node_revision r2
              WHERE r2.nid = r1.nid
                AND (
                  r2.revision_timestamp > r1.revision_timestamp OR
                  (r2.revision_timestamp = r1.revision_timestamp AND r2.vid > r1.vid)
                )
            ) < :n
        ", [':n' => $keepLast]);
      }
    }
  }

  /**
   * Populate fastrev_node_delete with all non-default, non-kept node revisions.
   *
   * A revision is staged for deletion when both hold:
   *  - its vid is absent from fastrev_node_keep (anti-join on k.vid IS NULL);
   *  - no node_field_data row points at it for the same nid, i.e. it is not
   *    the current revision of its node.
   *
   * The second condition is a safety net that holds even if the keep table
   * was not seeded with the current revisions.
   */
  private function planNodeDelete(): void {
    // Use node_field_data as the source of "current" vids; avoid relying on the
    // base 'node' table (schemas differ across sites).
    $this->db->query("
      INSERT IGNORE INTO fastrev_node_delete (vid)
      SELECT r.vid
      FROM node_revision r
      LEFT JOIN fastrev_node_keep k ON k.vid = r.vid
      WHERE k.vid IS NULL
        AND NOT EXISTS (
          SELECT 1
          FROM node_field_data nfd
          WHERE nfd.nid = r.nid
            AND nfd.vid = r.vid
        )
    ");
  }

  /**
   * Compute paragraph revisions currently in use.
   *
   * Sources:
   *  - Paragraph revisions referenced by kept node revisions via ERR fields.
   *  - Paragraph revisions reachable by transitive ERR-on-paragraph nesting
   *    (BFS across all paragraph ERR tables).
   *
   * Notes:
   *  - Uses INSERT IGNORE plus an anti-join so each rid is inserted once.
   *  - The BFS repeats until a pass adds no rows. As a runaway guard it also
   *    stops once more than 50 passes have added rows; that case is logged as
   *    a warning because deeper nested revisions are then NOT marked in use.
   *  - Skips entirely if the site has no relevant ERR tables.
   */
  private function computeParagraphInUse(): void {
    $nodeErr = $this->map->getNodeErrParagraphTables();
    $paraErr = $this->map->getParagraphErrParagraphTables();
    if (!$nodeErr && !$paraErr) {
      return;
    }

    // First-hop: kept node revisions -> paragraph target revisions.
    foreach ($nodeErr as $t) {
      $col = $this->errTargetRevisionColumn($t);
      $this->db->query("
        INSERT IGNORE INTO fastrev_par_in_use (rid)
        SELECT DISTINCT t.`$col`
        FROM `{$t}` t
        INNER JOIN fastrev_node_keep k ON k.vid = t.revision_id
        LEFT JOIN fastrev_par_in_use u ON u.rid = t.`$col`
        WHERE t.`$col` IS NOT NULL AND u.rid IS NULL
      ");
    }

    // BFS: follow ERR edges between paragraph revisions to include nested ones.
    if ($paraErr) {
      // The target column of a table never changes between passes, so resolve
      // each (table, column) pair once instead of on every pass.
      $edges = [];
      foreach ($paraErr as $table) {
        $edges[] = [$table, $this->errTargetRevisionColumn($table)];
      }

      $maxGrowingPasses = 50;
      $iterations = 0;
      // Nothing modifies the table between passes, so the count taken at the
      // end of one pass doubles as the starting count of the next.
      $before = (int) $this->db->query("SELECT COUNT(*) FROM fastrev_par_in_use")->fetchField();

      while (TRUE) {
        foreach ($edges as [$t, $col]) {
          $this->db->query("
            INSERT IGNORE INTO fastrev_par_in_use (rid)
            SELECT DISTINCT t.`$col`
            FROM `{$t}` t
            INNER JOIN fastrev_par_in_use u1 ON u1.rid = t.revision_id
            LEFT JOIN fastrev_par_in_use u2 ON u2.rid = t.`$col`
            WHERE t.`$col` IS NOT NULL AND u2.rid IS NULL
          ");
        }

        $after = (int) $this->db->query("SELECT COUNT(*) FROM fastrev_par_in_use")->fetchField();
        if ($after <= $before) {
          // Fixed point reached: the last pass discovered nothing new.
          break;
        }
        if (++$iterations > $maxGrowingPasses) {
          // Giving up silently would leave reachable revisions unprotected
          // with no trace, so make the truncation visible.
          $this->logger->warning(
            'Paragraph in-use traversal stopped after {passes} passes without converging; deeper nested paragraph revisions are not protected.',
            ['passes' => $iterations]
          );
          break;
        }
        $before = $after;
      }
    }
  }

  /**
   * Populate fastrev_par_delete with paragraph revisions not in use.
   *
   * Steps:
   *  - Resolve the paragraph base/data/revision tables from the entity type
   *    definition, falling back to the legacy paragraphs_item* names when the
   *    definition is unavailable or its revision table does not exist.
   *  - Protect current/default paragraph revision pointers (base table if it
   *    has a revision_id column, otherwise the data table).
   *  - Keep last M paragraph revisions per entity (D10/11-safe ordering).
   *  - Stage all remaining paragraph revisions for deletion.
   *
   * Returns without staging anything when no paragraph revision table exists.
   *
   * @param int $keepParagraphLast
   *   Number of newest revisions to keep per paragraph entity; the keep-last
   *   step is skipped when this is not greater than zero.
   */
  private function planParagraphDelete(int $keepParagraphLast): void {
    $schema  = $this->db->schema();

    // Prefer entity-definition tables (new schema), but fall back to legacy.
    $base = $data = $rev = '';

    try {
      $def = \Drupal::entityTypeManager()->getDefinition('paragraph', FALSE);
      if ($def) {
        $base = (string) ($def->get('base_table') ?? '');
        $data = (string) ($def->get('data_table') ?? '');
        $rev  = (string) ($def->get('revision_table') ?? '');
      }
    }
    catch (\Throwable $e) {
      // Best-effort lookup: the legacy fallback below still applies, but leave
      // a trace instead of swallowing the failure silently.
      $this->logger->debug(
        'Paragraph entity definition lookup failed, using legacy table names: @msg',
        ['@msg' => $e->getMessage()]
      );
    }

    $hasNewRev = $rev && $schema->tableExists($rev);
    if (!$hasNewRev) {
      // Legacy fallback.
      $rev  = $schema->tableExists('paragraphs_item_revision') ? 'paragraphs_item_revision' : '';
      $base = $schema->tableExists('paragraphs_item') ? 'paragraphs_item' : '';
      $data = $schema->tableExists('paragraphs_item_field_data') ? 'paragraphs_item_field_data' : '';
    }

    // At this point $rev is either a table already verified to exist or ''.
    if (!$rev) {
      return;
    }

    // Choose a timestamp column when available; otherwise fall back to revision_id.
    $tsCol = $this->detectParagraphRevisionTimestampColumn($rev);
    $orderExpr = $tsCol
      ? "pr2.`{$tsCol}` > pr1.`{$tsCol}` OR (pr2.`{$tsCol}` = pr1.`{$tsCol}` AND pr2.revision_id > pr1.revision_id)"
      : "pr2.revision_id > pr1.revision_id";

    // Protect default paragraph revision pointers. Use the first of
    // (base, data) that exists and carries a revision_id column.
    $pointerTable = '';
    foreach ([$base, $data] as $candidate) {
      if ($candidate && $schema->tableExists($candidate) && $schema->fieldExists($candidate, 'revision_id')) {
        $pointerTable = $candidate;
        break;
      }
    }
    if ($pointerTable !== '') {
      $this->db->query("
        INSERT IGNORE INTO fastrev_par_in_use (rid)
        SELECT DISTINCT p.revision_id
        FROM {$pointerTable} p
        LEFT JOIN fastrev_par_in_use u ON u.rid = p.revision_id
        WHERE p.revision_id IS NOT NULL AND u.rid IS NULL
      ");
    }

    // Keep last M paragraph revisions per entity: a revision qualifies when
    // fewer than :m revisions of the same paragraph id rank after it.
    if ($keepParagraphLast > 0) {
      $this->db->query("
        INSERT IGNORE INTO fastrev_par_in_use (rid)
        SELECT pr1.revision_id AS rid
        FROM {$rev} pr1
        LEFT JOIN fastrev_par_in_use u ON u.rid = pr1.revision_id
        WHERE u.rid IS NULL
          AND (
            SELECT COUNT(*)
            FROM {$rev} pr2
            WHERE pr2.id = pr1.id
              AND ({$orderExpr})
          ) < :m
      ", [':m' => $keepParagraphLast]);
    }

    // Stage for deletion: every paragraph revision not marked IN_USE.
    $this->db->query("
      INSERT IGNORE INTO fastrev_par_delete (rid)
      SELECT pr.revision_id
      FROM {$rev} pr
      LEFT JOIN fastrev_par_in_use u ON u.rid = pr.revision_id
      WHERE u.rid IS NULL
    ");
  }

  /**
   * Fills fastrev_lb_keep / fastrev_lb_delete from the Layout Builder table.
   *
   * KEEP: revision ids in node_revision__layout_builder__layout that equal
   * node_field_data.vid for the same nid and langcode (the current revision).
   * DELETE: every other revision id found in that table.
   *
   * Does nothing unless the Layout Builder revision table and both working
   * tables exist.
   *
   * NOTE(review): the DELETE set only excludes fastrev_lb_keep, so rows of
   * revisions that are listed in fastrev_node_keep but are not current are
   * still staged here - confirm this is intended.
   */
  private function planLayoutBuilderKeepDelete(): void {
    $schema = $this->db->schema();
    $requiredTables = [
      'node_revision__layout_builder__layout',
      'fastrev_lb_keep',
      'fastrev_lb_delete',
    ];
    foreach ($requiredTables as $required) {
      if (!$schema->tableExists($required)) {
        return;
      }
    }

    $keepSql = "
      INSERT IGNORE INTO fastrev_lb_keep (rid)
      SELECT DISTINCT r.revision_id
      FROM node_revision__layout_builder__layout r
      JOIN node_field_data nfd
        ON nfd.nid = r.entity_id AND nfd.langcode = r.langcode
      LEFT JOIN fastrev_lb_keep k ON k.rid = r.revision_id
      WHERE r.revision_id = nfd.vid AND k.rid IS NULL
    ";
    $this->db->query($keepSql);

    // Everything that did not make it into the keep table is staged.
    $deleteSql = "
      INSERT IGNORE INTO fastrev_lb_delete (rid)
      SELECT r.revision_id
      FROM node_revision__layout_builder__layout r
      LEFT JOIN fastrev_lb_keep k ON k.rid = r.revision_id
      WHERE k.rid IS NULL
    ";
    $this->db->query($deleteSql);
  }

  /**
   * Derives the ERR target-revision column name from a revision field table.
   *
   * Everything after the first '__' in the table name is treated as the field
   * name and suffixed with '_target_revision_id'. A table name without '__'
   * yields the bare 'target_revision_id'.
   *
   * Example:
   *  - 'node_revision__field_body' -> 'field_body_target_revision_id'
   */
  private function errTargetRevisionColumn(string $revTable): string {
    // Limit 2 splits on the first separator only, keeping any later '__'
    // inside the field name.
    $pieces = explode('__', $revTable, 2);
    if (count($pieces) < 2) {
      return 'target_revision_id';
    }
    return "{$pieces[1]}_target_revision_id";
  }

  /**
   * Picks a timestamp-like column of the paragraph revision table.
   *
   * Candidates are probed in priority order (revision_timestamp,
   * revision_created, created, changed) and probing stops at the first column
   * that exists.
   *
   * @return string
   *   The first existing candidate, or an empty string when none exists.
   */
  private function detectParagraphRevisionTimestampColumn(string $revTable): string {
    $candidates = ['revision_timestamp', 'revision_created', 'created', 'changed'];
    $schema = $this->db->schema();

    $match = '';
    while ($match === '' && $candidates) {
      $candidate = array_shift($candidates);
      if ($schema->fieldExists($revTable, $candidate)) {
        $match = $candidate;
      }
    }
    return $match;
  }

  /**
   * Estimate the total bytes reclaimable if we delete rows flagged by
   * fastrev_node_delete and fastrev_par_delete.
   *
   * Strategy:
   *  - For node side: include node_revision and all node_revision__* tables.
   *  - For paragraph side: include paragraph_revision or paragraphs_item_revision
   *    and all related field revision tables.
   *
   * Each table contributes avg_row_size * rows_to_delete, see
   * estimateTableContribution().
   *
   * @return int
   *   Estimated bytes, rounded; 0 when the database name cannot be determined.
   */
  private function estimatePotentialClaimableBytes(): int {
    $schemaName = $this->currentSchema();
    if (!$schemaName) {
      return 0;
    }

    $schema = $this->db->schema();
    $total = 0;

    // --- Node revision tables ---
    if ($schema->tableExists('fastrev_node_delete')) {
      if ($schema->tableExists('node_revision')) {
        $total += $this->estimateTableContribution('node_revision', 'vid', 'fastrev_node_delete', 'vid');
      }
      // Both underscores of the '__' separator are escaped. An unescaped '_'
      // is a single-character LIKE wildcard, so escaping only the first one
      // would also match unrelated tables such as 'node_revision_xyz'.
      foreach ($this->listTablesLike($schemaName, 'node\\_revision\\_\\_%') as $table) {
        $joinCol = $this->pickRevisionJoinColumn($table, ['revision_id', 'vid']);
        if ($joinCol !== '') {
          $total += $this->estimateTableContribution($table, $joinCol, 'fastrev_node_delete', 'vid');
        }
      }
    }

    // --- Paragraph revision tables (support both schemas) ---
    if ($schema->tableExists('fastrev_par_delete')) {
      // Prefer newer schema if present; otherwise use older.
      $paraRevNew = $schema->tableExists('paragraph_revision') ? 'paragraph_revision' : '';
      $paraRevOld = $schema->tableExists('paragraphs_item_revision') ? 'paragraphs_item_revision' : '';
      $paraRev = $paraRevNew ?: $paraRevOld;

      if ($paraRev !== '') {
        $total += $this->estimateTableContribution($paraRev, 'revision_id', 'fastrev_par_delete', 'rid');
      }

      // Newer schema's consolidated field-revision table.
      if ($schema->tableExists('paragraph_field_revision')) {
        $total += $this->estimateTableContribution('paragraph_field_revision', 'revision_id', 'fastrev_par_delete', 'rid');
      }

      // Older schema's consolidated field-revision table.
      if ($schema->tableExists('paragraphs_item_revision_field_data')) {
        $total += $this->estimateTableContribution('paragraphs_item_revision_field_data', 'revision_id', 'fastrev_par_delete', 'rid');
      }

      // All per-field rev tables like "paragraph_revision__field_*" (both
      // separator underscores escaped, as for the node pattern).
      foreach ($this->listTablesLike($schemaName, 'paragraph\\_revision\\_\\_%') as $table) {
        $joinCol = $this->pickRevisionJoinColumn($table, ['revision_id']);
        if ($joinCol !== '') {
          $total += $this->estimateTableContribution($table, $joinCol, 'fastrev_par_delete', 'rid');
        }
      }
    }

    return (int) round($total);
  }

  /**
   * Reads the database (schema) name from the connection options.
   *
   * @return string|null
   *   The 'database' connection option, or NULL when it is not set.
   */
  private function currentSchema(): ?string {
    $options = $this->db->getConnectionOptions();
    return $options['database'] ?? NULL;
  }

  /**
   * Looks up table names in information_schema that match a LIKE pattern.
   *
   * The pattern is matched with backslash as the LIKE escape character.
   *
   * @param string $schema
   *   Database (schema) name to search in.
   * @param string $like
   *   LIKE pattern for TABLE_NAME.
   *
   * @return string[]
   *   Matching table names cast to strings; empty when nothing matches.
   */
  private function listTablesLike(string $schema, string $like): array {
    $sql = "
      SELECT TABLE_NAME
      FROM information_schema.TABLES
      WHERE TABLE_SCHEMA = :s AND TABLE_NAME LIKE :p ESCAPE '\\\\'
    ";
    $args = [
      ':s' => $schema,
      ':p' => $like,
    ];
    $rows = $this->db->query($sql, $args)->fetchCol();
    if (!$rows) {
      return [];
    }

    $names = [];
    foreach ($rows as $index => $name) {
      $names[$index] = (string) $name;
    }
    return $names;
  }

  /**
   * Returns the first candidate column that exists on the given table.
   *
   * @param string $table
   *   Table to inspect.
   * @param array $candidates
   *   Column names in order of preference.
   *
   * @return string
   *   The first existing column, or an empty string when none exists.
   */
  private function pickRevisionJoinColumn(string $table, array $candidates): string {
    $schema = $this->db->schema();
    $chosen = '';
    foreach ($candidates as $column) {
      if (!$schema->fieldExists($table, $column)) {
        continue;
      }
      $chosen = $column;
      break;
    }
    return $chosen;
  }

  /**
   * Estimates one table's share: avg_row_size(table) * rows_to_delete.
   *
   * rows_to_delete is the number of rows of $dataTable that join to the
   * selector (working) table. The average row size is
   * (DATA_LENGTH + INDEX_LENGTH) / TABLE_ROWS from information_schema.TABLES,
   * or 0 when TABLE_ROWS is not positive.
   *
   * @param string $dataTable
   *   Revision table whose rows would be deleted.
   * @param string $dataJoinCol
   *   Column of $dataTable holding the revision id.
   * @param string $selectorTable
   *   Working table listing the revision ids staged for deletion.
   * @param string $selectorCol
   *   Column of $selectorTable holding the revision id.
   *
   * @return float
   *   Estimated bytes; 0.0 when the database name is unknown, either table is
   *   missing, no rows match, or no statistics row is found.
   */
  private function estimateTableContribution(
    string $dataTable,
    string $dataJoinCol,
    string $selectorTable,
    string $selectorCol
  ): float {
    $schemaName = $this->currentSchema();
    if (!$schemaName) {
      return 0.0;
    }

    $schema = $this->db->schema();
    $bothExist = $schema->tableExists($dataTable) && $schema->tableExists($selectorTable);
    if (!$bothExist) {
      return 0.0;
    }

    // How many rows of the data table are referenced by the working set.
    $countSql = "
      SELECT COUNT(*)
      FROM `{$dataTable}` d
      INNER JOIN `{$selectorTable}` s ON s.`{$selectorCol}` = d.`{$dataJoinCol}`
    ";
    $matched = (int) $this->db->query($countSql)->fetchField();
    if ($matched <= 0) {
      return 0.0;
    }

    // Average bytes per row according to the table statistics.
    $sizeSql = "
      SELECT CASE WHEN TABLE_ROWS > 0
                  THEN (DATA_LENGTH + INDEX_LENGTH) / TABLE_ROWS
                  ELSE 0 END AS avg_row
      FROM information_schema.TABLES
      WHERE TABLE_SCHEMA = :s AND TABLE_NAME = :t
    ";
    $bytesPerRow = $this->db->query($sizeSql, [
      ':s' => $schemaName,
      ':t' => $dataTable,
    ])->fetchField();

    // fetchField() yields FALSE when there is no statistics row at all.
    if ($bytesPerRow === FALSE) {
      return 0.0;
    }
    return ((float) $bytesPerRow) * $matched;
  }

  /**
   * Best-effort lookup of the paragraph revision base table name.
   *
   * Returns 'paragraph_revision' when the schema API reports that table.
   * Otherwise it asks information_schema for a table with exactly that name
   * in the current database and returns what it finds.
   *
   * (Kept for backward compatibility; no method of this class calls it.)
   *
   * @return string
   *   The table name, or an empty string when no such table is found.
   */
  private function detectParagraphRevisionBaseTable(): string {
    if ($this->db->schema()->tableExists('paragraph_revision')) {
      return 'paragraph_revision';
    }

    $lookupSql = "
      SELECT TABLE_NAME
      FROM information_schema.TABLES
      WHERE TABLE_SCHEMA = :s AND TABLE_NAME LIKE 'paragraph\\_revision' ESCAPE '\\\\'
      LIMIT 1
    ";
    $found = $this->db->query($lookupSql, [':s' => $this->currentSchema()])->fetchField();
    if (!is_string($found)) {
      return '';
    }
    return $found;
  }

}
