<?php

/**
 * @file
 * Contains \Drupal\ai_content_migrate\Plugin\AiAgent\AIContentMigrate.
 */

namespace Drupal\ai_content_migrate\Plugin\AiAgent;

use Drupal\ai\OperationType\Chat\ChatMessage;
use Drupal\Core\Access\AccessResult;
use Drupal\Core\DependencyInjection\DependencySerializationTrait;
use Drupal\Core\StringTranslation\TranslatableMarkup;
use Drupal\Core\Url;
use Drupal\ai_agents\Attribute\AiAgent;
use Drupal\ai_agents\Exception\AgentProcessingException;
use Drupal\ai_agents\PluginBase\AiAgentBase;
use Drupal\ai_agents\PluginInterfaces\AiAgentInterface;
use Drupal\field\Entity\FieldStorageConfig;
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Entity\EntityFieldManagerInterface;
use Drupal\Core\Entity\EntityDisplayRepositoryInterface;
use Drupal\file\FileRepositoryInterface;
use Drupal\Component\Datetime\TimeInterface;
use Drupal\Core\State\StateInterface;
use Drupal\Core\Queue\QueueFactory;
use GuzzleHttp\ClientInterface;
use Psr\Log\LoggerInterface;

use Drupal\node\Entity\NodeType;
use Symfony\Component\DependencyInjection\ContainerInterface;
use Drupal\field\Entity\FieldConfig;
use Drupal\taxonomy\Entity\Vocabulary;
use Drupal\taxonomy\Entity\Term;
use Drupal\media\Entity\MediaType;
use HeadlessChromium\BrowserFactory;

/**
 * Provides the AI Content Migrate Agent plugin.
 *
 * This agent coordinates model-assisted migration tasks such as:
 *  - Proposing and applying content type schemas and fields.
 *  - Importing media and taxonomies.
 *  - Migrating a single page or enqueuing multiple pages for migration.
 */
#[AiAgent(
  id: 'ai_content_migrate_agent',
  label: new TranslatableMarkup('AI Content Migrate Agent'),
)]
class AIContentMigrate extends AiAgentBase implements AiAgentInterface {

  use DependencySerializationTrait;

  /**
   * Questions to ask.
   *
   * @var array
   */
  protected $questions = [];

  /**
   * The full result of the task.
   *
   * @var array
   */
  protected $result;

  /**
   * The full data of the initial task.
   *
   * @var array
   */
  protected $data;

  /**
   * Task type.
   *
   * @var string
   */
  protected $taskType;

  /**
   * The entity type manager.
   *
   * @var \Drupal\Core\Entity\EntityTypeManagerInterface
   */
  protected EntityTypeManagerInterface $entityTypeManager;

  /**
   * The entity field manager.
   *
   * @var \Drupal\Core\Entity\EntityFieldManagerInterface
   */
  protected EntityFieldManagerInterface $entityFieldManager;

  /**
   * The entity display repository.
   *
   * @var \Drupal\Core\Entity\EntityDisplayRepositoryInterface
   */
  protected EntityDisplayRepositoryInterface $entityDisplayRepository;

  /**
   * The file repository.
   *
   * @var \Drupal\file\FileRepositoryInterface
   */
  protected FileRepositoryInterface $fileRepository;

  /**
   * The HTTP client.
   *
   * @var \GuzzleHttp\ClientInterface
   */
  protected ClientInterface $httpClient;

  /**
   * Logger channel.
   *
   * @var \Psr\Log\LoggerInterface
   */
  protected LoggerInterface $logger;

  /**
   * State service.
   *
   * @var \Drupal\Core\State\StateInterface
   */
  protected $state;

  /**
   * Time service.
   *
   * @var \Drupal\Component\Datetime\TimeInterface
   */
  protected TimeInterface $time;

  /**
   * Queue factory.
   *
   * @var \Drupal\Core\Queue\QueueFactory
   */
  protected QueueFactory $queueFactory;

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    // The parent factory builds the plugin; this class only adds its own
    // services on top of that instance.
    $agent = parent::create($container, $configuration, $plugin_id, $plugin_definition);

    // Entity-related services.
    $agent->entityTypeManager = $container->get('entity_type.manager');
    $agent->entityFieldManager = $container->get('entity_field.manager');
    $agent->entityDisplayRepository = $container->get('entity_display.repository');

    // File storage and outbound HTTP.
    $agent->fileRepository = $container->get('file.repository');
    $agent->httpClient = $container->get('http_client');

    // Logging goes to the module's own channel.
    $loggerFactory = $container->get('logger.factory');
    $agent->logger = $loggerFactory->get('ai_content_migrate');

    // State storage, time service and queue factory.
    $agent->state = $container->get('state');
    $agent->time = $container->get('datetime.time');
    $agent->queueFactory = $container->get('queue');

    return $agent;
  }

  /**
   * {@inheritdoc}
   */
  public function getId() {
    // Same value as the id declared in the #[AiAgent] attribute of this class.
    $pluginId = 'ai_content_migrate_agent';
    return $pluginId;
  }

  /**
   * {@inheritdoc}
   */
  public function agentsNames(): array {
    // This plugin exposes exactly one agent name.
    $names = [];
    $names[] = 'AI Content Migrate Agent';
    return $names;
  }

  /**
   * {@inheritdoc}
   */
  public function agentsCapabilities(): array {
    // One array input named "payload", empty by default.
    $inputs = [
      'payload' => [
        'name' => 'Payload',
        'type' => 'array',
        'description' => $this->t('Data input for AI-driven migration tasks.'),
        'default_value' => [],
      ],
    ];

    // One array output named "result".
    $outputs = [
      'result' => [
        'description' => $this->t('The outcome of the AI-driven migration task.'),
        'type' => 'array',
      ],
    ];

    // Capabilities are keyed by the agent id.
    $capabilities = [];
    $capabilities['ai_content_migrate_agent'] = [
      'name' => 'AI Content Migrate Agent',
      'description' => $this->t("Handles dynamic content type suggestion, creation, media import, and node migration guided by AI."),
      'inputs' => $inputs,
      'outputs' => $outputs,
    ];

    return $capabilities;
  }

  /**
   * {@inheritdoc}
   */
  public function setData($data): void {
    // Stored exactly as received; no validation or decoding happens here.
    $this->data = $data;
  }

  /**
   * {@inheritdoc}
   */
  public function isAvailable() {
    // Both modules are required; stop at the first one that is not enabled.
    foreach (['node', 'media'] as $module) {
      if (!$this->agentHelper->isModuleEnabled($module)) {
        return FALSE;
      }
    }
    return TRUE;
  }

  /**
   * {@inheritdoc}
   */
  public function isNotAvailableMessage() {
    // Names the same two modules that isAvailable() checks.
    $message = $this->t('You need to enable the node and media modules to use this agent.');
    return $message;
  }

  /**
   * {@inheritdoc}
   */
  public function getRetries(): int {
    // Fixed retry count reported by this agent.
    $retries = 2;
    return $retries;
  }

  /**
   * {@inheritdoc}
   */
  public function getData() {
    // Returns whatever setData() stored last (NULL if it was never called).
    $payload = $this->data;
    return $payload;
  }

  /**
   * {@inheritdoc}
   *
   * Determines if the incoming task can be solved immediately, needs
   * additional answers, or cannot be solved.
   */
  public function determineSolvability(): int {
    parent::determineSolvability();

    // Routing can replace the stored payload with the sub-agent output, so the
    // task type is resolved before the payload is read back.
    $this->taskType = $this->determineTaskType();

    $payload = $this->getData();
    $needsDecoding = !(is_array($payload) || is_null($payload));
    if ($needsDecoding) {
      $payload = $this->decodeModelPayload($payload);
    }

    // Append the resolved action as an extra entry of the payload.
    $payload[] = ['action' => $this->taskType];
    $this->setData($payload);

    // Anything other than the three known task types (including 'fail') is
    // reported as not solvable.
    return match ($this->taskType) {
      'applySchema' => AiAgentInterface::JOB_SOLVABLE,
      'question', 'discoverSource' => AiAgentInterface::JOB_NEEDS_ANSWERS,
      default => AiAgentInterface::JOB_NOT_SOLVABLE,
    };
  }

  /**
   * {@inheritdoc}
   */
  public function askQuestion(): array {
    // The proposed model is either wrapped in a list (first entry) or sits at
    // the top level of the payload. Fall back to an empty model when neither
    // is present so the array-typed helpers below never receive NULL.
    $model = $this->data[0]['model'] ?? [];
    if (empty($model)) {
      $model = $this->data['model'] ?? [];
    }
    if (!is_array($model)) {
      $model = [];
    }

    // Remember the model so a later "apply" step can pick it up from state.
    $this->saveLastModel($model);

    // The question is the HTML preview of the model; it is stored as well so
    // it can be retrieved later.
    $preview = $this->formatProposedModel($model);
    $this->saveLastQuestion($preview);

    return [$preview];
  }

  /**
   * {@inheritdoc}
   */
  public function answerQuestion(): string {
    // Accept the same two payload shapes as askQuestion(): the model wrapped
    // in the first list entry, or at the top level of the payload.
    $model = $this->data[0]['model'] ?? [];
    if (empty($model)) {
      $model = $this->data['model'] ?? [];
    }
    // saveLastModel() and formatProposedModel() both require an array.
    if (!is_array($model)) {
      $model = [];
    }

    $this->saveLastModel($model);
    return $this->formatProposedModel($model);
  }

  /**
   * {@inheritdoc}
   */
  public function solve() {
    // The action is read from the first payload entry; a missing action is
    // treated like an unknown one.
    $action = $this->data[0]['action'] ?? '';

    // Loose comparison on purpose: it mirrors switch/case semantics.
    if ($action == 'discoverSource') {
      return $this->discoverSource();
    }
    if ($action == 'applySchema') {
      return $this->createContentTypesAndFields();
    }

    throw new AgentProcessingException($this->t('Unknown action type.'));
  }

  /**
   * Discovers the source (placeholder for future implementation).
   *
   * @return string
   *   Discovery output or an empty string.
   */
  protected function discoverSource(): string {
    // No discovery is implemented yet; callers always get an empty string.
    $discovered = '';
    return $discovered;
  }

  /**
   * Applies the proposed model: creates bundles/fields and imports content.
   *
   * @return string
   *   A human-readable message describing the result.
   *
   * @throws \Drupal\ai_agents\Exception\AgentProcessingException
   *   Thrown when the operation fails.
   */
  protected function createContentTypesAndFields(): string {
    $modelToCreate = $this->getLastModel();

    // Step 1: create the structure (vocabularies, media types, bundles,
    // fields) described by the stored model.
    try {
      $this->importFromJson($modelToCreate);
    }
    catch (\Exception $e) {
      // The agent exception only carries a generic message, so keep the root
      // cause in the log.
      $this->logger->error('Applying the content model failed: @message', ['@message' => $e->getMessage()]);
      throw new AgentProcessingException($this->t('Failed to create content types and fields.'));
    }

    // State yields NULL when no page list was stored; count() would throw a
    // TypeError on that, so treat it as "no pages".
    $pages = $this->getPages();
    $multiple = is_array($pages) && count($pages) > 1;

    // A single-page import needs the HTML stored during routing. Fail with a
    // clear message instead of passing NULL to a string-typed parameter.
    $html = NULL;
    if (!$multiple) {
      $html = $this->getHtmlPage();
      if (!is_string($html) || $html === '') {
        throw new AgentProcessingException($this->t('There is no stored page to import content from.'));
      }
    }

    // Step 2: import the single page right away, or enqueue all pages.
    try {
      if ($multiple) {
        return $this->createQueue($modelToCreate);
      }
      return $this->importContent($html, $modelToCreate);
    }
    catch (\Exception $e) {
      $this->logger->error('Importing content failed: @message', ['@message' => $e->getMessage()]);
      throw new AgentProcessingException($this->t('Failed to import content.'));
    }
  }

  /**
   * Determines the task type from input data and sub-agent routing.
   *
   * This method may fetch HTML for the first URL, ask sub-agents for a model
   * proposal, and store the intermediate state for later steps.
   *
   * @return string
   *   The task type to execute next (e.g., 'discoverSource' or 'applySchema').
   */
  protected function determineTaskType(): string {
    // Ask the router sub-agent what the user wants, giving it the task
    // description and the model stored by a previous step (if any).
    $data = $this->agentHelper->runSubAgent('RouterCall', [
      'prompt' => $this->task->getDescription(),
      'existing model' => json_encode($this->getLastModel()),
    ]);

    // The router answer is read in two shapes: flat (['urls' => …,
    // 'action' => …]) or wrapped in a list ([0 => ['urls' => …]]).
    $urls = !empty($data['urls'][0]) ? $data['urls'] : ($data[0]['urls'] ?? NULL);
    // A single URL given as a plain string is wrapped so that the array-typed
    // setter below does not fail; anything else non-array counts as "no URLs".
    if (is_string($urls)) {
      $urls = [$urls];
    }
    if (!is_array($urls)) {
      $urls = [];
    }

    // New URLs start a new migration: remember the pages and drop the model
    // stored for the previous one.
    $url = '';
    if (!empty($urls)) {
      $this->setPages($urls);
      $url = $urls[0] ?? '';
      $this->state->delete('ai_content_migrate.last_model');
    }

    // Read the action from whichever shape the router used, so a missing key
    // neither raises a warning nor hides a command.
    $action = $data['action'] ?? $data[0]['action'] ?? '';
    $isChangingModel = $action === 'refineModel';
    $isDryRun = $action === 'DryRunImport';
    $this->setDryRun($isDryRun);

    // If no URL and not changing the model, we import the already defined
    // content.
    if (empty($url) && !$isChangingModel) {
      return 'applySchema';
    }

    // Fetch the HTML of the first URL unless the model is only being refined.
    // The variable is always initialised so the sub-agent call below never
    // reads an undefined variable.
    $contentHtml = '';
    if (!$isChangingModel && !empty($url)) {
      $contentHtml = $this->retrieveContentHtml($url);
    }

    // Ask the matching sub-agent for a proposed model. The stored model is
    // re-read here on purpose: it was deleted above when new URLs arrived.
    $proposalAgent = $action === 'analyzeParagraphs' ? 'modelProposalParagraphs' : 'modelProposal';
    $data = $this->agentHelper->runSubAgent($proposalAgent, [
      'html of the old website page' => $contentHtml,
      'existing model' => json_encode($this->getLastModel()),
    ]);

    // Persist the fetched HTML for later steps (only if we actually fetched
    // it). The cast protects the string-typed setter.
    if (!$isChangingModel) {
      $this->setHtmlPage((string) $contentHtml);
    }

    // Store the sub-agent output for downstream steps.
    $this->setData($data);

    return 'discoverSource';
  }

  /**
   * Placeholder for a future website-level scrape.
   */
  protected function scrapeWebsiteUrl() {
    // Intentionally empty: no website-level scraping is implemented yet.

  }

  /**
   * Builds a human-friendly HTML preview of the proposed model.
   *
   * @param array $data
   *   The proposed model array with content_types, taxonomies, media_bundles.
   *
   * @return string
   *   Rendered HTML string.
   */
  protected function formatProposedModel(array $data) : string {
    // Output layout: a question heading, a one-line numeric summary, then one
    // section each for content types, taxonomies and media bundles. All model
    // values are escaped before they are placed into the markup.

    // Helper for safe HTML escaping. Non-scalar values are JSON-encoded first.
    $e = function ($v) {
      return \Drupal\Component\Utility\Html::escape(
        is_scalar($v) ? (string) $v : json_encode($v)
      );
    };

    // Helper for cardinality text. Non-numeric input is treated as 1.
    $formatCardinality = function ($card) {
      $n = is_numeric($card) ? (int) $card : 1;
      if ($n === -1) {
        return $this->t('unlimited');
      }
      if ($n === 1) {
        return $this->t('single value');
      }
      return $this->t('up to @n', ['@n' => $n]);
    };

    // Helper to truncate long strings. The result, including the ellipsis, is
    // at most $max characters long.
    $truncate = function (string $text, int $max = 80) {
      return (mb_strlen($text) > $max)
        ? (mb_substr($text, 0, $max - 1) . '…')
        : $text;
    };

    // Missing sections are treated as empty lists.
    $contentTypes = $data['content_types'] ?? [];
    $taxonomies   = $data['taxonomies'] ?? [];
    $mediaBundles = $data['media_bundles'] ?? [];

    // Numeric summary: count all fields and the required ones across every
    // content type.
    $totalFields = 0;
    $requiredFields = 0;
    foreach ($contentTypes as $ct) {
      foreach (($ct['fields'] ?? []) as $f) {
        $totalFields++;
        if (!empty($f['required'])) {
          $requiredFields++;
        }
      }
    }

    $output  = '<h2>' . $this->t('Do you want to import the following data model?') . '</h2>';
    $output .= '<p>' . $this->t('@ct content type(s), @fields field(s) (@req required), @tx taxonomie(s), @mb media bundle(s).', [
        '@ct' => count($contentTypes),
        '@fields' => $totalFields,
        '@req' => $requiredFields,
        '@tx' => count($taxonomies),
        '@mb' => count($mediaBundles),
      ]) . '</p>';

    // Content types: one list item per type, with its fields inside a
    // collapsible <details> element.
    $output .= "<h2>" . $this->t('Content Types (@count)', ['@count' => count($contentTypes)]) . "</h2><ul>";
    foreach ($contentTypes as $type) {
      $label = $e($type['label'] ?? '');
      $machine = $e($type['type'] ?? '');
      $desc = trim((string) ($type['description'] ?? ''));
      $fields = $type['fields'] ?? [];
      $fieldCount = count($fields);

      $output .= "<li><strong>{$label}</strong> <code>{$machine}</code>";
      // The description is optional and only shown when non-empty.
      if ($desc !== '') {
        $output .= " — " . $e($desc);
      }
      $output .= "<details><summary>" . $this->t('@count field(s)', ['@count' => $fieldCount]) . "</summary><ul>";

      foreach ($fields as $f) {
        $fname = $e($f['name'] ?? '');
        $flabel = $e($f['label'] ?? '');
        $ftype = $e($f['type'] ?? '');
        $required = !empty($f['required']) ? $this->t('required') : $this->t('optional');
        $cardText = $formatCardinality($f['cardinality'] ?? 1);

        $output .= "<li><strong>{$flabel}</strong> <code>{$fname}</code> — <em>{$ftype}</em> · {$required} · "
          . $this->t('Cardinality: @card', ['@card' => $cardText]);

        // Show at most two XPaths per field and summarise the rest as a count.
        $xps = $f['xpaths'] ?? [];
        if (!empty($xps)) {
          $shown = array_slice($xps, 0, 2);
          $extra = max(0, count($xps) - 2);
          $xpHtml = array_map(fn($x) => '<code>' . $e($x) . '</code>', $shown);
          $output .= "<br/>" . $this->t('XPaths:') . " " . implode(', ', $xpHtml);
          if ($extra > 0) {
            $output .= ' ' . $this->t('(+@n more)', ['@n' => $extra]);
          }
        }
        $output .= "</li>";
      }

      $output .= "</ul></details></li>";
    }
    $output .= "</ul>";

    // Taxonomies: label, machine name, term count and a preview of up to six
    // terms.
    $output .= "<h2>" . $this->t('Taxonomies (@count)', ['@count' => count($taxonomies)]) . "</h2><ul>";
    foreach ($taxonomies as $taxonomy) {
      $label = $e($taxonomy['label'] ?? '');
      $machine = $e($taxonomy['vocabulary'] ?? '');
      $terms = $taxonomy['terms'] ?? [];
      $preview = array_slice($terms, 0, 6);
      $extra = max(0, count($terms) - 6);

      // NOTE(review): implode() expects the terms to be scalar labels; nested
      // arrays would not render meaningfully here.
      $output .= "<li><strong>{$label}</strong> <code>{$machine}</code> — "
        . $this->t('@n term(s)', ['@n' => count($terms)]) . ": "
        . $e(implode(', ', $preview));
      if ($extra > 0) {
        $output .= ' ' . $this->t('(+@n more)', ['@n' => $extra]);
      }
      $output .= "</li>";
    }
    $output .= "</ul>";

    // Media bundles: bundle name, item count and the first item as an example.
    $output .= "<h2>" . $this->t('Media Bundles (@count)', ['@count' => count($mediaBundles)]) . "</h2><ul>";
    foreach ($mediaBundles as $bundle) {
      $bundleName = $e($bundle['bundle'] ?? '');
      $items = $bundle['items'] ?? [];
      $itemCount = count($items);

      $output .= "<li><strong>{$bundleName}</strong> — "
        . $this->t('@n item(s)', ['@n' => $itemCount]);

      if ($itemCount > 0) {
        // NOTE(review): reads index 0, so the items are assumed to be a
        // zero-based list.
        $first = $items[0];
        $alt = $e($first['alt'] ?? '');
        // The example URL is shortened to 90 characters before escaping.
        $url = $e($truncate((string) ($first['url'] ?? ''), 90));
        $output .= "<br/>" . $this->t('Example:') . " <em>{$alt}</em> — <code>{$url}</code>";
      }
      $output .= "</li>";
    }
    $output .= "</ul>";

    return $output;
  }

  /**
   * Persists the last model in state for later steps.
   *
   * @param array $model
   *   The model to save.
   */
  private function saveLastModel(array $model) {
    // Same state key that getLastModel() reads.
    $stateKey = 'ai_content_migrate.last_model';
    $this->state->set($stateKey, $model);
  }

  /**
   * Persists the last question/answer HTML preview for later retrieval.
   *
   * @param string $question
   *   The HTML preview string.
   */
  private function saveLastQuestion(string $question) {
    // Note the key name: the preview is stored under "last_answer".
    $stateKey = 'ai_content_migrate.last_answer';
    $this->state->set($stateKey, $question);
  }

  /**
   * Stores the rendered HTML page content for subsequent import.
   *
   * @param string $contentHtml
   *   The HTML markup.
   */
  private function setHtmlPage(string $contentHtml) {
    // Same state key that getHtmlPage() reads.
    $stateKey = 'ai_content_migrate.html_page';
    $this->state->set($stateKey, $contentHtml);
  }

  /**
   * Gets the stored HTML markup for the current import.
   *
   * @return string|null
   *   The HTML markup or NULL if not set.
   */
  private function getHtmlPage() {
    // Reads the value written by setHtmlPage().
    $stateKey = 'ai_content_migrate.html_page';
    return $this->state->get($stateKey);
  }

  /**
   * Gets the stored last human-friendly model preview (if any).
   *
   * @return string|null
   *   The HTML preview or NULL if not set.
   */
  private function getLastQuestion() {
    // Reads the value written by saveLastQuestion().
    $stateKey = 'ai_content_migrate.last_answer';
    return $this->state->get($stateKey);
  }

  /**
   * Gets the stored last model (if any).
   *
   * @return array|null
   *   The model array or NULL if not set.
   */
  private function getLastModel() {
    // Reads the value written by saveLastModel().
    $stateKey = 'ai_content_migrate.last_model';
    return $this->state->get($stateKey);
  }

  /**
   * Persists the list of pages discovered by the router.
   *
   * @param array $urls
   *   List of URLs.
   */
  private function setPages(array $urls) {
    // Same state key that getPages() reads.
    $stateKey = 'ai_content_migrate.pages';
    $this->state->set($stateKey, $urls);
  }

  /**
   * Returns the list of pages to be imported.
   *
   * @return array|null
   *   List of URLs or NULL.
   */
  private function getPages() {
    // Reads the value written by setPages().
    $stateKey = 'ai_content_migrate.pages';
    return $this->state->get($stateKey);
  }

  /**
   * Sets dry-run behavior in state.
   *
   * @param bool $dryRun
   *   TRUE to enable dry-run, FALSE otherwise.
   */
  private function setDryRun(bool $dryRun) {
    // Same state key that getDryRun() reads.
    $stateKey = 'ai_content_migrate.dry_run';
    $this->state->set($stateKey, $dryRun);
  }

  /**
   * Gets dry-run behavior from state.
   *
   * @return bool|null
   *   TRUE for dry run, FALSE otherwise, or NULL if not set.
   */
  private function getDryRun() {
    // Reads the value written by setDryRun().
    $stateKey = 'ai_content_migrate.dry_run';
    return $this->state->get($stateKey);
  }

  /**
   * Imports content types, fields, taxonomies, and media bundles from a model.
   *
   * @param array $data
   *   The decoded model array.
   *
   * @throws \Exception
   *   Thrown when the model structure is invalid.
   */
  public function importFromJson($data): void {
    // Creates whatever the model describes and is still missing: vocabularies
    // with their terms, media types, node types, fields and the default
    // form/view display components. Existing entities are left untouched.
    if (!is_array($data)) {
      throw new \Exception("Invalid JSON structure");
    }

    // Dry run: every write below is skipped, so there is nothing left to do
    // once the structure check has passed.
    if ($this->getDryRun()) {
      return;
    }

    // Default widget/formatter per field type, used when the model does not
    // name one. These are loop-invariant, so they are built once.
    $widgetByType = [
      'string' => 'string_textfield',
      'string_long' => 'string_textarea',
      'text' => 'text_textfield',
      'text_long' => 'text_textarea',
      'text_with_summary' => 'text_textarea_with_summary',
      'boolean' => 'boolean_checkbox',
      'integer' => 'number',
      'decimal' => 'number',
      'float' => 'number',
      'entity_reference' => 'entity_reference_autocomplete',
      'image' => 'image_image',
      'file' => 'file_generic',
      'datetime' => 'datetime_default',
      'timestamp' => 'datetime_timestamp',
      'link' => 'link_default',
      'list_string' => 'options_select',
      'list_integer' => 'options_select',
    ];
    $formatterByType = [
      'string' => 'string',
      'string_long' => 'basic_string',
      'text' => 'text_default',
      'text_long' => 'text_default',
      'text_with_summary' => 'text_default',
      'boolean' => 'boolean',
      'integer' => 'number_integer',
      'decimal' => 'number_decimal',
      'float' => 'number_decimal',
      'entity_reference' => 'entity_reference_label',
      'image' => 'image',
      'file' => 'file_default',
      'datetime' => 'datetime_default',
      'timestamp' => 'timestamp',
      'link' => 'link',
      'list_string' => 'list_default',
      'list_integer' => 'list_default',
    ];

    // 1) Vocabularies and terms.
    $term_storage = $this->entityTypeManager->getStorage('taxonomy_term');
    foreach ($data['taxonomies'] ?? [] as $tax) {
      $vid = is_array($tax) ? ($tax['vocabulary'] ?? NULL) : NULL;
      if (empty($vid)) {
        // Without a machine name there is nothing to create or attach to.
        continue;
      }
      if (!Vocabulary::load($vid)) {
        Vocabulary::create([
          'vid' => $vid,
          'name' => $tax['label'] ?? $vid,
        ])->save();
      }
      foreach ((array) ($tax['terms'] ?? []) as $term_label) {
        // Terms are expected to be plain labels; skip anything else.
        if (!is_scalar($term_label) || trim((string) $term_label) === '') {
          continue;
        }
        $exists = $term_storage->loadByProperties([
          'vid' => $vid,
          'name' => $term_label,
        ]);
        if (!$exists) {
          Term::create([
            'vid' => $vid,
            'name' => $term_label,
          ])->save();
        }
      }
    }

    // 2) Media bundles (e.g., image). The bundle id doubles as the media
    // source plugin id.
    foreach ($data['media_bundles'] ?? [] as $bundle) {
      $mid = is_array($bundle) ? ($bundle['bundle'] ?? NULL) : NULL;
      if (empty($mid) || MediaType::load($mid)) {
        continue;
      }
      $media_type = MediaType::create([
        'id' => $mid,
        'label' => ucfirst($mid),
        'source' => $mid,
      ]);
      // A media type is only usable once its source field exists and is
      // referenced from the source configuration; otherwise the source has no
      // field definition to hand out when media items are created.
      $source = $media_type->getSource();
      $source_field = $source->createSourceField($media_type);
      $source_configuration = $source->getConfiguration();
      $source_configuration['source_field'] = $source_field->getName();
      $source->setConfiguration($source_configuration);
      $media_type->save();
      // The storage has to be saved before the field that uses it.
      $source_storage = $source_field->getFieldStorageDefinition();
      if ($source_storage->isNew()) {
        $source_storage->save();
      }
      $source_field->save();
    }

    // 3) Content types and fields.
    foreach ($data['content_types'] ?? [] as $ct) {
      $type = is_array($ct) ? ($ct['type'] ?? NULL) : NULL;
      if (empty($type)) {
        continue;
      }
      $label = $ct['label'] ?? $type;

      // Create bundle if missing.
      if (!NodeType::load($type)) {
        NodeType::create([
          'type' => $type,
          'name' => $label,
          'description' => $ct['description'] ?? '',
        ])->save();
      }

      // Display components are collected first and applied after all fields
      // exist, so the display entities are loaded when the new field
      // definitions are already available.
      $form_components = [];
      $view_components = [];

      foreach ((array) ($ct['fields'] ?? []) as $i => $field) {
        $name = is_array($field) ? ($field['name'] ?? NULL) : NULL;
        if (empty($name)) {
          continue;
        }
        $label_field = $field['label'] ?? $name;
        $weight = $field['weight'] ?? $i;

        // Special handling for the title: it is a base field, so only its
        // per-bundle label and its position on the form are set. The label is
        // stored as a bundle-level override of the base field.
        if ($name === 'title') {
          $definitions = $this->entityFieldManager->getFieldDefinitions('node', $type);
          if (isset($definitions['title'])) {
            $definitions['title']->getConfig($type)->setLabel($label_field)->save();
          }
          $form_components['title'] = [
            'type' => $widgetByType['string'],
            'weight' => $weight,
          ];
          continue;
        }

        $type_field = $field['type'] ?? NULL;
        if (empty($type_field)) {
          // A field cannot be created without a field type.
          $this->logger->warning('Skipping field @name on @bundle: no field type given.', [
            '@name' => $name,
            '@bundle' => $type,
          ]);
          continue;
        }
        $required = (bool) ($field['required'] ?? FALSE);
        $cardinality = (int) ($field['cardinality'] ?? 1);

        // Field API: create storage and instance if missing.
        $field_name = 'field_' . $name;
        // Heuristic kept from the original implementation: any field whose
        // name contains "tags" is wired to taxonomy terms.
        $is_tags = str_contains($field_name, 'tags');

        if (!FieldStorageConfig::loadByName('node', $field_name)) {
          $storage_settings = [];
          if ($type_field === 'list_string' && !empty($field['options']) && is_array($field['options'])) {
            $opts = $field['options'];
            // A plain list of values is turned into a value => label map.
            if (array_is_list($opts)) {
              $opts = array_combine($opts, $opts);
            }
            $storage_settings['allowed_values'] = $opts;
          }
          if ($is_tags) {
            $storage_settings = [
              'target_type' => 'taxonomy_term',
            ];
          }
          FieldStorageConfig::create([
            'field_name' => $field_name,
            'entity_type' => 'node',
            'type' => $type_field,
            'cardinality' => $cardinality,
            'settings' => $storage_settings,
          ])->save();
        }

        if (!FieldConfig::loadByName('node', $type, $field_name)) {
          $field_values = [
            'field_name' => $field_name,
            'entity_type' => 'node',
            'bundle' => $type,
            'label' => $label_field,
            'required' => $required,
          ];
          if ($is_tags) {
            $field_values['settings'] = [
              'handler' => 'default:taxonomy_term',
            ];
          }
          FieldConfig::create($field_values)->save();
        }

        // Form display.
        $form_components[$field_name] = [
          'type' => $field['widget'] ?? ($widgetByType[$type_field] ?? 'string_textfield'),
          'weight' => $weight,
        ];

        // View display.
        $view_components[$field_name] = [
          'type' => $field['formatter'] ?? ($formatterByType[$type_field] ?? 'string'),
          'weight' => $weight,
          'label' => $field['display_label'] ?? 'above',
        ];
      }

      // Retrieve/create the default displays and apply the components.
      $form_display = $this->entityDisplayRepository->getFormDisplay('node', $type, 'default');
      foreach ($form_components as $component => $options) {
        $form_display->setComponent($component, $options);
      }
      $form_display->save();

      $view_display = $this->entityDisplayRepository->getViewDisplay('node', $type, 'default');
      foreach ($view_components as $component => $options) {
        $view_display->setComponent($component, $options);
      }
      $view_display->save();
    }
  }

  /**
   * Imports content from a single HTML page using XPaths defined in the model.
   *
   * Order of operations:
   *  1) Import media first (from model media_bundles and any media xpaths).
   *  2) Import taxonomy terms (from model taxonomies and from xpaths).
   *  3) Create content nodes and attach references.
   *
   * @param string $html
   *   HTML markup or an absolute path to the HTML file.
   * @param mixed $modelToCreate
   *   Model (array/object/string JSON) with content_types, taxonomies, media_bundles.
   *
   * @return string
   *   A message summarizing the import outcome.
   *
   * @throws \Exception
   */
  private function importContent(string $html, mixed $modelToCreate): string {
    // Normalize model structure.
    $model = is_array($modelToCreate) ? $modelToCreate
      : (is_string($modelToCreate) ? (json_decode($modelToCreate, TRUE) ?? [])
        : (is_object($modelToCreate) ? json_decode(json_encode($modelToCreate), TRUE) : []));

    // Load HTML (path or markup).
    $baseDir = null;
    if (is_file($html)) {
      $baseDir = rtrim(dirname(realpath($html)), DIRECTORY_SEPARATOR) . DIRECTORY_SEPARATOR;
      $htmlContent = file_get_contents($html);
    }
    else {
      $htmlContent = $html;
    }

    // Parse DOM/XPath (tolerant to encoding issues).
    libxml_use_internal_errors(TRUE);
    $dom = new \DOMDocument('1.0', 'UTF-8');
    $dom->loadHTML($htmlContent);
    libxml_clear_errors();
    $xp = new \DOMXPath($dom);

    // Helper to resolve relative URLs when reading from file system.
    $resolveUrl = function (string $url) use ($baseDir): string {
      if ($baseDir === null || preg_match('#^https?://#i', $url) || str_starts_with($url, 'file://')) {
        return $url;
      }
      $candidate = $baseDir . ltrim($url, '/');
      return file_exists($candidate) ? 'file://' . $candidate : $url;
    };

    // Evaluate an XPath and return a single scalar value when possible.
    $evaluateOne = function (string $expr) use ($xp) {
      try {
        $res = $xp->evaluate($expr);
      }
      catch (\Throwable $e) {
        return null;
      }
      if ($res instanceof \DOMNodeList) {
        if ($res->length) {
          $item = $res->item(0);
          return $item instanceof \DOMAttr ? $item->value : trim($item->textContent);
        }
        return null;
      }
      // Scalars: string/number/bool.
      $val = is_bool($res) ? ($res ? '1' : '0') : (string) $res;
      $val = trim($val);
      return ($val === '') ? null : $val;
    };

    // Return the first non-empty match from a list of XPaths.
    $firstMatch = function(array $xpaths) use ($evaluateOne) {
      foreach ($xpaths as $x) {
        $v = $evaluateOne($x);
        if ($v !== null && $v !== '') {
          return $v;
        }
      }
      return NULL;
    };

    // Return all matches across a list of XPaths.
    $allMatches = function(array $xpaths) use ($xp): array {
      $out = [];
      foreach ($xpaths as $x) {
        try {
          $res = $xp->evaluate($x);
        }
        catch (\Throwable $e) {
          continue;
        }
        if ($res instanceof \DOMNodeList) {
          if ($res->length) {
            foreach ($res as $item) {
              $out[] = $item instanceof \DOMAttr ? $item->value : trim($item->textContent);
            }
          }
        }
        else {
          $val = is_bool($res) ? ($res ? '1' : '0') : (string) $res;
          $val = trim($val);
          if ($val !== '') {
            $out[] = $val;
          }
        }
      }
      return $out;
    };

    // Helper to sanitize integer-like values.
    $cleanInt = function ($value): ?int {
      if ($value === NULL) return NULL;
      if (is_numeric($value)) return (int) $value;
      if (preg_match('/-?\d+/', (string) $value, $m)) return (int) $m[0];
      return NULL;
    };

    // Page title used for image alt/title fallback.
    $pageTitle = null;
    foreach (($model['content_types'] ?? []) as $ct) {
      foreach (($ct['fields'] ?? []) as $f) {
        if (($f['name'] ?? '') === 'title' && !empty($f['xpaths'])) {
          $pageTitle = $firstMatch($f['xpaths']);
          break 2;
        }
      }
    }
    if (!$pageTitle) {
      $pageTitle = $firstMatch(["//meta[@property='og:title']/@content", "//h1[normalize-space()]"]);
    }
    $defaultAlt = isset($model['default_alt']) && trim((string) $model['default_alt']) !== ''
      ? trim((string) $model['default_alt'])
      : 'Immagine';

    // ===== 1) MEDIA from media_bundles =====
    $createdMediaByUrl = [];          // url => mid
    $mediaByBundle = [];              // bundle => [mid...]
    $mediaInfo = [];                  // mid => ['fid' => X, 'alt' => '...', 'source_field' => 'field_media_image']

    foreach ($model['media_bundles'] ?? [] as $bundle) {
      $bundle_id = $bundle['bundle'] ?? 'image';
      $media_type = \Drupal\media\Entity\MediaType::load($bundle_id);
      if (!$media_type) {
        continue;
      }
      $source_field = $media_type->getSource()->getSourceFieldDefinition($media_type)->getName();

      foreach ($bundle['items'] ?? [] as $item) {
        $url = $item['url'] ?? NULL;
        if (!$url) continue;
        $absUrl = $resolveUrl($url);

        // Download bytes.
        $data = NULL;
        try {
          if (preg_match('#^https?://#i', $absUrl)) {
            $resp = $this->httpClient->get($absUrl, ['timeout' => 20]);
            $data = (string) $resp->getBody();
          }
          elseif (str_starts_with($absUrl, 'file://')) {
            $data = @file_get_contents(substr($absUrl, 7));
          }
        }
        catch (\Throwable $e) {
          \Drupal::logger('aicontent')->error('Fetch media failed @u: @m', ['@u' => $absUrl, '@m' => $e->getMessage()]);
        }
        if (!$data) continue;

        // Create media only when NOT a dry-run.
        if ($this->getDryRun()) {
          continue;
        }

        $filename = basename(parse_url($absUrl, PHP_URL_PATH) ?: ('media_' . uniqid() . '.bin'));
        try {
          $file = $this->fileRepository->writeData(
            $data,
            'public://aicontent/' . $filename,
            \Drupal\Core\File\FileSystemInterface::EXISTS_RENAME
          );
        }
        catch (\Throwable $e) {
          \Drupal::logger('aicontent')->error('Write file failed: @m', ['@m' => $e->getMessage()]);
          continue;
        }

        // ALT fallback: item.alt -> pageTitle -> default.
        $altText = '';
        if (isset($item['alt']) && trim((string) $item['alt']) !== '') {
          $altText = trim((string) $item['alt']);
        }
        elseif (!empty($pageTitle)) {
          $altText = $pageTitle;
        }
        else {
          $altText = $defaultAlt;
        }

        $media = \Drupal\media\Entity\Media::create([
          'bundle' => $bundle_id,
          'name' => $filename,
          $source_field => [
            'target_id' => $file->id(),
            'alt' => $altText,
            'title' => $altText,
          ],
          'status' => 1,
        ]);
        $media->save();

        $createdMediaByUrl[$url] = $media->id();
        $createdMediaByUrl[$absUrl] = $media->id();
        $mediaByBundle[$bundle_id][] = $media->id();
        $mediaInfo[$media->id()] = ['fid' => $file->id(), 'alt' => $altText, 'source_field' => $source_field];
      }
    }

    // Helpers to convert URL -> File/Media on the fly.
    $downloadToFile = function (string $url) {
      try {
        if (preg_match('#^https?://#i', $url)) {
          $resp = $this->httpClient->get($url, ['timeout' => 20]);
          $data = (string) $resp->getBody();
        }
        elseif (str_starts_with($url, 'file://')) {
          $data = @file_get_contents(substr($url, 7));
        }
        else {
          return NULL;
        }
        if (!$data) return NULL;
        $filename = basename(parse_url($url, PHP_URL_PATH) ?: ('media_' . uniqid() . '.bin'));
        $file = $this->fileRepository->writeData(
          $data,
          'public://aicontent/' . $filename,
          \Drupal\Core\File\FileSystemInterface::EXISTS_RENAME
        );
        return $file;
      }
      catch (\Throwable $e) {
        \Drupal::logger('aicontent')->error('DownloadToFile failed @u: @m', ['@u' => $url, '@m' => $e->getMessage()]);
        return NULL;
      }
    };
    $createMediaFromFile = function ($fid, string $altText = 'Immagine') {
      $mtype = \Drupal\media\Entity\MediaType::load('image');
      if (!$mtype) return NULL;
      $src = $mtype->getSource()->getSourceFieldDefinition($mtype)->getName();
      $media = \Drupal\media\Entity\Media::create([
        'bundle' => 'image',
        'name' => 'img_' . $fid,
        $src => [
          'target_id' => $fid,
          'alt' => $altText,
          'title' => $altText,
        ],
        'status' => 1,
      ]);
      $media->save();
      return [$media->id(), $src, $altText];
    };

    // ===== 2) TAXONOMIES =====
    $existingVocabs = [];
    foreach ($model['taxonomies'] ?? [] as $tax) {
      $vid = $tax['vocabulary'] ?? NULL;
      if (!$vid) continue;
      $existingVocabs[$vid] = TRUE;
      foreach ($tax['terms'] ?? [] as $term_name) {
        $term_name = trim((string) $term_name);
        if ($term_name === '') continue;
        $tids = $this->entityTypeManager->getStorage('taxonomy_term')->getQuery()
          ->condition('vid', $vid)
          ->condition('name', $term_name)
          ->accessCheck(FALSE)
          ->execute();
        if (!$tids) {
          \Drupal\taxonomy\Entity\Term::create(['vid' => $vid, 'name' => $term_name])->save();
        }
      }
    }

    // Cache term ids per vocabulary.
    $termsCache = [];
    foreach (array_keys($existingVocabs) as $vid) {
      $termsCache[$vid] = [];
      $tids = $this->entityTypeManager->getStorage('taxonomy_term')->getQuery()
        ->condition('vid', $vid)
        ->accessCheck(FALSE)
        ->execute();
      if ($tids) {
        $loaded = $this->entityTypeManager->getStorage('taxonomy_term')->loadMultiple($tids);
        foreach ($loaded as $t) {
          $termsCache[$vid][mb_strtolower($t->label())] = $t->id();
        }
      }
    }

    // Heuristic to resolve a vocabulary for a given field name.
    $resolveVocabulary = function (string $fieldName) use ($model, $existingVocabs): ?string {
      $fname = strtolower($fieldName);
      if (isset($existingVocabs[$fname])) return $fname;
      $tax = $model['taxonomies'] ?? [];
      if (count($tax) === 1 && isset($tax[0]['vocabulary'])) return $tax[0]['vocabulary'];
      foreach ($tax as $t) {
        $vid = strtolower($t['vocabulary'] ?? '');
        $label = strtolower($t['label'] ?? '');
        if ($fname === 'tags' && (str_contains($vid, 'tag') || str_contains($label, 'tag'))) return $t['vocabulary'];
        if (str_contains($vid, $fname) || str_contains($label, $fname)) return $t['vocabulary'];
      }
      return NULL;
    };

    // ===== 3) NODE =====
    $entityFieldManager = $this->entityFieldManager;

    $node_id = null;
    foreach ($model['content_types'] ?? [] as $ct) {
      $bundle = $ct['type'] ?? NULL;
      if (!$bundle) continue;

      $node_values = ['type' => $bundle, 'status' => 1];
      $fieldDefs = $entityFieldManager->getFieldDefinitions('node', $bundle);

      foreach (($ct['fields'] ?? []) as $f) {
        $name = $f['name'] ?? NULL;
        if (!$name) continue;

        $is_title = ($name === 'title');
        $machine = $is_title ? 'title' : ('field_' . $name);
        $jsonType = $f['type'] ?? 'string';
        $xpaths   = $f['xpaths'] ?? [];
        $card     = $f['cardinality'] ?? 1;

        // Skip unknown fields (except title).
        if (!$is_title && !isset($fieldDefs[$machine])) {
          continue;
        }

        $fieldType  = $is_title ? 'string' : $fieldDefs[$machine]->getType();
        $targetType = $is_title ? NULL      : $fieldDefs[$machine]->getSetting('target_type');

        // === IMAGES ===
        $isImageField = ($fieldType === 'image') ||
          ($fieldType === 'entity_reference' && $targetType === 'media' && in_array(strtolower($name), ['image', 'images', 'screenshot', 'gallery', 'media']));
        if ($isImageField) {
          // Try to derive URLs from XPaths; fall back to scalar; then fallback to first media bundle.
          $urls = $allMatches($xpaths);
          if (empty($urls)) {
            $maybeUrl = $firstMatch($xpaths) ?: NULL;
            if (is_string($maybeUrl) && preg_match('#^https?://#i', $maybeUrl)) {
              $urls = [$maybeUrl];
            }
          }

          // Fallback to a previously created media item.
          if (empty($urls) && !empty($mediaByBundle['image'])) {
            if ($fieldType === 'image') {
              $mid = $mediaByBundle['image'][0];
              $node_values[$machine] = [
                'target_id' => $mediaInfo[$mid]['fid'],
                'alt' => $mediaInfo[$mid]['alt'],
                'title' => $mediaInfo[$mid]['alt'],
              ];
            }
            else {
              // entity_reference -> media.
              $node_values[$machine] = ['target_id' => $mediaByBundle['image'][0]];
            }
            continue;
          }

          // For each URL, create file/media as needed.
          $items = [];
          foreach ($urls as $u) {
            $u = $resolveUrl(trim($u));
            if ($u === '') continue;

            if ($fieldType === 'image') {
              // Image field expects file id and alt/title.
              $fid = NULL; $altText = $pageTitle ?: $defaultAlt;
              if (isset($createdMediaByUrl[$u])) {
                $mid = $createdMediaByUrl[$u];
                $fid = $mediaInfo[$mid]['fid'] ?? NULL;
                $altText = $mediaInfo[$mid]['alt'] ?? $altText;
              }
              if (!$fid) {
                $file = $downloadToFile($u);
                if ($file) {
                  $fid = $file->id();
                }
              }
              if ($fid) {
                $items[] = ['target_id' => $fid, 'alt' => $altText, 'title' => $altText];
              }
            }
            else {
              // Entity reference to media.
              $mid = $createdMediaByUrl[$u] ?? NULL;
              if (!$mid) {
                $file = $downloadToFile($u);
                if ($file) {
                  [$mid] = $createMediaFromFile($file->id(), $pageTitle ?: $defaultAlt);
                }
              }
              if ($mid) {
                $items[] = ['target_id' => $mid];
              }
            }
            if ($card == 1 && !empty($items)) break;
          }

          if (!empty($items)) {
            $node_values[$machine] = ($card == 1) ? reset($items) : $items;
          }
          continue;
        }

        // === TAXONOMIES (entity_reference -> taxonomy_term) ===
        if ($fieldType === 'entity_reference' && $targetType === 'taxonomy_term') {
          $vid = $resolveVocabulary($name);
          if ($vid) {
            $labels = $allMatches($xpaths);
            if (empty($labels)) {
              foreach ($model['taxonomies'] ?? [] as $t) {
                if (($t['vocabulary'] ?? '') === $vid) { $labels = $t['terms'] ?? []; break; }
              }
            }
            $tids = [];
            foreach ($labels as $label) {
              $key = mb_strtolower(trim((string) $label));
              if ($key === '') continue;
              if (!isset($termsCache[$vid][$key])) {
                $term = \Drupal\taxonomy\Entity\Term::create(['vid' => $vid, 'name' => trim((string) $label)]);
                $term->save();
                $termsCache[$vid][$key] = $term->id();
              }
              $tids[] = ['target_id' => $termsCache[$vid][$key]];
              if ($card == 1) break;
            }
            if ($tids) {
              $node_values[$machine] = ($card == 1) ? reset($tids) : $tids;
            }
          }
          continue;
        }

        // === SCALARS (do not treat as URLs on image/media fields) ===
        $value = $firstMatch($xpaths);
        if ($value === NULL) continue;

        switch ($jsonType) {
          case 'integer':
            $value = $cleanInt($value);
            break;

          case 'text_long':
          case 'text':
          default:
            $value = trim((string) $value);
        }

        if ($is_title) {
          $node_values['title'] = $value;
        }
        else {
          $node_values[$machine] = $value;
        }
      }

      $message = '';
      if (!$this->getDryRun()) {
        $node = \Drupal\node\Entity\Node::create($node_values);
        $node->save();
        $node_id = $node->id();
        $message =  $this->t('Content imported with the following id:') . $node_id;
      }
      else {
        $message = $this->t('dry run executed correctly');
      }
      // Only one node per model in this method.
      break;
    }
    return $message;
  }

  /**
   * Attempts to repair a loosely formatted JSON fragment.
   *
   * Repairs are applied in order: BOM removal, escaping of stray backslashes,
   * removal of trailing commas, and collapsing of duplicated closing brackets.
   * If the text then decodes to an array it is returned as-is; otherwise the
   * outer '[' / ']' pair is stripped and a closing brace is appended before a
   * final decode attempt.
   *
   * @param string $json
   *   The loosely formatted JSON string.
   *
   * @return string
   *   A pretty-printed valid JSON string.
   *
   * @throws \Exception
   *   Thrown when the JSON cannot be repaired.
   */
  function fixJson(string $json): string {
    // Remove UTF-8 BOM if present.
    $json = preg_replace('/^\xEF\xBB\xBF/', '', $json) ?? $json;

    // Escape stray backslashes not followed by a valid JSON escape. Each
    // backslash is consumed together with the escape character that follows
    // it, so an already escaped pair ("\\") is left untouched. A callback is
    // used because its return value is inserted literally, whereas a
    // preg_replace() replacement string would collapse doubled backslashes.
    $json = preg_replace_callback(
      '/\\\\(["\\\\\/bfnrtu])?/',
      static function (array $m): string {
        return isset($m[1]) ? $m[0] : '\\\\';
      },
      $json
    ) ?? $json;

    // Remove trailing commas before object/array closures.
    $json = preg_replace('/,(\s*[}\]])/', '$1', $json) ?? $json;

    // Collapse multiple closing brackets when followed by a new property.
    $json = preg_replace('/\]{2,}(?=,\s*"[A-Za-z0-9_]+"\s*:)/', ']', $json) ?? $json;
    // Targeted repair for one specific malformed closing sequence before a
    // "notes" property.
    $json = str_replace('}]}],"notes"', '}],"notes"', $json);

    // The cleanup above may already be sufficient; only fall back to the
    // bracket/brace heuristic when the text still is not a JSON object/array.
    $data = json_decode($json, TRUE);
    if (!is_array($data)) {
      $json = $this->removeOuterBracketsAndEnsureClosingBrace($json) . '}';

      $data = json_decode($json, TRUE);
      if (json_last_error() !== JSON_ERROR_NONE) {
        throw new \Exception('JSON still invalid: ' . json_last_error_msg());
      }
    }

    // Re-encode in a readable way. JSON_THROW_ON_ERROR guarantees a string is
    // returned (a \JsonException is an \Exception).
    return json_encode($data, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT | JSON_THROW_ON_ERROR);
  }

  /**
   * Removes the first '[' and the last ']' from text, then ensures that the
   * last non-whitespace character is '}'.
   *
   * Only a '[' that is the first non-whitespace character and a ']' that is
   * the last non-whitespace character are removed. Leading and trailing
   * whitespace is preserved. Operates at string level; does not validate JSON
   * semantics.
   *
   * @param string $json
   *   Input text that may contain a JSON-like block.
   *
   * @return string
   *   The adjusted string. An empty input yields '}'.
   */
  function removeOuterBracketsAndEnsureClosingBrace(string $json): string {
    if ($json === '') {
      return '}';
    }

    // Same character set that ctype_space() recognises in the C locale.
    $whitespace = " \t\n\r\v\f";

    // Split into leading whitespace, core text and trailing whitespace.
    $leadLength = strspn($json, $whitespace);
    $lead = substr($json, 0, $leadLength);
    $core = rtrim(substr($json, $leadLength), $whitespace);
    $trail = substr($json, $leadLength + strlen($core));

    // Remove the first '[' and the last ']' if present.
    if (str_starts_with($core, '[')) {
      $core = substr($core, 1);
    }
    if (str_ends_with($core, ']')) {
      $core = substr($core, 0, -1);
    }

    $result = $lead . $core . $trail;

    // Ensure the last non-whitespace char is '}'. Whitespace exposed by the
    // bracket removal counts as trailing whitespace here.
    $body = rtrim($result, $whitespace);
    $trailing = substr($result, strlen($body));
    if (!str_ends_with($body, '}')) {
      $body .= '}';
    }

    // String concatenation: arithmetic '+' on these strings throws a
    // TypeError on PHP 8.
    return $body . $trailing;
  }

  // Example usage (from within this class):
  // $input = "[\n  {\"action\":\"refineModel\",\"model\":{\"foo\":\"bar\"}}\n]\n";
  // echo $this->removeOuterBracketsAndEnsureClosingBrace($input);
  // The outer '[' and ']' are removed and the output ends with '}'.

  /**
   * Decodes a model payload that may be a ChatMessage or a raw JSON string.
   *
   * Normalizes fences and BOMs, then tries to decode JSON; if it fails, it
   * attempts a repair via fixJson().
   *
   * @param string|\Drupal\ai\OperationType\Chat\ChatMessage $raw
   *   The raw model payload.
   *
   * @return array
   *   The decoded array. An empty array is returned when the payload is valid
   *   JSON but not an object/array.
   *
   * @throws \Exception
   *   Propagated from fixJson() when the payload cannot be repaired.
   */
  protected function decodeModelPayload($raw): array {
    if ($raw instanceof ChatMessage) {
      $raw = $raw->getText();
    }
    // The string functions below require a string subject.
    $raw = (string) $raw;

    // Normalize string (BOM, code fences, spaces).
    $raw = preg_replace('/^\xEF\xBB\xBF/', '', $raw) ?? $raw;
    $raw = preg_replace('/^\s*```(?:json)?\s*|\s*```\s*$/m', '', $raw) ?? $raw;
    $raw = trim($raw);

    // First attempt: direct decode. A successful decode is the result.
    $decoded = $this->tryJsonDecode($raw);
    if (is_array($decoded)) {
      return $decoded;
    }
    if ($decoded !== NULL) {
      // Valid JSON scalar (string, number, boolean): nothing to return as an
      // array.
      return [];
    }

    // Second attempt: repair the text, then decode the repaired JSON.
    $repaired = json_decode($this->fixJson($raw), TRUE);
    return is_array($repaired) ? $repaired : [];
  }

  /**
   * Wrapper around json_decode with safer flags.
   *
   * Objects are decoded as associative arrays and invalid UTF-8 sequences are
   * substituted instead of failing the decode. Decoding errors are swallowed.
   *
   * @param string $s
   *   JSON string.
   *
   * @return mixed|null
   *   Decoded value or NULL on error. The JSON literal null also yields NULL.
   */
  private function tryJsonDecode(string $s) {
    // JSON_THROW_ON_ERROR is always available on the PHP versions this file
    // runs on, so no json_last_error() fallback is needed.
    try {
      return json_decode($s, TRUE, 512, JSON_THROW_ON_ERROR | JSON_INVALID_UTF8_SUBSTITUTE);
    }
    catch (\JsonException $e) {
      return NULL;
    }
  }

  /**
   * Returns the first balanced {...} or [...] span found in a text.
   *
   * The text is scanned byte by byte. Brackets that appear inside
   * double-quoted strings (with backslash escapes honoured) are not counted.
   * Closing brackets seen before any opening bracket are skipped.
   *
   * @param string $s
   *   Input text.
   *
   * @return string|null
   *   The substring from the first opening bracket up to its matching closing
   *   bracket, or NULL when a closing bracket does not match the most recent
   *   opening one or no balanced block is completed.
   */
  private function extractFirstJsonBlock(string $s): ?string {
    $closerFor = ['{' => '}', '[' => ']'];
    $length = strlen($s);
    $blockStart = NULL;
    $pendingClosers = [];
    $insideString = FALSE;
    $escaped = FALSE;

    for ($pos = 0; $pos < $length; $pos++) {
      $char = $s[$pos];

      if ($insideString) {
        if ($escaped) {
          // The character after a backslash never terminates the string.
          $escaped = FALSE;
        }
        else {
          $escaped = ($char === '\\');
          $insideString = ($char !== '"');
        }
        continue;
      }

      switch ($char) {
        case '"':
          $insideString = TRUE;
          break;

        case '{':
        case '[':
          // Remember where the outermost block begins.
          $blockStart ??= $pos;
          $pendingClosers[] = $closerFor[$char];
          break;

        case '}':
        case ']':
          if ($pendingClosers === []) {
            // Stray closer outside any block: skip it.
            break;
          }
          if (array_pop($pendingClosers) !== $char) {
            // Mismatched nesting.
            return NULL;
          }
          if ($pendingClosers === []) {
            return substr($s, $blockStart, $pos - $blockStart + 1);
          }
          break;
      }
    }

    return NULL;
  }

  /**
   * Enqueues migration jobs when multiple pages are provided.
   *
   * One item is added to the 'ai_content_migrate.import_content' queue for
   * every entry returned by getPages(). Each item carries the page URL, the
   * model and the request time at which it was queued.
   *
   * @param array $modelToCreate
   *   The decoded model used by each queued job.
   *
   * @return \Drupal\Core\StringTranslation\TranslatableMarkup
   *   A message indicating the number of queued items.
   */
  protected function createQueue(array $modelToCreate) {
    $pages = $this->getPages();
    $num = count($pages);

    // Use the Queue API; the queue is the same for every page, so it is
    // looked up once.
    $queue = $this->queueFactory->get('ai_content_migrate.import_content');
    foreach ($pages as $singleurl) {
      $queue->createItem([
        'url' => $singleurl,
        'model' => $modelToCreate,
        'queued_at' => $this->time->getRequestTime(),
      ]);
    }

    // The count is passed as a placeholder so the source string stays
    // constant and translatable.
    return $this->t('created queue of @num elements, run drupal standard cron to execute the migration', ['@num' => $num]);
  }

  /**
   * Fetches the post-render HTML of a page through headless Chromium.
   *
   * The browser binary is taken from the CHROME_PATH environment variable,
   * falling back to /usr/bin/chromium. Any error is logged and results in an
   * empty string; the browser is closed in every case.
   *
   * @param string $url
   *   The URL to fetch.
   *
   * @return string
   *   The rendered page HTML, or an empty string on failure.
   */
  private function retrieveContentHtml(string $url) {
    $renderedHtml = '';
    $chromium = NULL;

    try {
      // Launch Chrome/Chromium headless.
      $binary = getenv('CHROME_PATH') ?: '/usr/bin/chromium';
      $launchOptions = [
        'headless' => TRUE,
        'userAgent' => 'Drupal Crawler/1.0',
        'customFlags' => [
          '--no-sandbox',
          '--disable-gpu',
          '--disable-dev-shm-usage',
        ],
      ];
      $chromium = (new BrowserFactory($binary))->createBrowser($launchOptions);

      $tab = $chromium->createPage();

      // Navigate and wait (up to 5000 ms) so JS-heavy pages can settle.
      $navigation = $tab->navigate($url);
      $navigation->waitForNavigation('networkIdle', 5000);

      // Serialize the DOM as it stands after rendering.
      $evaluation = $tab->evaluate('document.documentElement.outerHTML');
      $renderedHtml = (string) $evaluation->getReturnValue();
    }
    catch (\Throwable $failure) {
      // $renderedHtml is still the empty string at this point.
      $this->logger->error($failure->getMessage());
    }
    finally {
      if ($chromium !== NULL) {
        try {
          $chromium->close();
        }
        catch (\Throwable $closeFailure) {
          // Intentionally ignore errors while closing the browser.
        }
      }
    }

    return $renderedHtml;
  }

}
