<?php

namespace Drupal\rijksvideo\Service;

use Drupal\Component\Utility\Html;

/**
 * Service to parse and validate Rijksvideo embed codes.
 *
 * An embed code is an <iframe> whose src points at a player page and whose
 * query string carries host-relative paths to the media files:
 *   - v: video file
 *   - p: poster image
 *   - s: caption file, optionally followed by "|Label"
 *
 * Embed codes are treated as untrusted input: only http(s) player URLs with
 * a host are accepted, and sanitizeEmbedCode() rebuilds the iframe from the
 * parsed values instead of passing the original markup through.
 */
class RijksvideoEmbedParser {

  /**
   * Parse iframe embed code or video ID and extract video metadata.
   *
   * @param string $embedCode
   *   The iframe embed code OR a video ID.
   *
   * @return array
   *   Array containing:
   *   - video_url: Direct URL to video file
   *   - poster_url: Direct URL to poster/thumbnail image
   *   - caption_url: Direct URL to caption/subtitle file
   *   - caption_label: Label for caption track
   *   - player_url: Base player URL
   *   - iframe_attributes: Additional iframe attributes
   *   - raw_embed: Original embed code
   *   - detected_urls: Auto-detected related files (audio, transcription, etc.)
   *   All values are empty when the input cannot be parsed.
   */
  public function parseEmbedCode(string $embedCode): array {
    $result = $this->emptyResult($embedCode);

    // Trim before the emptiness check: whitespace-only input must not reach
    // DOMDocument::loadHTML(), which throws a ValueError on an empty string
    // in PHP 8.
    $embedCode = trim($embedCode);
    if ($embedCode === '') {
      return $result;
    }

    // Check if input is a video ID (short alphanumeric string without HTML).
    if ($this->isVideoId($embedCode)) {
      return $this->parseFromVideoId($embedCode);
    }

    // Extract iframe element and src attribute.
    $iframe = $this->extractIframeElement($embedCode);
    if ($iframe === NULL) {
      return $result;
    }

    $src = $iframe->getAttribute('src');
    if ($src === '') {
      return $result;
    }

    // Parse URL components.
    $urlData = $this->parseUrlComponents($src);
    if ($urlData === NULL) {
      return $result;
    }

    $params = $urlData['params'];
    $parsed = $urlData['parsed'];
    $result['player_url'] = $urlData['player_url'];

    // Extract URLs from query parameters.
    $result['video_url'] = $this->extractVideoUrl($params, $parsed);
    $result['poster_url'] = $this->extractPosterUrl($params, $parsed);

    $captionData = $this->extractCaptionData($params, $parsed);
    $result['caption_url'] = $captionData['url'];
    $result['caption_label'] = $captionData['label'];

    // Auto-detect related files based on video URL pattern.
    if ($result['video_url'] !== '') {
      $result['detected_urls'] = $this->detectRelatedFiles($result['video_url']);
    }

    // Extract iframe attributes.
    $result['iframe_attributes'] = $this->extractIframeAttributes($iframe);

    return $result;
  }

  /**
   * Build the empty result structure returned by the parse methods.
   *
   * @param string $rawEmbed
   *   The value to store under 'raw_embed'.
   *
   * @return array
   *   Result array with every key present and all other values empty.
   */
  protected function emptyResult(string $rawEmbed): array {
    return [
      'video_url' => '',
      'poster_url' => '',
      'caption_url' => '',
      'caption_label' => '',
      'player_url' => '',
      'iframe_attributes' => [],
      'raw_embed' => $rawEmbed,
      'detected_urls' => [],
    ];
  }

  /**
   * Extract iframe element from HTML embed code.
   *
   * @param string $embedCode
   *   The HTML embed code.
   *
   * @return \DOMElement|null
   *   The first iframe element or NULL if not found.
   */
  protected function extractIframeElement(string $embedCode): ?\DOMElement {
    // loadHTML() rejects an empty string (ValueError in PHP 8).
    if (trim($embedCode) === '') {
      return NULL;
    }

    $dom = new \DOMDocument();

    // Collect libxml parse errors internally instead of silencing them with
    // the @ operator, and restore the caller's setting afterwards.
    $previous = libxml_use_internal_errors(TRUE);
    try {
      $loaded = $dom->loadHTML($embedCode, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
    }
    finally {
      libxml_clear_errors();
      libxml_use_internal_errors($previous);
    }

    if (!$loaded) {
      return NULL;
    }

    $iframe = $dom->getElementsByTagName('iframe')->item(0);

    return $iframe instanceof \DOMElement ? $iframe : NULL;
  }

  /**
   * Parse URL components from iframe src.
   *
   * Only URLs with a host, a query string and an http/https scheme are
   * accepted. Protocol-relative URLs (//host/path) are treated as https.
   *
   * @param string $src
   *   The iframe src URL.
   *
   * @return array|null
   *   Array with 'parsed', 'player_url', and 'params', or NULL on failure.
   *   'parsed' always contains 'scheme' (lowercase), 'host' and 'path'.
   */
  protected function parseUrlComponents(string $src): ?array {
    $parsed = parse_url(trim($src));

    if (!is_array($parsed) || !isset($parsed['query'], $parsed['host'])) {
      return NULL;
    }

    // A protocol-relative src has no scheme; any other scheme than http(s)
    // (javascript:, data:, ...) is rejected because the value ends up in
    // the src attribute of the rebuilt iframe.
    $scheme = strtolower($parsed['scheme'] ?? 'https');
    if (!in_array($scheme, ['http', 'https'], TRUE)) {
      return NULL;
    }

    $parsed['scheme'] = $scheme;
    $parsed['path'] = $parsed['path'] ?? '';

    // Base player URL: scheme + host (+ port) + path.
    $playerUrl = $this->buildOrigin($parsed) . $parsed['path'];

    // Parse query parameters. parse_str() URL-decodes the values.
    $params = [];
    parse_str($parsed['query'], $params);

    return [
      'parsed' => $parsed,
      'player_url' => $playerUrl,
      'params' => $params,
    ];
  }

  /**
   * Build "scheme://host[:port]" from parsed URL components.
   *
   * @param array $parsed
   *   Parsed URL components containing at least 'scheme' and 'host'.
   *
   * @return string
   *   The origin without a trailing slash.
   */
  protected function buildOrigin(array $parsed): string {
    $origin = $parsed['scheme'] . '://' . $parsed['host'];
    if (isset($parsed['port'])) {
      $origin .= ':' . $parsed['port'];
    }

    return $origin;
  }

  /**
   * Turn a host-relative path from the query string into an absolute URL.
   *
   * @param mixed $path
   *   The already URL-decoded parameter value. Anything but a string (for
   *   example an array produced by "v[]=...") is rejected.
   * @param array $parsed
   *   Parsed URL components of the player URL.
   *
   * @return string
   *   The absolute URL, or an empty string when the path is missing or empty
   *   or the player URL has no scheme/host.
   */
  protected function buildAbsoluteUrl($path, array $parsed): string {
    if (!is_string($path) || !isset($parsed['scheme'], $parsed['host'])) {
      return '';
    }

    // Strip surrounding whitespace and stray quotes left over from
    // malformed embed codes, then the leading slash(es).
    $path = ltrim(trim($path, " \t\n\r\0\x0B\"'"), '/');
    if ($path === '') {
      return '';
    }

    return $this->buildOrigin($parsed) . '/' . $path;
  }

  /**
   * Extract video URL from query parameters.
   *
   * @param array $params
   *   Query parameters (already URL-decoded by parse_str()).
   * @param array $parsed
   *   Parsed URL components.
   *
   * @return string
   *   The video URL or empty string.
   */
  protected function extractVideoUrl(array $params, array $parsed): string {
    // The value is not passed through urldecode() again: parse_str() has
    // already decoded it and a second pass would corrupt literal "+" or "%"
    // characters in file names.
    return $this->buildAbsoluteUrl($params['v'] ?? NULL, $parsed);
  }

  /**
   * Extract poster URL from query parameters.
   *
   * @param array $params
   *   Query parameters (already URL-decoded by parse_str()).
   * @param array $parsed
   *   Parsed URL components.
   *
   * @return string
   *   The poster URL or empty string.
   */
  protected function extractPosterUrl(array $params, array $parsed): string {
    return $this->buildAbsoluteUrl($params['p'] ?? NULL, $parsed);
  }

  /**
   * Extract caption data from query parameters.
   *
   * @param array $params
   *   Query parameters (already URL-decoded by parse_str()).
   * @param array $parsed
   *   Parsed URL components.
   *
   * @return array
   *   Array with 'url' and 'label' keys. Both are empty strings when no
   *   usable caption path is present.
   */
  protected function extractCaptionData(array $params, array $parsed): array {
    $result = [
      'url' => '',
      'label' => '',
    ];

    $subtitleData = $params['s'] ?? NULL;
    if (!is_string($subtitleData)) {
      return $result;
    }

    $subtitleData = trim($subtitleData, '"\'');

    // Format: URL|Label (e.g., "/path/to/file.vtt|Ondertiteling").
    // Without a pipe the whole value is the path and a default label is used.
    $captionPath = $subtitleData;
    $captionLabel = 'Subtitles';
    if (strpos($subtitleData, '|') !== FALSE) {
      [$captionPath, $captionLabel] = explode('|', $subtitleData, 2);
      $captionLabel = trim($captionLabel);
    }

    // Construct full caption URL; a label without a file is meaningless.
    $captionUrl = $this->buildAbsoluteUrl($captionPath, $parsed);
    if ($captionUrl === '') {
      return $result;
    }

    $result['url'] = $captionUrl;
    $result['label'] = $captionLabel;

    return $result;
  }

  /**
   * Extract iframe attributes.
   *
   * @param \DOMElement $iframe
   *   The iframe element.
   *
   * @return array
   *   Array of the iframe attributes that are present. String attributes
   *   keep their value (including "0"); 'allowfullscreen' is TRUE when set.
   */
  protected function extractIframeAttributes(\DOMElement $iframe): array {
    $attributes = [
      'width' => $iframe->getAttribute('width'),
      'height' => $iframe->getAttribute('height'),
      'frameborder' => $iframe->getAttribute('frameborder'),
      'scrolling' => $iframe->getAttribute('scrolling'),
      'allow' => $iframe->getAttribute('allow'),
      'allowfullscreen' => $iframe->hasAttribute('allowfullscreen'),
    ];

    // Drop only absent attributes. A plain array_filter() would also drop
    // the string "0", losing the very common frameborder="0".
    return array_filter($attributes, static function ($value): bool {
      return $value !== '' && $value !== FALSE;
    });
  }

  /**
   * Check if input is a video ID rather than embed code.
   *
   * @param string $input
   *   The input string.
   *
   * @return bool
   *   TRUE if it's a video ID, FALSE otherwise.
   */
  protected function isVideoId(string $input): bool {
    // Video IDs are typically short alphanumeric strings without spaces or HTML
    // Example: s62t6fv, id6rizmik.
    // preg_match() returns int|false, so compare explicitly to get a bool.
    return preg_match('/^[a-z0-9]{6,12}$/i', $input) === 1;
  }

  /**
   * Parse video metadata from video ID.
   *
   * Note: Currently not supported as we need the full path structure.
   * This method exists for future enhancement when beeldbank API is available.
   *
   * @param string $videoId
   *   The video ID.
   *
   * @return array
   *   Parsed metadata array (empty for now, 'raw_embed' holds the ID).
   */
  protected function parseFromVideoId(string $videoId): array {
    // Without a beeldbank API, we can't determine the full file path
    // from just the video ID. The path structure is:
    // {dept}/{subdept}/{year}/{dept}-{subdept}-{date}-{id}-{version}.{ext}
    //
    // For now, return empty result which will trigger validation error
    // This can be implemented when beeldbank API becomes available.
    return $this->emptyResult($videoId);
  }

  /**
   * Detect related files based on video URL naming convention.
   *
   * @param string $videoUrl
   *   The video URL.
   *
   * @return array
   *   Array of detected URLs (all empty strings when the video URL does not
   *   follow the "-web-hd"/"-bron" naming convention):
   *   - audio_url: URL to audio description file (.mp3)
   *   - transcription_url: URL to transcription text file (.txt)
   *   - caption_vtt_url: URL to VTT caption file
   *   - caption_srt_url: URL to SRT caption file
   */
  protected function detectRelatedFiles(string $videoUrl): array {
    $detected = [
      'audio_url' => '',
      'transcription_url' => '',
      'caption_vtt_url' => '',
      'caption_srt_url' => '',
    ];

    // Extract base URL pattern
    // Example: https://wwwtest.rovid.nl/rivm/aco/2025/rivm-aco-20250822-id6rizmik-web-hd.mp4
    // Base: https://wwwtest.rovid.nl/rivm/aco/2025/rivm-aco-20250822-id6rizmik
    $matches = 0;
    $base = preg_replace('/-(web-hd|bron)\.(mp4|mxf)$/i', '', $videoUrl, 1, $matches);

    // When nothing was stripped the URL does not follow the convention and
    // appending suffixes would produce bogus URLs ("....mp4-audio.mp3").
    if (!is_string($base) || $base === '' || $matches === 0) {
      return $detected;
    }

    // Construct related file URLs based on naming convention.
    $detected['audio_url'] = $base . '-audio.mp3';
    $detected['transcription_url'] = $base . '-tekst.txt';
    $detected['caption_vtt_url'] = $base . '-ondertiteling.vtt';
    $detected['caption_srt_url'] = $base . '-ondertiteling.srt';

    return $detected;
  }

  /**
   * Validate embed code structure.
   *
   * @param string $embedCode
   *   The embed code to validate.
   *
   * @return bool
   *   TRUE if the code contains an iframe from which a video URL could be
   *   extracted, FALSE otherwise.
   */
  public function validateEmbed(string $embedCode): bool {
    // Cheap pre-check before running the HTML parser.
    if (stripos($embedCode, '<iframe') === FALSE) {
      return FALSE;
    }

    // Must have at least a video URL.
    $parsed = $this->parseEmbedCode($embedCode);

    return !empty($parsed['video_url']);
  }

  /**
   * Generate a title from video URL.
   *
   * @param string $videoUrl
   *   The video URL.
   *
   * @return string
   *   Generated title from filename, or an empty string when the URL has no
   *   path.
   */
  public function generateTitleFromVideoUrl(string $videoUrl): string {
    if (empty($videoUrl)) {
      return '';
    }

    // Extract filename from URL. parse_url() yields NULL/FALSE when there is
    // no path; passing that to basename() is deprecated in PHP 8.1+.
    $path = parse_url($videoUrl, PHP_URL_PATH);
    if (!is_string($path) || $path === '') {
      return '';
    }
    $filename = basename($path);

    // Remove extension.
    $title = preg_replace('/\.(mp4|webm|ogv|mov)$/i', '', $filename);

    // Replace URL-encoded characters and separators with spaces.
    $title = str_replace(['+', '-', '_', '%20'], ' ', (string) $title);
    $title = urldecode($title);

    // Clean up and capitalize.
    $title = Html::decodeEntities($title);

    return ucwords(trim($title));
  }

  /**
   * Sanitize embed code for safe output.
   *
   * The iframe is rebuilt from the parsed values; nothing from the original
   * markup is copied verbatim and every attribute value is HTML-escaped.
   *
   * @param string $embedCode
   *   The embed code to sanitize.
   *
   * @return string
   *   Sanitized embed code, or an empty string when no video URL could be
   *   extracted.
   */
  public function sanitizeEmbedCode(string $embedCode): string {
    // Parse and rebuild to ensure only iframe with safe attributes.
    $parsed = $this->parseEmbedCode($embedCode);

    if (empty($parsed['video_url']) || empty($parsed['player_url'])) {
      return '';
    }

    // Rebuild the query string from the host-relative paths.
    $query = [
      'v' => (string) parse_url($parsed['video_url'], PHP_URL_PATH),
    ];
    if (!empty($parsed['poster_url'])) {
      $query['p'] = (string) parse_url($parsed['poster_url'], PHP_URL_PATH);
    }
    if (!empty($parsed['caption_url'])) {
      $captionParam = (string) parse_url($parsed['caption_url'], PHP_URL_PATH);
      if (!empty($parsed['caption_label'])) {
        $captionParam .= '|' . $parsed['caption_label'];
      }
      $query['s'] = $captionParam;
    }

    // http_build_query() encodes values like urlencode(); the separator is
    // passed explicitly so the arg_separator.output ini setting cannot
    // change the result.
    $src = $parsed['player_url'] . '?' . http_build_query($query, '', '&');

    // Fall back to the defaults for missing or "0" dimensions.
    $attrs = $parsed['iframe_attributes'];
    $width = !empty($attrs['width']) ? $attrs['width'] : '560';
    $height = !empty($attrs['height']) ? $attrs['height'] : '315';
    $allow = !empty($attrs['allow']) ? $attrs['allow'] : 'picture-in-picture';
    $allowfullscreen = !empty($attrs['allowfullscreen']) ? ' allowfullscreen' : '';

    return sprintf(
      '<iframe src="%s" width="%s" height="%s" frameborder="0" scrolling="no" allow="%s"%s></iframe>',
      htmlspecialchars($src, ENT_QUOTES, 'UTF-8'),
      htmlspecialchars($width, ENT_QUOTES, 'UTF-8'),
      htmlspecialchars($height, ENT_QUOTES, 'UTF-8'),
      htmlspecialchars($allow, ENT_QUOTES, 'UTF-8'),
      $allowfullscreen
    );
  }

}
