<?php

namespace Drupal\pdf_sanitizer;

use Drupal\Core\File\FileSystemInterface;
use Drupal\Core\Logger\LoggerChannelFactoryInterface;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Exception\ProcessSignaledException;
use Symfony\Component\Process\Exception\ProcessTimedOutException;

/**
 * Service for sanitizing PDF files.
 *
 * This service provides functionality to sanitize PDF files by removing
 * potentially malicious content, such as JavaScript actions, using Ghostscript.
 */
class PdfSanitizer {

  /**
   * The logger factory.
   *
   * @var \Drupal\Core\Logger\LoggerChannelFactoryInterface
   */
  protected $loggerFactory;

  /**
   * The process factory.
   *
   * @var \Drupal\pdf_sanitizer\ProcessFactory
   */
  protected $processFactory;

  /**
   * The file system service.
   *
   * @var \Drupal\Core\File\FileSystemInterface
   */
  protected $fileSystem;

  /**
   * Constructs a new PdfSanitizer object.
   *
   * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $logger_factory
   *   The logger factory service.
   * @param \Drupal\pdf_sanitizer\ProcessFactory $process_factory
   *   The process factory service for creating Symfony Process objects.
   * @param \Drupal\Core\File\FileSystemInterface $file_system
   *   The file system service.
   */
  public function __construct(LoggerChannelFactoryInterface $logger_factory, ProcessFactory $process_factory, FileSystemInterface $file_system) {
    $this->loggerFactory = $logger_factory;
    $this->processFactory = $process_factory;
    $this->fileSystem = $file_system;
  }

  /**
   * Sanitizes a PDF file by re-writing it through Ghostscript.
   *
   * The input is run through Ghostscript's "pdfwrite" device and the result
   * is written to a new, randomly named file in the temporary directory.
   * Ghostscript is invoked directly (argument vector, no intermediate shell)
   * and with -dSAFER so that the untrusted document is processed with file
   * access controls enabled.
   *
   * If Ghostscript exits with an error, is killed by a signal or exceeds the
   * process timeout, the failure is logged, any partially written output file
   * is removed and FALSE is returned.
   *
   * @param string $input_path
   *   The absolute path to the input PDF file.
   *
   * @return string|bool
   *   The absolute path to the temporary sanitized PDF file if successful,
   *   FALSE otherwise. The caller is responsible for moving or deleting
   *   this temporary file.
   */
  public function sanitizePdf(string $input_path): string|bool {
    $logger = $this->loggerFactory->get('pdf_sanitizer');

    // An empty path can never name a PDF; fail early with a clear message
    // instead of handing Ghostscript an empty argument.
    if ($input_path === '') {
      $logger->error('PDF sanitization failed: no input path was provided.');
      return FALSE;
    }

    $temp_directory = $this->fileSystem->getTempDirectory();

    // Use an unpredictable file name: the temporary directory may be shared
    // and a guessable (time based) name could be pre-created by someone else.
    $temp_output_path = $temp_directory . '/sanitized_pdf_' . bin2hex(random_bytes(16)) . '.pdf';

    // Pass the arguments as a list so the process is started without a shell.
    // Every element reaches Ghostscript verbatim, so no quoting or escaping is
    // required and no shell metacharacter in a path can be interpreted.
    $command = [
      'gs',
      // Explicitly enable file access controls; this is only the default in
      // recent Ghostscript releases.
      '-dSAFER',
      '-sDEVICE=pdfwrite',
      '-dNOPAUSE',
      '-dBATCH',
      '-dQUIET',
      '-sOutputFile=' . $temp_output_path,
      $input_path,
    ];

    // Run Ghostscript with the temporary directory as its working directory.
    /** @var \Symfony\Component\Process\Process $process */
    $process = $this->processFactory->create($command, $temp_directory);

    // Log the exact command line that will be executed for debugging purposes.
    $logger->info(
      'Attempting to execute Ghostscript command: {command}',
      ['command' => $process->getCommandLine()]
    );

    try {
      $process->mustRun();
    }
    catch (ProcessFailedException | ProcessTimedOutException | ProcessSignaledException $exception) {
      // Output can only be read from a process that was actually started;
      // asking an unstarted process for it would raise a LogicException and
      // hide the original failure.
      $started = $process->isStarted();
      $logger->error(
        'PDF sanitization failed. Command: {command}. Output: {output}. '
        . 'Error Output: {error_output}. Message: {message}',
        [
          'command' => $process->getCommandLine(),
          'output' => $started ? $process->getOutput() : '',
          'error_output' => $started ? $process->getErrorOutput() : '',
          'message' => $exception->getMessage(),
        ]
      );
      $this->removePartialOutput($temp_output_path);
      return FALSE;
    }

    return $temp_output_path;
  }

  /**
   * Removes an output file left behind by a failed Ghostscript run.
   *
   * @param string $path
   *   The path of the temporary output file that may have been created.
   */
  private function removePartialOutput(string $path): void {
    if (!file_exists($path)) {
      return;
    }
    if (!$this->fileSystem->unlink($path)) {
      $this->loggerFactory->get('pdf_sanitizer')->warning(
        'Could not remove partially written sanitized PDF: {path}',
        ['path' => $path]
      );
    }
  }

}
