Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.2.x will end 22 April 2024 (12 months).
  • Bug fixes for security issues in 4.2.x will end 7 October 2024 (18 months).
  • PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.1.x is also supported.

Differences Between: [Versions 402 and 403]

   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace OpenSpout\Common\Helper;
   6  
   7  use Error;
   8  use OpenSpout\Common\Exception\EncodingConversionException;
   9  
  10  /**
  11   * @internal
  12   */
  13  final class EncodingHelper
  14  {
  15      /**
  16       * Definition of the encodings that can have a BOM.
  17       */
  18      public const ENCODING_UTF8 = 'UTF-8';
  19      public const ENCODING_UTF16_LE = 'UTF-16LE';
  20      public const ENCODING_UTF16_BE = 'UTF-16BE';
  21      public const ENCODING_UTF32_LE = 'UTF-32LE';
  22      public const ENCODING_UTF32_BE = 'UTF-32BE';
  23  
  24      /**
  25       * Definition of the BOMs for the different encodings.
  26       */
  27      public const BOM_UTF8 = "\xEF\xBB\xBF";
  28      public const BOM_UTF16_LE = "\xFF\xFE";
  29      public const BOM_UTF16_BE = "\xFE\xFF";
  30      public const BOM_UTF32_LE = "\xFF\xFE\x00\x00";
  31      public const BOM_UTF32_BE = "\x00\x00\xFE\xFF";
  32  
  33      /** @var array<string, string> Map representing the encodings supporting BOMs (key) and their associated BOM (value) */
  34      private array $supportedEncodingsWithBom;
  35  
  36      private bool $canUseIconv;
  37  
  38      private bool $canUseMbString;
  39  
  40      public function __construct(bool $canUseIconv, bool $canUseMbString)
  41      {
  42          $this->canUseIconv = $canUseIconv;
  43          $this->canUseMbString = $canUseMbString;
  44  
  45          $this->supportedEncodingsWithBom = [
  46              self::ENCODING_UTF8 => self::BOM_UTF8,
  47              self::ENCODING_UTF16_LE => self::BOM_UTF16_LE,
  48              self::ENCODING_UTF16_BE => self::BOM_UTF16_BE,
  49              self::ENCODING_UTF32_LE => self::BOM_UTF32_LE,
  50              self::ENCODING_UTF32_BE => self::BOM_UTF32_BE,
  51          ];
  52      }
  53  
  54      public static function factory(): self
  55      {
  56          return new self(
  57              \function_exists('iconv'),
  58              \function_exists('mb_convert_encoding'),
  59          );
  60      }
  61  
  62      /**
  63       * Returns the number of bytes to use as offset in order to skip the BOM.
  64       *
  65       * @param resource $filePointer Pointer to the file to check
  66       * @param string   $encoding    Encoding of the file to check
  67       *
  68       * @return int Bytes offset to apply to skip the BOM (0 means no BOM)
  69       */
  70      public function getBytesOffsetToSkipBOM($filePointer, string $encoding): int
  71      {
  72          $byteOffsetToSkipBom = 0;
  73  
  74          if ($this->hasBOM($filePointer, $encoding)) {
  75              $bomUsed = $this->supportedEncodingsWithBom[$encoding];
  76  
  77              // we skip the N first bytes
  78              $byteOffsetToSkipBom = \strlen($bomUsed);
  79          }
  80  
  81          return $byteOffsetToSkipBom;
  82      }
  83  
  84      /**
  85       * Attempts to convert a non UTF-8 string into UTF-8.
  86       *
  87       * @param string $string         Non UTF-8 string to be converted
  88       * @param string $sourceEncoding The encoding used to encode the source string
  89       *
  90       * @return string The converted, UTF-8 string
  91       *
  92       * @throws \OpenSpout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
  93       */
  94      public function attemptConversionToUTF8(?string $string, string $sourceEncoding): ?string
  95      {
  96          return $this->attemptConversion($string, $sourceEncoding, self::ENCODING_UTF8);
  97      }
  98  
  99      /**
 100       * Attempts to convert a UTF-8 string into the given encoding.
 101       *
 102       * @param string $string         UTF-8 string to be converted
 103       * @param string $targetEncoding The encoding the string should be re-encoded into
 104       *
 105       * @return string The converted string, encoded with the given encoding
 106       *
 107       * @throws \OpenSpout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
 108       */
 109      public function attemptConversionFromUTF8(?string $string, string $targetEncoding): ?string
 110      {
 111          return $this->attemptConversion($string, self::ENCODING_UTF8, $targetEncoding);
 112      }
 113  
 114      /**
 115       * Returns whether the file identified by the given pointer has a BOM.
 116       *
 117       * @param resource $filePointer Pointer to the file to check
 118       * @param string   $encoding    Encoding of the file to check
 119       *
 120       * @return bool TRUE if the file has a BOM, FALSE otherwise
 121       */
 122      private function hasBOM($filePointer, string $encoding): bool
 123      {
 124          $hasBOM = false;
 125  
 126          rewind($filePointer);
 127  
 128          if (\array_key_exists($encoding, $this->supportedEncodingsWithBom)) {
 129              $potentialBom = $this->supportedEncodingsWithBom[$encoding];
 130              $numBytesInBom = \strlen($potentialBom);
 131  
 132              $hasBOM = (fgets($filePointer, $numBytesInBom + 1) === $potentialBom);
 133          }
 134  
 135          return $hasBOM;
 136      }
 137  
 138      /**
 139       * Attempts to convert the given string to the given encoding.
 140       * Depending on what is installed on the server, we will try to iconv or mbstring.
 141       *
 142       * @param string $string         string to be converted
 143       * @param string $sourceEncoding The encoding used to encode the source string
 144       * @param string $targetEncoding The encoding the string should be re-encoded into
 145       *
 146       * @return string The converted string, encoded with the given encoding
 147       *
 148       * @throws \OpenSpout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
 149       */
 150      private function attemptConversion(?string $string, string $sourceEncoding, string $targetEncoding): ?string
 151      {
 152          // if source and target encodings are the same, it's a no-op
 153          if (null === $string || $sourceEncoding === $targetEncoding) {
 154              return $string;
 155          }
 156  
 157          $convertedString = null;
 158  
 159          if ($this->canUseIconv) {
 160              set_error_handler(static function (): bool {
 161                  return true;
 162              });
 163  
 164              $convertedString = iconv($sourceEncoding, $targetEncoding, $string);
 165  
 166              restore_error_handler();
 167          } elseif ($this->canUseMbString) {
 168              $errorMessage = null;
 169              set_error_handler(static function ($nr, $message) use (&$errorMessage): bool {
 170                  $errorMessage = $message; // @codeCoverageIgnore
 171  
 172                  return true; // @codeCoverageIgnore
 173              });
 174  
 175              try {
 176                  $convertedString = mb_convert_encoding($string, $targetEncoding, $sourceEncoding);
 177              } catch (Error $error) {
 178                  $errorMessage = $error->getMessage();
 179              }
 180  
 181              restore_error_handler();
 182              if (null !== $errorMessage) {
 183                  $convertedString = false;
 184              }
 185          } else {
 186              throw new EncodingConversionException("The conversion from {$sourceEncoding} to {$targetEncoding} is not supported. Please install \"iconv\" or \"PHP Intl\".");
 187          }
 188  
 189          if (false === $convertedString) {
 190              throw new EncodingConversionException("The conversion from {$sourceEncoding} to {$targetEncoding} failed.");
 191          }
 192  
 193          return $convertedString;
 194      }
 195  }