Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 3.9.x will end* 10 May 2021 (12 months).
  • Bug fixes for security issues in 3.9.x will end* 8 May 2023 (36 months).
  • PHP version: minimum PHP 7.2.0. Note: the minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are also supported.

Differences Between: [Versions 39 and 311] [Versions 39 and 400] [Versions 39 and 401]

   1  <?php
   2  
   3  namespace Box\Spout\Common\Helper;
   4  
   5  use Box\Spout\Common\Exception\EncodingConversionException;
   6  
   7  /**
   8   * Class EncodingHelper
   9   * This class provides helper functions to work with encodings.
  10   */
  11  class EncodingHelper
  12  {
  13      /** Definition of the encodings that can have a BOM */
  14      const ENCODING_UTF8     = 'UTF-8';
  15      const ENCODING_UTF16_LE = 'UTF-16LE';
  16      const ENCODING_UTF16_BE = 'UTF-16BE';
  17      const ENCODING_UTF32_LE = 'UTF-32LE';
  18      const ENCODING_UTF32_BE = 'UTF-32BE';
  19  
  20      /** Definition of the BOMs for the different encodings */
  21      const BOM_UTF8     = "\xEF\xBB\xBF";
  22      const BOM_UTF16_LE = "\xFF\xFE";
  23      const BOM_UTF16_BE = "\xFE\xFF";
  24      const BOM_UTF32_LE = "\xFF\xFE\x00\x00";
  25      const BOM_UTF32_BE = "\x00\x00\xFE\xFF";
  26  
  27      /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
  28      protected $globalFunctionsHelper;
  29  
  30      /** @var array Map representing the encodings supporting BOMs (key) and their associated BOM (value) */
  31      protected $supportedEncodingsWithBom;
  32  
  33      /**
  34       * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
  35       */
  36      public function __construct($globalFunctionsHelper)
  37      {
  38          $this->globalFunctionsHelper = $globalFunctionsHelper;
  39  
  40          $this->supportedEncodingsWithBom = [
  41              self::ENCODING_UTF8     => self::BOM_UTF8,
  42              self::ENCODING_UTF16_LE => self::BOM_UTF16_LE,
  43              self::ENCODING_UTF16_BE => self::BOM_UTF16_BE,
  44              self::ENCODING_UTF32_LE => self::BOM_UTF32_LE,
  45              self::ENCODING_UTF32_BE => self::BOM_UTF32_BE,
  46          ];
  47      }
  48  
  49      /**
  50       * Returns the number of bytes to use as offset in order to skip the BOM.
  51       *
  52       * @param resource $filePointer Pointer to the file to check
  53       * @param string $encoding Encoding of the file to check
  54       * @return int Bytes offset to apply to skip the BOM (0 means no BOM)
  55       */
  56      public function getBytesOffsetToSkipBOM($filePointer, $encoding)
  57      {
  58          $byteOffsetToSkipBom = 0;
  59  
  60          if ($this->hasBOM($filePointer, $encoding)) {
  61              $bomUsed = $this->supportedEncodingsWithBom[$encoding];
  62  
  63              // we skip the N first bytes
  64              $byteOffsetToSkipBom = strlen($bomUsed);
  65          }
  66  
  67          return $byteOffsetToSkipBom;
  68      }
  69  
  70      /**
  71       * Returns whether the file identified by the given pointer has a BOM.
  72       *
  73       * @param resource $filePointer Pointer to the file to check
  74       * @param string $encoding Encoding of the file to check
  75       * @return bool TRUE if the file has a BOM, FALSE otherwise
  76       */
  77      protected function hasBOM($filePointer, $encoding)
  78      {
  79          $hasBOM = false;
  80  
  81          $this->globalFunctionsHelper->rewind($filePointer);
  82  
  83          if (array_key_exists($encoding, $this->supportedEncodingsWithBom)) {
  84              $potentialBom = $this->supportedEncodingsWithBom[$encoding];
  85              $numBytesInBom = strlen($potentialBom);
  86  
  87              $hasBOM = ($this->globalFunctionsHelper->fgets($filePointer, $numBytesInBom + 1) === $potentialBom);
  88          }
  89  
  90          return $hasBOM;
  91      }
  92  
  93      /**
  94       * Attempts to convert a non UTF-8 string into UTF-8.
  95       *
  96       * @param string $string Non UTF-8 string to be converted
  97       * @param string $sourceEncoding The encoding used to encode the source string
  98       * @throws \Box\Spout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
  99       * @return string The converted, UTF-8 string
 100       */
 101      public function attemptConversionToUTF8($string, $sourceEncoding)
 102      {
 103          return $this->attemptConversion($string, $sourceEncoding, self::ENCODING_UTF8);
 104      }
 105  
 106      /**
 107       * Attempts to convert a UTF-8 string into the given encoding.
 108       *
 109       * @param string $string UTF-8 string to be converted
 110       * @param string $targetEncoding The encoding the string should be re-encoded into
 111       * @throws \Box\Spout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
 112       * @return string The converted string, encoded with the given encoding
 113       */
 114      public function attemptConversionFromUTF8($string, $targetEncoding)
 115      {
 116          return $this->attemptConversion($string, self::ENCODING_UTF8, $targetEncoding);
 117      }
 118  
 119      /**
 120       * Attempts to convert the given string to the given encoding.
 121       * Depending on what is installed on the server, we will try to iconv or mbstring.
 122       *
 123       * @param string $string string to be converted
 124       * @param string $sourceEncoding The encoding used to encode the source string
 125       * @param string $targetEncoding The encoding the string should be re-encoded into
 126       * @throws \Box\Spout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
 127       * @return string The converted string, encoded with the given encoding
 128       */
 129      protected function attemptConversion($string, $sourceEncoding, $targetEncoding)
 130      {
 131          // if source and target encodings are the same, it's a no-op
 132          if ($sourceEncoding === $targetEncoding) {
 133              return $string;
 134          }
 135  
 136          $convertedString = null;
 137  
 138          if ($this->canUseIconv()) {
 139              $convertedString = $this->globalFunctionsHelper->iconv($string, $sourceEncoding, $targetEncoding);
 140          } elseif ($this->canUseMbString()) {
 141              $convertedString = $this->globalFunctionsHelper->mb_convert_encoding($string, $sourceEncoding, $targetEncoding);
 142          } else {
 143              throw new EncodingConversionException("The conversion from $sourceEncoding to $targetEncoding is not supported. Please install \"iconv\" or \"PHP Intl\".");
 144          }
 145  
 146          if ($convertedString === false) {
 147              throw new EncodingConversionException("The conversion from $sourceEncoding to $targetEncoding failed.");
 148          }
 149  
 150          return $convertedString;
 151      }
 152  
 153      /**
 154       * Returns whether "iconv" can be used.
 155       *
 156       * @return bool TRUE if "iconv" is available and can be used, FALSE otherwise
 157       */
 158      protected function canUseIconv()
 159      {
 160          return $this->globalFunctionsHelper->function_exists('iconv');
 161      }
 162  
 163      /**
 164       * Returns whether "mb_string" functions can be used.
 165       * These functions come with the PHP Intl package.
 166       *
 167       * @return bool TRUE if "mb_string" functions are available and can be used, FALSE otherwise
 168       */
 169      protected function canUseMbString()
 170      {
 171          return $this->globalFunctionsHelper->function_exists('mb_convert_encoding');
 172      }
 173  }