Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.11.x will end 14 Nov 2022 (12 months plus 6 months extension).
  • Bug fixes for security issues in 3.11.x will end 13 Nov 2023 (18 months plus 12 months extension).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.
<?php

namespace Box\Spout\Common\Helper;

use Box\Spout\Common\Exception\EncodingConversionException;

/**
 * Class EncodingHelper
 * This class provides helper functions to work with encodings.
 */
class EncodingHelper
{
    /** Definition of the encodings that can have a BOM */
    const ENCODING_UTF8     = 'UTF-8';
    const ENCODING_UTF16_LE = 'UTF-16LE';
    const ENCODING_UTF16_BE = 'UTF-16BE';
    const ENCODING_UTF32_LE = 'UTF-32LE';
    const ENCODING_UTF32_BE = 'UTF-32BE';

    /** Definition of the BOMs for the different encodings */
    const BOM_UTF8     = "\xEF\xBB\xBF";
    const BOM_UTF16_LE = "\xFF\xFE";
    const BOM_UTF16_BE = "\xFE\xFF";
    const BOM_UTF32_LE = "\xFF\xFE\x00\x00";
    const BOM_UTF32_BE = "\x00\x00\xFE\xFF";

    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
    protected $globalFunctionsHelper;

    /** @var array Map representing the encodings supporting BOMs (key) and their associated BOM (value) */
    protected $supportedEncodingsWithBom;

    /**
     * Stores the global functions helper and builds the encoding => BOM lookup map.
     *
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
     */
    public function __construct($globalFunctionsHelper)
    {
        $this->globalFunctionsHelper = $globalFunctionsHelper;

        $this->supportedEncodingsWithBom = [
            self::ENCODING_UTF8     => self::BOM_UTF8,
            self::ENCODING_UTF16_LE => self::BOM_UTF16_LE,
            self::ENCODING_UTF16_BE => self::BOM_UTF16_BE,
            self::ENCODING_UTF32_LE => self::BOM_UTF32_LE,
            self::ENCODING_UTF32_BE => self::BOM_UTF32_BE,
        ];
    }

    /**
     * Returns the number of bytes to use as offset in order to skip the BOM.
     *
     * The check goes through hasBOM(), which rewinds the file pointer and reads
     * from it, so the pointer position is not preserved by this call.
     *
     * @param resource $filePointer Pointer to the file to check
     * @param string $encoding Encoding of the file to check
     * @return int Bytes offset to apply to skip the BOM (0 means no BOM)
     */
    public function getBytesOffsetToSkipBOM($filePointer, $encoding)
    {
        $byteOffsetToSkipBom = 0;

        if ($this->hasBOM($filePointer, $encoding)) {
            $bomUsed = $this->supportedEncodingsWithBom[$encoding];

            // The BOM is a raw byte sequence, so its byte length is the offset to skip.
            $byteOffsetToSkipBom = \strlen($bomUsed);
        }

        return $byteOffsetToSkipBom;
    }

    /**
     * Returns whether the file identified by the given pointer has a BOM.
     *
     * The file pointer is rewound before the check. Encodings that are not part
     * of the supported encoding => BOM map are always reported as having no BOM.
     *
     * @param resource $filePointer Pointer to the file to check
     * @param string $encoding Encoding of the file to check
     * @return bool TRUE if the file has a BOM, FALSE otherwise
     */
    protected function hasBOM($filePointer, $encoding)
    {
        $hasBOM = false;

        $this->globalFunctionsHelper->rewind($filePointer);

        if (\array_key_exists($encoding, $this->supportedEncodingsWithBom)) {
            $potentialBom = $this->supportedEncodingsWithBom[$encoding];
            $numBytesInBom = \strlen($potentialBom);

            // "+ 1" because PHP's fgets() reads at most (length - 1) bytes
            // (assumes the helper forwards to the native fgets()).
            $firstBytes = $this->globalFunctionsHelper->fgets($filePointer, $numBytesInBom + 1);
            $hasBOM = ($firstBytes === $potentialBom);
        }

        return $hasBOM;
    }

    /**
     * Attempts to convert a non UTF-8 string into UTF-8.
     *
     * @param string $string Non UTF-8 string to be converted
     * @param string $sourceEncoding The encoding used to encode the source string
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
     * @return string The converted, UTF-8 string
     */
    public function attemptConversionToUTF8($string, $sourceEncoding)
    {
        return $this->attemptConversion($string, $sourceEncoding, self::ENCODING_UTF8);
    }

    /**
     * Attempts to convert a UTF-8 string into the given encoding.
     *
     * @param string $string UTF-8 string to be converted
     * @param string $targetEncoding The encoding the string should be re-encoded into
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
     * @return string The converted string, encoded with the given encoding
     */
    public function attemptConversionFromUTF8($string, $targetEncoding)
    {
        return $this->attemptConversion($string, self::ENCODING_UTF8, $targetEncoding);
    }

    /**
     * Attempts to convert the given string to the given encoding.
     * Depending on what is installed on the server, "iconv" is tried first,
     * then "mbstring".
     *
     * @param string $string string to be converted
     * @param string $sourceEncoding The encoding used to encode the source string
     * @param string $targetEncoding The encoding the string should be re-encoded into
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
     * @return string The converted string, encoded with the given encoding
     */
    protected function attemptConversion($string, $sourceEncoding, $targetEncoding)
    {
        // if source and target encodings are the same, it's a no-op
        if ($sourceEncoding === $targetEncoding) {
            return $string;
        }

        $convertedString = null;

        if ($this->canUseIconv()) {
            $convertedString = $this->globalFunctionsHelper->iconv($string, $sourceEncoding, $targetEncoding);
        } elseif ($this->canUseMbString()) {
            $convertedString = $this->globalFunctionsHelper->mb_convert_encoding($string, $sourceEncoding, $targetEncoding);
        } else {
            // mb_convert_encoding() is provided by the "mbstring" extension, not by "intl".
            throw new EncodingConversionException("The conversion from $sourceEncoding to $targetEncoding is not supported. Please install \"iconv\" or \"mbstring\".");
        }

        // A strict FALSE from the helper is treated as a failed conversion.
        if ($convertedString === false) {
            throw new EncodingConversionException("The conversion from $sourceEncoding to $targetEncoding failed.");
        }

        return $convertedString;
    }

    /**
     * Returns whether "iconv" can be used.
     *
     * @return bool TRUE if "iconv" is available and can be used, FALSE otherwise
     */
    protected function canUseIconv()
    {
        return $this->globalFunctionsHelper->function_exists('iconv');
    }

    /**
     * Returns whether "mbstring" functions can be used.
     * Availability is detected through the presence of mb_convert_encoding(),
     * which is provided by the PHP "mbstring" extension.
     *
     * @return bool TRUE if "mbstring" functions are available and can be used, FALSE otherwise
     */
    protected function canUseMbString()
    {
        return $this->globalFunctionsHelper->function_exists('mb_convert_encoding');
    }
}