Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.3.x will end 7 October 2024 (12 months).
  • Bug fixes for security issues in 4.3.x will end 21 April 2025 (18 months).
  • PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.2.x is also supported.
<?php

declare(strict_types=1);

namespace OpenSpout\Common\Helper;

use Error;
use OpenSpout\Common\Exception\EncodingConversionException;

/**
 * Helper for BOM detection and for converting strings to and from UTF-8,
 * using either iconv or mbstring depending on what the caller says is available.
 *
 * @internal
 */
final class EncodingHelper
{
    /**
     * Definition of the encodings that can have a BOM.
     */
    public const ENCODING_UTF8 = 'UTF-8';
    public const ENCODING_UTF16_LE = 'UTF-16LE';
    public const ENCODING_UTF16_BE = 'UTF-16BE';
    public const ENCODING_UTF32_LE = 'UTF-32LE';
    public const ENCODING_UTF32_BE = 'UTF-32BE';

    /**
     * Definition of the BOMs for the different encodings.
     */
    public const BOM_UTF8 = "\xEF\xBB\xBF";
    public const BOM_UTF16_LE = "\xFF\xFE";
    public const BOM_UTF16_BE = "\xFE\xFF";
    public const BOM_UTF32_LE = "\xFF\xFE\x00\x00";
    public const BOM_UTF32_BE = "\x00\x00\xFE\xFF";

    /** @var array<string, string> Map representing the encodings supporting BOMs (key) and their associated BOM (value) */
    private array $supportedEncodingsWithBom;

    /** @var bool Whether iconv() may be used for conversions (takes precedence over mbstring) */
    private bool $canUseIconv;

    /** @var bool Whether mb_convert_encoding() may be used for conversions */
    private bool $canUseMbString;

    /**
     * @param bool $canUseIconv    Whether the iconv() function may be used
     * @param bool $canUseMbString Whether the mb_convert_encoding() function may be used
     */
    public function __construct(bool $canUseIconv, bool $canUseMbString)
    {
        $this->canUseIconv = $canUseIconv;
        $this->canUseMbString = $canUseMbString;

        $this->supportedEncodingsWithBom = [
            self::ENCODING_UTF8 => self::BOM_UTF8,
            self::ENCODING_UTF16_LE => self::BOM_UTF16_LE,
            self::ENCODING_UTF16_BE => self::BOM_UTF16_BE,
            self::ENCODING_UTF32_LE => self::BOM_UTF32_LE,
            self::ENCODING_UTF32_BE => self::BOM_UTF32_BE,
        ];
    }

    /**
     * Creates a helper configured from the conversion functions available at runtime.
     */
    public static function factory(): self
    {
        return new self(
            \function_exists('iconv'),
            \function_exists('mb_convert_encoding'),
        );
    }

    /**
     * Returns the number of bytes to use as offset in order to skip the BOM.
     *
     * Note: the file pointer is rewound and then read from, so its position changes.
     *
     * @param resource $filePointer Pointer to the file to check
     * @param string   $encoding    Encoding of the file to check
     *
     * @return int Bytes offset to apply to skip the BOM (0 means no BOM)
     */
    public function getBytesOffsetToSkipBOM($filePointer, string $encoding): int
    {
        if (!$this->hasBOM($filePointer, $encoding)) {
            return 0;
        }

        // The offset is simply the byte length of the BOM that was detected.
        return \strlen($this->supportedEncodingsWithBom[$encoding]);
    }

    /**
     * Attempts to convert a non UTF-8 string into UTF-8.
     *
     * @param null|string $string         Non UTF-8 string to be converted (null is returned untouched)
     * @param string      $sourceEncoding The encoding used to encode the source string
     *
     * @return null|string The converted, UTF-8 string (null only if the input was null)
     *
     * @throws EncodingConversionException If conversion is not supported or if the conversion failed
     */
    public function attemptConversionToUTF8(?string $string, string $sourceEncoding): ?string
    {
        return $this->attemptConversion($string, $sourceEncoding, self::ENCODING_UTF8);
    }

    /**
     * Attempts to convert a UTF-8 string into the given encoding.
     *
     * @param null|string $string         UTF-8 string to be converted (null is returned untouched)
     * @param string      $targetEncoding The encoding the string should be re-encoded into
     *
     * @return null|string The converted string, encoded with the given encoding (null only if the input was null)
     *
     * @throws EncodingConversionException If conversion is not supported or if the conversion failed
     */
    public function attemptConversionFromUTF8(?string $string, string $targetEncoding): ?string
    {
        return $this->attemptConversion($string, self::ENCODING_UTF8, $targetEncoding);
    }

    /**
     * Returns whether the file identified by the given pointer has a BOM.
     *
     * The pointer is always rewound first; encodings without a known BOM yield FALSE.
     *
     * @param resource $filePointer Pointer to the file to check
     * @param string   $encoding    Encoding of the file to check
     *
     * @return bool TRUE if the file has a BOM, FALSE otherwise
     */
    private function hasBOM($filePointer, string $encoding): bool
    {
        rewind($filePointer);

        if (!\array_key_exists($encoding, $this->supportedEncodingsWithBom)) {
            return false;
        }

        $potentialBom = $this->supportedEncodingsWithBom[$encoding];
        $numBytesInBom = \strlen($potentialBom);

        // fgets() reads at most ($length - 1) bytes, hence the "+ 1".
        // None of the known BOMs contains a newline byte, so the read cannot stop early on a real BOM.
        return fgets($filePointer, $numBytesInBom + 1) === $potentialBom;
    }

    /**
     * Attempts to convert the given string to the given encoding.
     * Depending on what is installed on the server, we will try to iconv or mbstring.
     *
     * @param null|string $string         string to be converted (null is returned untouched)
     * @param string      $sourceEncoding The encoding used to encode the source string
     * @param string      $targetEncoding The encoding the string should be re-encoded into
     *
     * @return null|string The converted string, encoded with the given encoding (null only if the input was null)
     *
     * @throws EncodingConversionException If conversion is not supported or if the conversion failed
     */
    private function attemptConversion(?string $string, string $sourceEncoding, string $targetEncoding): ?string
    {
        // if source and target encodings are the same, it's a no-op
        if (null === $string || $sourceEncoding === $targetEncoding) {
            return $string;
        }

        if ($this->canUseIconv) {
            // Silence iconv notices/warnings; failure is detected through the FALSE return value.
            set_error_handler(static function (): bool {
                return true;
            });

            try {
                $convertedString = iconv($sourceEncoding, $targetEncoding, $string);
            } finally {
                // Always put the previous handler back, whatever happened during the conversion.
                restore_error_handler();
            }
        } elseif ($this->canUseMbString) {
            $errorMessage = null;
            set_error_handler(static function ($nr, $message) use (&$errorMessage): bool {
                $errorMessage = $message; // @codeCoverageIgnore

                return true; // @codeCoverageIgnore
            });

            try {
                $convertedString = mb_convert_encoding($string, $targetEncoding, $sourceEncoding);
            } catch (Error $error) {
                // An Error raised by mbstring is treated as a conversion failure.
                $errorMessage = $error->getMessage();
            } finally {
                restore_error_handler();
            }

            // Any reported warning or caught Error counts as a failed conversion.
            if (null !== $errorMessage) {
                $convertedString = false;
            }
        } else {
            throw new EncodingConversionException("The conversion from {$sourceEncoding} to {$targetEncoding} is not supported. Please install \"iconv\" or \"mbstring\".");
        }

        if (false === $convertedString) {
            throw new EncodingConversionException("The conversion from {$sourceEncoding} to {$targetEncoding} failed.");
        }

        return $convertedString;
    }
}