Differences Between: [Versions 402 and 403]
<?php

declare(strict_types=1);

namespace OpenSpout\Common\Helper;

use Error;
use OpenSpout\Common\Exception\EncodingConversionException;

/**
 * Detects byte order marks (BOMs) at the start of a stream and converts
 * strings to and from UTF-8, using iconv or mbstring depending on the
 * availability flags given to the constructor.
 *
 * @internal
 */
final class EncodingHelper
{
    /**
     * Definition of the encodings that can have a BOM.
     */
    public const ENCODING_UTF8 = 'UTF-8';
    public const ENCODING_UTF16_LE = 'UTF-16LE';
    public const ENCODING_UTF16_BE = 'UTF-16BE';
    public const ENCODING_UTF32_LE = 'UTF-32LE';
    public const ENCODING_UTF32_BE = 'UTF-32BE';

    /**
     * Definition of the BOMs for the different encodings.
     */
    public const BOM_UTF8 = "\xEF\xBB\xBF";
    public const BOM_UTF16_LE = "\xFF\xFE";
    public const BOM_UTF16_BE = "\xFE\xFF";
    public const BOM_UTF32_LE = "\xFF\xFE\x00\x00";
    public const BOM_UTF32_BE = "\x00\x00\xFE\xFF";

    /** @var array<string, string> Map representing the encodings supporting BOMs (key) and their associated BOM (value) */
    private array $supportedEncodingsWithBom;

    /** @var bool Whether conversions may be delegated to iconv() */
    private bool $canUseIconv;

    /** @var bool Whether conversions may be delegated to mb_convert_encoding() */
    private bool $canUseMbString;

    /**
     * @param bool $canUseIconv    Whether iconv() may be used for conversions (takes precedence over mbstring)
     * @param bool $canUseMbString Whether mb_convert_encoding() may be used when iconv is not usable
     */
    public function __construct(bool $canUseIconv, bool $canUseMbString)
    {
        $this->canUseIconv = $canUseIconv;
        $this->canUseMbString = $canUseMbString;

        $this->supportedEncodingsWithBom = [
            self::ENCODING_UTF8 => self::BOM_UTF8,
            self::ENCODING_UTF16_LE => self::BOM_UTF16_LE,
            self::ENCODING_UTF16_BE => self::BOM_UTF16_BE,
            self::ENCODING_UTF32_LE => self::BOM_UTF32_LE,
            self::ENCODING_UTF32_BE => self::BOM_UTF32_BE,
        ];
    }

    /**
     * Creates a helper whose capabilities are probed from the functions
     * currently defined in the running PHP process.
     */
    public static function factory(): self
    {
        return new self(
            \function_exists('iconv'),
            \function_exists('mb_convert_encoding'),
        );
    }

    /**
     * Returns the number of bytes to use as offset in order to skip the BOM.
     *
     * The pointer is rewound and read from while checking, so its position
     * is not preserved by this call.
     *
     * @param resource $filePointer Pointer to the file to check
     * @param string   $encoding    Encoding of the file to check
     *
     * @return int Bytes offset to apply to skip the BOM (0 means no BOM)
     */
    public function getBytesOffsetToSkipBOM($filePointer, string $encoding): int
    {
        $byteOffsetToSkipBom = 0;

        if ($this->hasBOM($filePointer, $encoding)) {
            $bomUsed = $this->supportedEncodingsWithBom[$encoding];

            // we skip the N first bytes
            $byteOffsetToSkipBom = \strlen($bomUsed);
        }

        return $byteOffsetToSkipBom;
    }

    /**
     * Attempts to convert a non UTF-8 string into UTF-8.
     *
     * @param null|string $string         Non UTF-8 string to be converted (null is returned unchanged)
     * @param string      $sourceEncoding The encoding used to encode the source string
     *
     * @return null|string The converted, UTF-8 string, or null when null was given
     *
     * @throws \OpenSpout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
     */
    public function attemptConversionToUTF8(?string $string, string $sourceEncoding): ?string
    {
        return $this->attemptConversion($string, $sourceEncoding, self::ENCODING_UTF8);
    }

    /**
     * Attempts to convert a UTF-8 string into the given encoding.
     *
     * @param null|string $string         UTF-8 string to be converted (null is returned unchanged)
     * @param string      $targetEncoding The encoding the string should be re-encoded into
     *
     * @return null|string The converted string, encoded with the given encoding, or null when null was given
     *
     * @throws \OpenSpout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
     */
    public function attemptConversionFromUTF8(?string $string, string $targetEncoding): ?string
    {
        return $this->attemptConversion($string, self::ENCODING_UTF8, $targetEncoding);
    }

    /**
     * Returns whether the file identified by the given pointer has a BOM.
     *
     * The pointer is always rewound first; when the encoding is one of the
     * BOM-capable encodings, the leading bytes are then consumed.
     *
     * @param resource $filePointer Pointer to the file to check
     * @param string   $encoding    Encoding of the file to check
     *
     * @return bool TRUE if the file has a BOM, FALSE otherwise
     */
    private function hasBOM($filePointer, string $encoding): bool
    {
        $hasBOM = false;

        rewind($filePointer);

        if (\array_key_exists($encoding, $this->supportedEncodingsWithBom)) {
            $potentialBom = $this->supportedEncodingsWithBom[$encoding];
            $numBytesInBom = \strlen($potentialBom);

            // fgets() reads at most (length - 1) bytes, hence the "+ 1" to get exactly the BOM length
            $hasBOM = (fgets($filePointer, $numBytesInBom + 1) === $potentialBom);
        }

        return $hasBOM;
    }

    /**
     * Attempts to convert the given string to the given encoding.
     * Depending on what is installed on the server, we will try to iconv or mbstring.
     *
     * @param null|string $string         string to be converted (null is returned unchanged)
     * @param string      $sourceEncoding The encoding used to encode the source string
     * @param string      $targetEncoding The encoding the string should be re-encoded into
     *
     * @return null|string The converted string, encoded with the given encoding, or null when null was given
     *
     * @throws \OpenSpout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
     */
    private function attemptConversion(?string $string, string $sourceEncoding, string $targetEncoding): ?string
    {
        // if source and target encodings are the same, it's a no-op
        if (null === $string || $sourceEncoding === $targetEncoding) {
            return $string;
        }

        $convertedString = null;

        if ($this->canUseIconv) {
            // Silence PHP errors raised by iconv(); failure is detected through its FALSE return value.
            set_error_handler(static function (): bool {
                return true;
            });

            try {
                $convertedString = iconv($sourceEncoding, $targetEncoding, $string);
            } finally {
                // Always pop the temporary handler so it can never leak into the caller's context.
                restore_error_handler();
            }
        } elseif ($this->canUseMbString) {
            $errorMessage = null;
            // Capture PHP errors raised by mbstring instead of letting them surface.
            set_error_handler(static function ($nr, $message) use (&$errorMessage): bool {
                $errorMessage = $message; // @codeCoverageIgnore

                return true; // @codeCoverageIgnore
            });

            try {
                $convertedString = mb_convert_encoding($string, $targetEncoding, $sourceEncoding);
            } catch (Error $error) {
                $errorMessage = $error->getMessage();
            } finally {
                // Always pop the temporary handler, even if something other than an Error is thrown.
                restore_error_handler();
            }

            // Any reported error (captured or thrown) is treated as a failed conversion.
            if (null !== $errorMessage) {
                $convertedString = false;
            }
        } else {
            throw new EncodingConversionException("The conversion from {$sourceEncoding} to {$targetEncoding} is not supported. Please install \"iconv\" or \"mbstring\".");
        }

        if (false === $convertedString) {
            throw new EncodingConversionException("The conversion from {$sourceEncoding} to {$targetEncoding} failed.");
        }

        return $convertedString;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body