Differences Between: [Versions 310 and 311] [Versions 39 and 311]
<?php

namespace Box\Spout\Common\Helper;

use Box\Spout\Common\Exception\EncodingConversionException;

/**
 * Class EncodingHelper
 * Helper functions to detect byte order marks (BOM) and to convert strings
 * between UTF-8 and other encodings.
 */
class EncodingHelper
{
    /** Definition of the encodings that can have a BOM */
    const ENCODING_UTF8 = 'UTF-8';
    const ENCODING_UTF16_LE = 'UTF-16LE';
    const ENCODING_UTF16_BE = 'UTF-16BE';
    const ENCODING_UTF32_LE = 'UTF-32LE';
    const ENCODING_UTF32_BE = 'UTF-32BE';

    /** Definition of the BOMs for the different encodings */
    const BOM_UTF8 = "\xEF\xBB\xBF";
    const BOM_UTF16_LE = "\xFF\xFE";
    const BOM_UTF16_BE = "\xFE\xFF";
    const BOM_UTF32_LE = "\xFF\xFE\x00\x00";
    const BOM_UTF32_BE = "\x00\x00\xFE\xFF";

    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
    protected $globalFunctionsHelper;

    /** @var array Map representing the encodings supporting BOMs (key) and their associated BOM (value) */
    protected $supportedEncodingsWithBom;

    /**
     * Stores the global functions helper and builds the encoding => BOM map.
     *
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
     */
    public function __construct($globalFunctionsHelper)
    {
        $this->globalFunctionsHelper = $globalFunctionsHelper;

        $this->supportedEncodingsWithBom = [
            self::ENCODING_UTF8 => self::BOM_UTF8,
            self::ENCODING_UTF16_LE => self::BOM_UTF16_LE,
            self::ENCODING_UTF16_BE => self::BOM_UTF16_BE,
            self::ENCODING_UTF32_LE => self::BOM_UTF32_LE,
            self::ENCODING_UTF32_BE => self::BOM_UTF32_BE,
        ];
    }

    /**
     * Returns the number of bytes to use as offset in order to skip the BOM.
     * Note that the check rewinds the file pointer and then reads from it,
     * so the pointer position is modified by this call.
     *
     * @param resource $filePointer Pointer to the file to check
     * @param string $encoding Encoding of the file to check
     * @return int Bytes offset to apply to skip the BOM (0 means no BOM)
     */
    public function getBytesOffsetToSkipBOM($filePointer, $encoding)
    {
        if (!$this->hasBOM($filePointer, $encoding)) {
            return 0;
        }

        // hasBOM() only returns TRUE for encodings present in the map,
        // so the lookup is safe here. The offset is the byte length of the BOM.
        return \strlen($this->supportedEncodingsWithBom[$encoding]);
    }

    /**
     * Returns whether the file identified by the given pointer has a BOM.
     * The file pointer is always rewound first; encodings that are not in the
     * BOM map never have a BOM.
     *
     * @param resource $filePointer Pointer to the file to check
     * @param string $encoding Encoding of the file to check
     * @return bool TRUE if the file has a BOM, FALSE otherwise
     */
    protected function hasBOM($filePointer, $encoding)
    {
        $this->globalFunctionsHelper->rewind($filePointer);

        if (!\array_key_exists($encoding, $this->supportedEncodingsWithBom)) {
            return false;
        }

        $expectedBom = $this->supportedEncodingsWithBom[$encoding];

        // One more than the BOM length is requested: assuming the helper forwards
        // to PHP's fgets(), at most (length - 1) bytes are read - TODO confirm.
        $firstBytes = $this->globalFunctionsHelper->fgets($filePointer, \strlen($expectedBom) + 1);

        return $firstBytes === $expectedBom;
    }

    /**
     * Attempts to convert a non UTF-8 string into UTF-8.
     *
     * @param string $string Non UTF-8 string to be converted
     * @param string $sourceEncoding The encoding used to encode the source string
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
     * @return string The converted, UTF-8 string
     */
    public function attemptConversionToUTF8($string, $sourceEncoding)
    {
        return $this->attemptConversion($string, $sourceEncoding, self::ENCODING_UTF8);
    }

    /**
     * Attempts to convert a UTF-8 string into the given encoding.
     *
     * @param string $string UTF-8 string to be converted
     * @param string $targetEncoding The encoding the string should be re-encoded into
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
     * @return string The converted string, encoded with the given encoding
     */
    public function attemptConversionFromUTF8($string, $targetEncoding)
    {
        return $this->attemptConversion($string, self::ENCODING_UTF8, $targetEncoding);
    }

    /**
     * Attempts to convert the given string to the given encoding.
     * "iconv" is preferred when available; otherwise "mbstring" is used.
     *
     * @param string $string string to be converted
     * @param string $sourceEncoding The encoding used to encode the source string
     * @param string $targetEncoding The encoding the string should be re-encoded into
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
     * @return string The converted string, encoded with the given encoding
     */
    protected function attemptConversion($string, $sourceEncoding, $targetEncoding)
    {
        // if source and target encodings are the same, it's a no-op
        if ($sourceEncoding === $targetEncoding) {
            return $string;
        }

        if ($this->canUseIconv()) {
            $convertedString = $this->globalFunctionsHelper->iconv($string, $sourceEncoding, $targetEncoding);
        } elseif ($this->canUseMbString()) {
            $convertedString = $this->globalFunctionsHelper->mb_convert_encoding($string, $sourceEncoding, $targetEncoding);
        } else {
            // mb_convert_encoding() is provided by the "mbstring" extension (not "intl"),
            // so that is the extension the user is told to install.
            throw new EncodingConversionException("The conversion from $sourceEncoding to $targetEncoding is not supported. Please install \"iconv\" or \"mbstring\".");
        }

        // Both helper calls are expected to signal a failed conversion with FALSE.
        if ($convertedString === false) {
            throw new EncodingConversionException("The conversion from $sourceEncoding to $targetEncoding failed.");
        }

        return $convertedString;
    }

    /**
     * Returns whether "iconv" can be used.
     *
     * @return bool TRUE if "iconv" is available and can be used, FALSE otherwise
     */
    protected function canUseIconv()
    {
        return $this->globalFunctionsHelper->function_exists('iconv');
    }

    /**
     * Returns whether "mb_string" functions can be used.
     * These functions come with the PHP "mbstring" extension.
     *
     * @return bool TRUE if "mb_string" functions are available and can be used, FALSE otherwise
     */
    protected function canUseMbString()
    {
        return $this->globalFunctionsHelper->function_exists('mb_convert_encoding');
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body