<?php
namespace PhpOffice\PhpSpreadsheet\Shared;
use PhpOffice\PhpSpreadsheet\Exception as PhpSpreadsheetException;
class CodePage
{
> public const DEFAULT_CODE_PAGE = 'CP1252';
/**
>
* Convert Microsoft Code Page Identifier to Code Page Name which iconv
> /** @var array */
* and mbstring understands.
> private static $pageArray = [
*
> 0 => 'CP1252', // CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
* @param int $codePage Microsoft Code Page Identifier
> 367 => 'ASCII', // ASCII
*
> 437 => 'CP437', // OEM US
* @throws PhpSpreadsheetException
> //720 => 'notsupported', // OEM Arabic
*
> 737 => 'CP737', // OEM Greek
* @return string Code Page Name
> 775 => 'CP775', // OEM Baltic
*/
> 850 => 'CP850', // OEM Latin I
public static function numberToName($codePage)
> 852 => 'CP852', // OEM Latin II (Central European)
{
> 855 => 'CP855', // OEM Cyrillic
switch ($codePage) {
> 857 => 'CP857', // OEM Turkish
case 367:
> 858 => 'CP858', // OEM Multilingual Latin I with Euro
return 'ASCII'; // ASCII
> 860 => 'CP860', // OEM Portugese
case 437:
> 861 => 'CP861', // OEM Icelandic
return 'CP437'; // OEM US
> 862 => 'CP862', // OEM Hebrew
case 720:
> 863 => 'CP863', // OEM Canadian (French)
throw new PhpSpreadsheetException('Code page 720 not supported.'); // OEM Arabic
> 864 => 'CP864', // OEM Arabic
case 737:
> 865 => 'CP865', // OEM Nordic
return 'CP737'; // OEM Greek
> 866 => 'CP866', // OEM Cyrillic (Russian)
case 775:
> 869 => 'CP869', // OEM Greek (Modern)
return 'CP775'; // OEM Baltic
> 874 => 'CP874', // ANSI Thai
case 850:
> 932 => 'CP932', // ANSI Japanese Shift-JIS
return 'CP850'; // OEM Latin I
> 936 => 'CP936', // ANSI Chinese Simplified GBK
case 852:
> 949 => 'CP949', // ANSI Korean (Wansung)
return 'CP852'; // OEM Latin II (Central European)
> 950 => 'CP950', // ANSI Chinese Traditional BIG5
case 855:
> 1200 => 'UTF-16LE', // UTF-16 (BIFF8)
return 'CP855'; // OEM Cyrillic
> 1250 => 'CP1250', // ANSI Latin II (Central European)
case 857:
> 1251 => 'CP1251', // ANSI Cyrillic
return 'CP857'; // OEM Turkish
> 1252 => 'CP1252', // ANSI Latin I (BIFF4-BIFF7)
case 858:
> 1253 => 'CP1253', // ANSI Greek
return 'CP858'; // OEM Multilingual Latin I with Euro
> 1254 => 'CP1254', // ANSI Turkish
case 860:
> 1255 => 'CP1255', // ANSI Hebrew
return 'CP860'; // OEM Portugese
> 1256 => 'CP1256', // ANSI Arabic
case 861:
> 1257 => 'CP1257', // ANSI Baltic
return 'CP861'; // OEM Icelandic
> 1258 => 'CP1258', // ANSI Vietnamese
case 862:
> 1361 => 'CP1361', // ANSI Korean (Johab)
return 'CP862'; // OEM Hebrew
> 10000 => 'MAC', // Apple Roman
case 863:
> 10001 => 'CP932', // Macintosh Japanese
return 'CP863'; // OEM Canadian (French)
> 10002 => 'CP950', // Macintosh Chinese Traditional
case 864:
> 10003 => 'CP1361', // Macintosh Korean
return 'CP864'; // OEM Arabic
> 10004 => 'MACARABIC', // Apple Arabic
case 865:
> 10005 => 'MACHEBREW', // Apple Hebrew
return 'CP865'; // OEM Nordic
> 10006 => 'MACGREEK', // Macintosh Greek
case 866:
> 10007 => 'MACCYRILLIC', // Macintosh Cyrillic
return 'CP866'; // OEM Cyrillic (Russian)
> 10008 => 'CP936', // Macintosh - Simplified Chinese (GB 2312)
case 869:
> 10010 => 'MACROMANIA', // Macintosh Romania
return 'CP869'; // OEM Greek (Modern)
> 10017 => 'MACUKRAINE', // Macintosh Ukraine
case 874:
> 10021 => 'MACTHAI', // Macintosh Thai
return 'CP874'; // ANSI Thai
> 10029 => ['MACCENTRALEUROPE', 'MAC-CENTRALEUROPE'], // Macintosh Central Europe
case 932:
> 10079 => 'MACICELAND', // Macintosh Icelandic
return 'CP932'; // ANSI Japanese Shift-JIS
> 10081 => 'MACTURKISH', // Macintosh Turkish
case 936:
> 10082 => 'MACCROATIAN', // Macintosh Croatian
return 'CP936'; // ANSI Chinese Simplified GBK
> 21010 => 'UTF-16LE', // UTF-16 (BIFF8) This isn't correct, but some Excel writer libraries erroneously use Codepage 21010 for UTF-16LE
case 949:
> 32768 => 'MAC', // Apple Roman
return 'CP949'; // ANSI Korean (Wansung)
> //32769 => 'unsupported', // ANSI Latin I (BIFF2-BIFF3)
case 950:
> 65000 => 'UTF-7', // Unicode (UTF-7)
return 'CP950'; // ANSI Chinese Traditional BIG5
> 65001 => 'UTF-8', // Unicode (UTF-8)
case 1200:
> 99999 => ['unsupported'], // Unicode (UTF-8)
return 'UTF-16LE'; // UTF-16 (BIFF8)
> ];
case 1250:
>
return 'CP1250'; // ANSI Latin II (Central European)
> public static function validate(string $codePage): bool
case 1251:
> {
return 'CP1251'; // ANSI Cyrillic
> return in_array($codePage, self::$pageArray, true);
case 0:
> }
// CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
>
< * @throws PhpSpreadsheetException
< *
< public static function numberToName($codePage)
> public static function numberToName(int $codePage): string
< switch ($codePage) {
< case 367:
< return 'ASCII'; // ASCII
< case 437:
< return 'CP437'; // OEM US
< case 720:
< throw new PhpSpreadsheetException('Code page 720 not supported.'); // OEM Arabic
< case 737:
< return 'CP737'; // OEM Greek
< case 775:
< return 'CP775'; // OEM Baltic
< case 850:
< return 'CP850'; // OEM Latin I
< case 852:
< return 'CP852'; // OEM Latin II (Central European)
< case 855:
< return 'CP855'; // OEM Cyrillic
< case 857:
< return 'CP857'; // OEM Turkish
< case 858:
< return 'CP858'; // OEM Multilingual Latin I with Euro
< case 860:
< return 'CP860'; // OEM Portugese
< case 861:
< return 'CP861'; // OEM Icelandic
< case 862:
< return 'CP862'; // OEM Hebrew
< case 863:
< return 'CP863'; // OEM Canadian (French)
< case 864:
< return 'CP864'; // OEM Arabic
< case 865:
< return 'CP865'; // OEM Nordic
< case 866:
< return 'CP866'; // OEM Cyrillic (Russian)
< case 869:
< return 'CP869'; // OEM Greek (Modern)
< case 874:
< return 'CP874'; // ANSI Thai
< case 932:
< return 'CP932'; // ANSI Japanese Shift-JIS
< case 936:
< return 'CP936'; // ANSI Chinese Simplified GBK
< case 949:
< return 'CP949'; // ANSI Korean (Wansung)
< case 950:
< return 'CP950'; // ANSI Chinese Traditional BIG5
< case 1200:
< return 'UTF-16LE'; // UTF-16 (BIFF8)
< case 1250:
< return 'CP1250'; // ANSI Latin II (Central European)
< case 1251:
< return 'CP1251'; // ANSI Cyrillic
< case 0:
< // CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
< case 1252:
< return 'CP1252'; // ANSI Latin I (BIFF4-BIFF7)
< case 1253:
< return 'CP1253'; // ANSI Greek
< case 1254:
< return 'CP1254'; // ANSI Turkish
< case 1255:
< return 'CP1255'; // ANSI Hebrew
< case 1256:
< return 'CP1256'; // ANSI Arabic
< case 1257:
< return 'CP1257'; // ANSI Baltic
< case 1258:
< return 'CP1258'; // ANSI Vietnamese
< case 1361:
< return 'CP1361'; // ANSI Korean (Johab)
< case 10000:
< return 'MAC'; // Apple Roman
< case 10001:
< return 'CP932'; // Macintosh Japanese
< case 10002:
< return 'CP950'; // Macintosh Chinese Traditional
< case 10003:
< return 'CP1361'; // Macintosh Korean
< case 10004:
< return 'MACARABIC'; // Apple Arabic
< case 10005:
< return 'MACHEBREW'; // Apple Hebrew
< case 10006:
< return 'MACGREEK'; // Macintosh Greek
< case 10007:
< return 'MACCYRILLIC'; // Macintosh Cyrillic
< case 10008:
< return 'CP936'; // Macintosh - Simplified Chinese (GB 2312)
< case 10010:
< return 'MACROMANIA'; // Macintosh Romania
< case 10017:
< return 'MACUKRAINE'; // Macintosh Ukraine
< case 10021:
< return 'MACTHAI'; // Macintosh Thai
< case 10029:
< return 'MACCENTRALEUROPE'; // Macintosh Central Europe
< case 10079:
< return 'MACICELAND'; // Macintosh Icelandic
< case 10081:
< return 'MACTURKISH'; // Macintosh Turkish
< case 10082:
< return 'MACCROATIAN'; // Macintosh Croatian
< case 21010:
< return 'UTF-16LE'; // UTF-16 (BIFF8) This isn't correct, but some Excel writer libraries erroneously use Codepage 21010 for UTF-16LE
< case 32768:
< return 'MAC'; // Apple Roman
< case 32769:
< throw new PhpSpreadsheetException('Code page 32769 not supported.'); // ANSI Latin I (BIFF2-BIFF3)
< case 65000:
< return 'UTF-7'; // Unicode (UTF-7)
< case 65001:
< return 'UTF-8'; // Unicode (UTF-8)
> if (array_key_exists($codePage, self::$pageArray)) {
> $value = self::$pageArray[$codePage];
> if (is_array($value)) {
> foreach ($value as $encoding) {
> if (@iconv('UTF-8', $encoding, ' ') !== false) {
> self::$pageArray[$codePage] = $encoding;
>
> return $encoding;
> }
> }
>
> throw new PhpSpreadsheetException("Code page $codePage not implemented on this system.");
> } else {
> return $value;
> }
> }
> if ($codePage == 720 || $codePage == 32769) {
> throw new PhpSpreadsheetException("Code page $codePage not supported."); // OEM Arabic
> }
>
> public static function getEncodings(): array
> {
> return self::$pageArray;