Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is also supported.

Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401]

   1  <?php
   2  
   3  namespace PhpOffice\PhpSpreadsheet\Shared;
   4  
   5  use PhpOffice\PhpSpreadsheet\Exception as PhpSpreadsheetException;
   6  
   /**
    * Maps Microsoft code page identifiers (as stored in spreadsheet files) to
    * the encoding names understood by iconv / mbstring.
    */
   class CodePage
   {
       public const DEFAULT_CODE_PAGE = 'CP1252';

       /**
        * Code page identifier => encoding name.
        *
        * A value is either a single encoding name, or a list of candidate
        * names for the same encoding. numberToName() probes the candidates
        * with iconv and replaces the list with the first name that works.
        *
        * @var array<int, string|string[]>
        */
       private static $pageArray = [
           0 => 'CP1252', //    CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
           367 => 'ASCII', //    ASCII
           437 => 'CP437', //    OEM US
           // 720 (OEM Arabic) has no mapping; numberToName() reports it as not supported.
           737 => 'CP737', //    OEM Greek
           775 => 'CP775', //    OEM Baltic
           850 => 'CP850', //    OEM Latin I
           852 => 'CP852', //    OEM Latin II (Central European)
           855 => 'CP855', //    OEM Cyrillic
           857 => 'CP857', //    OEM Turkish
           858 => 'CP858', //    OEM Multilingual Latin I with Euro
           860 => 'CP860', //    OEM Portuguese
           861 => 'CP861', //    OEM Icelandic
           862 => 'CP862', //    OEM Hebrew
           863 => 'CP863', //    OEM Canadian (French)
           864 => 'CP864', //    OEM Arabic
           865 => 'CP865', //    OEM Nordic
           866 => 'CP866', //    OEM Cyrillic (Russian)
           869 => 'CP869', //    OEM Greek (Modern)
           874 => 'CP874', //    ANSI Thai
           932 => 'CP932', //    ANSI Japanese Shift-JIS
           936 => 'CP936', //    ANSI Chinese Simplified GBK
           949 => 'CP949', //    ANSI Korean (Wansung)
           950 => 'CP950', //    ANSI Chinese Traditional BIG5
           1200 => 'UTF-16LE', //    UTF-16 (BIFF8)
           1250 => 'CP1250', //    ANSI Latin II (Central European)
           1251 => 'CP1251', //    ANSI Cyrillic
           1252 => 'CP1252', //    ANSI Latin I (BIFF4-BIFF7)
           1253 => 'CP1253', //    ANSI Greek
           1254 => 'CP1254', //    ANSI Turkish
           1255 => 'CP1255', //    ANSI Hebrew
           1256 => 'CP1256', //    ANSI Arabic
           1257 => 'CP1257', //    ANSI Baltic
           1258 => 'CP1258', //    ANSI Vietnamese
           1361 => 'CP1361', //    ANSI Korean (Johab)
           10000 => 'MAC', //    Apple Roman
           10001 => 'CP932', //    Macintosh Japanese
           10002 => 'CP950', //    Macintosh Chinese Traditional
           10003 => 'CP1361', //    Macintosh Korean
           10004 => 'MACARABIC', //    Apple Arabic
           10005 => 'MACHEBREW', //    Apple Hebrew
           10006 => 'MACGREEK', //    Macintosh Greek
           10007 => 'MACCYRILLIC', //    Macintosh Cyrillic
           10008 => 'CP936', //    Macintosh - Simplified Chinese (GB 2312)
           10010 => 'MACROMANIA', //    Macintosh Romania
           10017 => 'MACUKRAINE', //    Macintosh Ukraine
           10021 => 'MACTHAI', //    Macintosh Thai
           10029 => ['MACCENTRALEUROPE', 'MAC-CENTRALEUROPE'], //    Macintosh Central Europe (name differs between iconv builds)
           10079 => 'MACICELAND', //    Macintosh Icelandic
           10081 => 'MACTURKISH', //    Macintosh Turkish
           10082 => 'MACCROATIAN', //    Macintosh Croatian
           21010 => 'UTF-16LE', //    UTF-16 (BIFF8) This isn't correct, but some Excel writer libraries erroneously use Codepage 21010 for UTF-16LE
           32768 => 'MAC', //    Apple Roman
           // 32769 (ANSI Latin I, BIFF2-BIFF3) has no mapping; numberToName() reports it as not supported.
           65000 => 'UTF-7', //    Unicode (UTF-7)
           65001 => 'UTF-8', //    Unicode (UTF-8)
           99999 => ['unsupported'], //    Candidate list with no real encoding name - presumably a fixture for the "not implemented" path
       ];

       /**
        * Tell whether an encoding name appears in the code page table.
        *
        * Names listed as candidates for a multi-name entry (e.g. both
        * spellings of Macintosh Central Europe) are accepted as well, so the
        * answer does not depend on whether numberToName() has already
        * resolved that entry. This only checks the table; it does not probe
        * whether iconv supports the encoding on this system.
        *
        * @param string $codePage Encoding name, e.g. 'CP1252'
        */
       public static function validate(string $codePage): bool
       {
           foreach (self::$pageArray as $encoding) {
               // Casting wraps a plain name in a one-element list, so single
               // names and candidate lists are checked the same way.
               if (in_array($codePage, (array) $encoding, true)) {
                   return true;
               }
           }

           return false;
       }

       /**
        * Convert Microsoft Code Page Identifier to Code Page Name which iconv
        * and mbstring understands.
        *
        * @param int $codePage Microsoft Code Page Identifier
        *
        * @return string Code Page Name
        *
        * @throws PhpSpreadsheetException if the identifier is unknown, is known
        *                                 but has no mapping (720, 32769), or
        *                                 none of its candidate names is accepted
        *                                 by iconv on this system
        */
       public static function numberToName(int $codePage): string
       {
           if (!array_key_exists($codePage, self::$pageArray)) {
               // 720 = OEM Arabic, 32769 = ANSI Latin I (BIFF2-BIFF3): recognised but unmapped.
               if ($codePage === 720 || $codePage === 32769) {
                   throw new PhpSpreadsheetException("Code page $codePage not supported.");
               }

               throw new PhpSpreadsheetException('Unknown codepage: ' . $codePage);
           }

           $value = self::$pageArray[$codePage];
           if (!is_array($value)) {
               return $value;
           }

           foreach ($value as $encoding) {
               // iconv() returns false and raises a diagnostic for a charset it
               // does not know; that is the expected outcome of this probe, so
               // the diagnostic is suppressed.
               if (@iconv('UTF-8', $encoding, ' ') !== false) {
                   // Remember the working name so later lookups skip the probe.
                   self::$pageArray[$codePage] = $encoding;

                   return $encoding;
               }
           }

           throw new PhpSpreadsheetException("Code page $codePage not implemented on this system.");
       }

       /**
        * Return the code page table in its current state.
        *
        * @return array<int, string|string[]> identifier => encoding name, or a
        *                                      list of candidate names for entries
        *                                      numberToName() has not yet resolved
        */
       public static function getEncodings(): array
       {
           return self::$pageArray;
       }
   }