Moodle 4.1 XRef and Diffs

Search moodle.org's
Developer Documentation
See Release Notes
Long Term Support Release
Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is supported too.
Moodle 4.1 Database Schema (by Marcus Green)
/lib/phpxmlrpc/Helper/ -> Charset.php (source)
Differences Between: [Versions 401 and 402] [Versions 401 and 403]
   1  <?php
   2  
   3  namespace PhpXmlRpc\Helper;
   4  
   5  use PhpXmlRpc\PhpXmlRpc;
   6  
   7  /**
   8   * @todo implement an interface
   9   */
  10  class Charset
  11  {
  12      // tables used for transcoding different charsets into us-ascii xml
  13      protected $xml_iso88591_Entities = array("in" => array(), "out" => array());
  14  
  15      //protected $xml_cp1252_Entities = array('in' => array(), out' => array());
  16  
  17      protected $charset_supersets = array(
  18          'US-ASCII' => array('ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4',
  19              'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8',
  20              'ISO-8859-9', 'ISO-8859-10', 'ISO-8859-11', 'ISO-8859-12',
  21              'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'UTF-8',
  22              'EUC-JP', 'EUC-', 'EUC-KR', 'EUC-CN',),
  23      );
  24  
  25      /** @var Charset $instance */
  26      protected static $instance = null;
  27  
  28      /**
  29       * This class is singleton for performance reasons.
  30       * @todo should we just make $xml_iso88591_Entities a static variable instead ?
  31       *
  32       * @return Charset
  33       */
  34      public static function instance()
  35      {
  36          if (self::$instance === null) {
  37              self::$instance = new static();
  38          }
  39  
  40          return self::$instance;
  41      }
  42  
  43      /**
  44       * Force usage as singleton
  45       */
  46      protected function __construct()
  47      {
  48      }
  49  
  50      /**
  51       * @param string $tableName
  52       * @throws \Exception for unsupported $tableName
  53       * @todo add support for cp1252 as well as latin-2 .. latin-10
  54       *       Optimization creep: instead of building all those tables on load, keep them ready-made php files
  55       *       which are not even included until needed
  56       * @todo should we add to the latin-1 table the characters from cp_1252 range, i.e. 128 to 159 ?
  57       *       Those will NOT be present in true ISO-8859-1, but will save the unwary windows user from sending junk
  58       *       (though no luck when receiving them...)
  59       *       Note also that, apparently, while 'ISO/IEC 8859-1' has no characters defined for bytes 128 to 159,
  60       *       IANA ISO-8859-1 does have well-defined 'C1' control codes for those - wikipedia's page on latin-1 says:
  61       *       "ISO-8859-1 is the IANA preferred name for this standard when supplemented with the C0 and C1 control codes from ISO/IEC 6429."
  62       *       Check what mbstring/iconv do by default with those?
  63       */
  64      protected function buildConversionTable($tableName)
  65      {
  66          switch($tableName) {
  67              case 'xml_iso88591_Entities':
  68                  if (count($this->xml_iso88591_Entities['in'])) {
  69                      return;
  70                  }
  71                  for ($i = 0; $i < 32; $i++) {
  72                      $this->xml_iso88591_Entities["in"][] = chr($i);
  73                      $this->xml_iso88591_Entities["out"][] = "&#{$i};";
  74                  }
  75  
  76                  /// @todo to be 'print safe', should we encode as well character 127 (DEL) ?
  77  
  78                  for ($i = 160; $i < 256; $i++) {
  79                      $this->xml_iso88591_Entities["in"][] = chr($i);
  80                      $this->xml_iso88591_Entities["out"][] = "&#{$i};";
  81                  }
  82                  break;
  83  
  84              /*case 'xml_cp1252_Entities':
  85                  if (count($this->xml_cp1252_Entities['in'])) {
  86                      return;
  87                  }
  88                  for ($i = 128; $i < 160; $i++)
  89                  {
  90                      $this->xml_cp1252_Entities['in'][] = chr($i);
  91                  }
  92                  $this->xml_cp1252_Entities['out'] = array(
  93                      '&#x20AC;', '?',        '&#x201A;', '&#x0192;',
  94                      '&#x201E;', '&#x2026;', '&#x2020;', '&#x2021;',
  95                      '&#x02C6;', '&#x2030;', '&#x0160;', '&#x2039;',
  96                      '&#x0152;', '?',        '&#x017D;', '?',
  97                      '?',        '&#x2018;', '&#x2019;', '&#x201C;',
  98                      '&#x201D;', '&#x2022;', '&#x2013;', '&#x2014;',
  99                      '&#x02DC;', '&#x2122;', '&#x0161;', '&#x203A;',
 100                      '&#x0153;', '?',        '&#x017E;', '&#x0178;'
 101                  );
 102                  $this->buildConversionTable('xml_iso88591_Entities');
 103                  break;*/
 104  
 105              default:
 106                  throw new \Exception('Unsupported table: ' . $tableName);
 107          }
 108      }
 109  
 110      /**
 111       * Convert a string to the correct XML representation in a target charset.
 112       * This involves:
 113       * - character transformation for all characters which have a different representation in source and dest charsets
 114       * - using 'charset entity' representation for all characters which are outside of the target charset
 115       *
 116       * To help correct communication of non-ascii chars inside strings, regardless of the charset used when sending
 117       * requests, parsing them, sending responses and parsing responses, an option is to convert all non-ascii chars
 118       * present in the message into their equivalent 'charset entity'. Charset entities enumerated this way are
 119       * independent of the charset encoding used to transmit them, and all XML parsers are bound to understand them.
 120       *
 121       * Note that when not sending a charset encoding mime type along with http headers, we are bound by RFC 3023 to emit
 122       * strict us-ascii for 'text/xml' payloads (but we should review RFC 7303, which seems to have changed the rules...)
 123       *
 124       * @todo do a bit of basic benchmarking (strtr vs. str_replace)
 125       * @todo make usage of iconv() or mb_string() where available
 126       * @todo support aliases for charset names, eg ASCII, LATIN1, ISO-88591 (see f.e. polyfill-iconv for a list),
 127       *       but then take those into account as well in other methods, ie.isValidCharset)
 128       * @todo when converting to ASCII, allow to choose whether to escape the range 0-31,127 (non-print chars) or not
 129       * @todo allow picking different strategies to deal w. invalid chars? eg. source in latin-1 and chars 128-159
 130       * @todo add support for escaping using CDATA sections? (add cdata start and end tokens, replace only ']]>' with ']]]]><![CDATA[>')
 131       *
 132       * @param string $data
 133       * @param string $srcEncoding
 134       * @param string $destEncoding
 135       *
 136       * @return string
 137       */
 138      public function encodeEntities($data, $srcEncoding = '', $destEncoding = '')
 139      {
 140          if ($srcEncoding == '') {
 141              // lame, but we know no better...
 142              $srcEncoding = PhpXmlRpc::$xmlrpc_internalencoding;
 143          }
 144  
 145          if ($destEncoding == '') {
 146              $destEncoding = 'US-ASCII';
 147          }
 148  
 149          $conversion = strtoupper($srcEncoding . '_' . $destEncoding);
 150  
 151          // list ordered with (expected) most common scenarios first
 152          switch ($conversion) {
 153              case 'UTF-8_UTF-8':
 154              case 'ISO-8859-1_ISO-8859-1':
 155              case 'US-ASCII_UTF-8':
 156              case 'US-ASCII_US-ASCII':
 157              case 'US-ASCII_ISO-8859-1':
 158              //case 'CP1252_CP1252':
 159                  $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
 160                  break;
 161  
 162              case 'UTF-8_US-ASCII':
 163              case 'UTF-8_ISO-8859-1':
 164                  // NB: this will choke on invalid UTF-8, going most likely beyond EOF
 165                  $escapedData = '';
 166                  // be kind to users creating string xmlrpc values out of different php types
 167                  $data = (string)$data;
 168                  $ns = strlen($data);
 169                  for ($nn = 0; $nn < $ns; $nn++) {
 170                      $ch = $data[$nn];
 171                      $ii = ord($ch);
 172                      // 7 bits in 1 byte: 0bbbbbbb (127)
 173                      if ($ii < 32) {
 174                          if ($conversion == 'UTF-8_US-ASCII') {
 175                              $escapedData .= sprintf('&#%d;', $ii);
 176                          } else {
 177                              $escapedData .= $ch;
 178                          }
 179                      }
 180                      else if ($ii < 128) {
 181                          /// @todo shall we replace this with a (supposedly) faster str_replace?
 182                          /// @todo to be 'print safe', should we encode as well character 127 (DEL) ?
 183                          switch ($ii) {
 184                              case 34:
 185                                  $escapedData .= '&quot;';
 186                                  break;
 187                              case 38:
 188                                  $escapedData .= '&amp;';
 189                                  break;
 190                              case 39:
 191                                  $escapedData .= '&apos;';
 192                                  break;
 193                              case 60:
 194                                  $escapedData .= '&lt;';
 195                                  break;
 196                              case 62:
 197                                  $escapedData .= '&gt;';
 198                                  break;
 199                              default:
 200                                  $escapedData .= $ch;
 201                          } // switch
 202                      } // 11 bits in 2 bytes: 110bbbbb 10bbbbbb (2047)
 203                      elseif ($ii >> 5 == 6) {
 204                          $b1 = ($ii & 31);
 205                          $b2 = (ord($data[$nn + 1]) & 63);
 206                          $ii = ($b1 * 64) + $b2;
 207                          $escapedData .= sprintf('&#%d;', $ii);
 208                          $nn += 1;
 209                      } // 16 bits in 3 bytes: 1110bbbb 10bbbbbb 10bbbbbb
 210                      elseif ($ii >> 4 == 14) {
 211                          $b1 = ($ii & 15);
 212                          $b2 = (ord($data[$nn + 1]) & 63);
 213                          $b3 = (ord($data[$nn + 2]) & 63);
 214                          $ii = ((($b1 * 64) + $b2) * 64) + $b3;
 215                          $escapedData .= sprintf('&#%d;', $ii);
 216                          $nn += 2;
 217                      } // 21 bits in 4 bytes: 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
 218                      elseif ($ii >> 3 == 30) {
 219                          $b1 = ($ii & 7);
 220                          $b2 = (ord($data[$nn + 1]) & 63);
 221                          $b3 = (ord($data[$nn + 2]) & 63);
 222                          $b4 = (ord($data[$nn + 3]) & 63);
 223                          $ii = ((((($b1 * 64) + $b2) * 64) + $b3) * 64) + $b4;
 224                          $escapedData .= sprintf('&#%d;', $ii);
 225                          $nn += 3;
 226                      }
 227                  }
 228  
 229                  // when converting to latin-1, do not be so eager with using entities for characters 160-255
 230                  if ($conversion == 'UTF-8_ISO-8859-1') {
 231                      $this->buildConversionTable('xml_iso88591_Entities');
 232                      $escapedData = str_replace(array_slice($this->xml_iso88591_Entities['out'], 32), array_slice($this->xml_iso88591_Entities['in'], 32), $escapedData);
 233                  }
 234                  break;
 235  
 236              case 'ISO-8859-1_UTF-8':
 237                  $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
 238                  $escapedData = utf8_encode($escapedData);
 239                  break;
 240  
 241              case 'ISO-8859-1_US-ASCII':
 242                  $this->buildConversionTable('xml_iso88591_Entities');
 243                  $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
 244                  $escapedData = str_replace($this->xml_iso88591_Entities['in'], $this->xml_iso88591_Entities['out'], $escapedData);
 245                  break;
 246  
 247              /*
 248              case 'CP1252_US-ASCII':
 249                  $this->buildConversionTable('xml_cp1252_Entities');
 250                  $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
 251                  $escapedData = str_replace($this->xml_iso88591_Entities']['in'], $this->xml_iso88591_Entities['out'], $escapedData);
 252                  $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
 253                  break;
 254              case 'CP1252_UTF-8':
 255                  $this->buildConversionTable('xml_cp1252_Entities');
 256                  $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
 257                  /// @todo we could use real UTF8 chars here instead of xml entities... (note that utf_8 encode all alone will NOT convert them)
 258                  $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
 259                  $escapedData = utf8_encode($escapedData);
 260                  break;
 261              case 'CP1252_ISO-8859-1':
 262                  $this->buildConversionTable('xml_cp1252_Entities');
 263                  $escapedData = str_replace(array('&', '"', "'", '<', '>'), array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'), $data);
 264                  // we might as well replace all funky chars with a '?' here, but we are kind and leave it to the receiving application layer to decide what to do with these weird entities...
 265                  $escapedData = str_replace($this->xml_cp1252_Entities['in'], $this->xml_cp1252_Entities['out'], $escapedData);
 266                  break;
 267              */
 268  
 269              default:
 270                  $escapedData = '';
 271                  Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ": Converting from $srcEncoding to $destEncoding: not supported...");
 272          }
 273  
 274          return $escapedData;
 275      }
 276  
 277      /**
 278       * Checks if a given charset encoding is present in a list of encodings or if it is a valid subset of any encoding
 279       * in the list.
 280       *
 281       * @param string $encoding charset to be tested
 282       * @param string|array $validList comma separated list of valid charsets (or array of charsets)
 283       *
 284       * @return bool
 285       */
 286      public function isValidCharset($encoding, $validList)
 287      {
 288          if (is_string($validList)) {
 289              $validList = explode(',', $validList);
 290          }
 291          if (@in_array(strtoupper($encoding), $validList)) {
 292              return true;
 293          } else {
 294              if (array_key_exists($encoding, $this->charset_supersets)) {
 295                  foreach ($validList as $allowed) {
 296                      if (in_array($allowed, $this->charset_supersets[$encoding])) {
 297                          return true;
 298                      }
 299                  }
 300              }
 301  
 302              return false;
 303          }
 304      }
 305  
 306      /**
 307       * Used only for backwards compatibility
 308       * @deprecated
 309       *
 310       * @param string $charset
 311       *
 312       * @return array
 313       *
 314       * @throws \Exception for unknown/unsupported charsets
 315       */
 316      public function getEntities($charset)
 317      {
 318          //trigger_error('Method ' . __METHOD__ . ' is deprecated', E_USER_DEPRECATED);
 319  
 320          switch ($charset)
 321          {
 322              case 'iso88591':
 323                  return $this->xml_iso88591_Entities;
 324              default:
 325                  throw new \Exception('Unsupported charset: ' . $charset);
 326          }
 327      }
 328  }