Moodle 4.1 XRef and Diffs

Search moodle.org's
Developer Documentation
See Release Notes
Long Term Support Release
Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
PHP version: minimum PHP 7.4.0 Note: minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is supported too.
Moodle 4.1 Database Schema (by Marcus Green)
Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401] [Versions 400 and 401]
   1  <?php
   2  // This file is part of Moodle - http://moodle.org/
   3  //
   4  // Moodle is free software: you can redistribute it and/or modify
   5  // it under the terms of the GNU General Public License as published by
   6  // the Free Software Foundation, either version 3 of the License, or
   7  // (at your option) any later version.
   8  //
   9  // Moodle is distributed in the hope that it will be useful,
  10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  // GNU General Public License for more details.
  13  //
  14  // You should have received a copy of the GNU General Public License
  15  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  16  
  17  /**
  18   * Defines string apis
  19   *
  20   * @package    core
  21   * @copyright  (C) 2001-3001 Eloy Lafuente (stronk7) {@link http://contiento.com}
  22   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  23   */
  24  
  25  defined('MOODLE_INTERNAL') || die();
  26  
  27  /**
  28   * defines string api's for manipulating strings
  29   *
  30   * This class is used to manipulate strings under Moodle 1.6 an later. As
  31   * utf-8 text become mandatory a pool of safe functions under this encoding
  32   * become necessary. The name of the methods is exactly the
  33   * same than their PHP originals.
  34   *
  35   * This class was previously based on Typo3 which has now been removed and uses
  36   * native functions now.
  37   *
  38   * @package   core
  39   * @category  string
  40   * @copyright 1999 onwards Martin Dougiamas  {@link http://moodle.com}
  41   * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  42   */
  43  class core_text {
  44      /** @var string Byte order mark for UTF-8 */
  45      const UTF8_BOM = "\xef\xbb\xbf";
  46  
  47      /**
  48       * @var string[] Array of strings representing Unicode non-characters
  49       */
  50      protected static $noncharacters;
  51  
  52      /**
  53       * Check whether the charset is supported by mbstring.
  54       * @param string $charset Normalised charset
  55       * @return bool
  56       */
  57      public static function is_charset_supported(string $charset): bool {
  58          static $cache = null;
  59          if (!$cache) {
  60              $cache = array_flip(array_map('strtolower', mb_list_encodings()));
  61          }
  62  
  63          if (isset($cache[strtolower($charset)])) {
  64              return true;
  65          }
  66  
  67          // We haven't found the charset, check if mb has aliases for the charset.
  68          try {
  69              return mb_encoding_aliases($charset) !== false;
  70          } catch (Throwable $e) {
  71              // A ValueError will be thrown if unsupported.
  72          }
  73  
  74          return false;
  75      }
  76  
  77      /**
  78       * Reset internal textlib caches.
  79       * @static
  80       * @deprecated since Moodle 4.0. See MDL-53544.
  81       * @todo To be removed in Moodle 4.4 - MDL-71748
  82       */
  83      public static function reset_caches() {
  84          debugging("reset_caches() is deprecated. Typo3 has been removed and caches aren't used anymore.", DEBUG_DEVELOPER);
  85      }
  86  
  87      /**
  88       * Standardise charset name
  89       *
  90       * Please note it does not mean the returned charset is actually supported.
  91       *
  92       * @static
  93       * @param string $charset raw charset name
  94       * @return string normalised lowercase charset name
  95       */
  96      public static function parse_charset($charset) {
  97          $charset = strtolower($charset ?? '');
  98  
  99          if ($charset === 'utf8' or $charset === 'utf-8') {
 100              return 'utf-8';
 101          }
 102  
 103          if (preg_match('/^(cp|win|windows)-?(12[0-9]{2})$/', $charset, $matches)) {
 104              return 'windows-'.$matches[2];
 105          }
 106  
 107          if (preg_match('/^iso-8859-[0-9]+$/', $charset, $matches)) {
 108              return $charset;
 109          }
 110  
 111          if ($charset === 'euc-jp') {
 112              return 'euc-jp';
 113          }
 114          if ($charset === 'iso-2022-jp') {
 115              return 'iso-2022-jp';
 116          }
 117          if ($charset === 'shift-jis' or $charset === 'shift_jis') {
 118              return 'shift_jis';
 119          }
 120          if ($charset === 'gb2312') {
 121              return 'gb2312';
 122          }
 123          if ($charset === 'gb18030') {
 124              return 'gb18030';
 125          }
 126          if ($charset === 'ms-ansi') {
 127              return 'windows-1252';
 128          }
 129  
 130          // We have reached this stage and haven't matched with anything. Return the original.
 131          return $charset;
 132      }
 133  
 134      /**
 135       * Converts the text between different encodings. It uses iconv extension with //TRANSLIT parameter.
 136       * If both source and target are utf-8 it tries to fix invalid characters only.
 137       *
 138       * @param string $text
 139       * @param string $fromCS source encoding
 140       * @param string $toCS result encoding
 141       * @return string|bool converted string or false on error
 142       */
 143      public static function convert($text, $fromCS, $toCS='utf-8') {
 144          $fromCS = self::parse_charset($fromCS);
 145          $toCS   = self::parse_charset($toCS);
 146  
 147          $text = (string)$text; // we can work only with strings
 148  
 149          if ($text === '') {
 150              return '';
 151          }
 152  
 153          if ($fromCS === 'utf-8') {
 154              $text = fix_utf8($text);
 155              if ($toCS === 'utf-8') {
 156                  return $text;
 157              }
 158          }
 159  
 160          if ($toCS === 'ascii') {
 161              // Try to normalize the conversion a bit if the target is ascii.
 162              return self::specialtoascii($text, $fromCS);
 163          }
 164  
 165          // Prevent any error notices, do not use //IGNORE so that we get
 166          // consistent result if iconv fails.
 167          return @iconv($fromCS, $toCS.'//TRANSLIT', $text);
 168      }
 169  
 170      /**
 171       * Multibyte safe substr() function, uses mbstring or iconv
 172       *
 173       * @param string $text string to truncate
 174       * @param int $start negative value means from end
 175       * @param int $len maximum length of characters beginning from start
 176       * @param string $charset encoding of the text
 177       * @return string portion of string specified by the $start and $len
 178       */
 179      public static function substr($text, $start, $len=null, $charset='utf-8') {
 180          $charset = self::parse_charset($charset);
 181  
 182          // Check whether the charset is supported by mbstring. CP1250 is not supported. Fall back to iconv.
 183          if (self::is_charset_supported($charset)) {
 184              $result = mb_substr($text ?? '', $start, $len, $charset);
 185          } else {
 186              $result = (string)iconv_substr($text ?? '', $start, $len, $charset);
 187          }
 188  
 189          return $result;
 190      }
 191  
 192      /**
 193       * Truncates a string to no more than a certain number of bytes in a multi-byte safe manner.
 194       * UTF-8 only!
 195       *
 196       * @param string $string String to truncate
 197       * @param int $bytes Maximum length of bytes in the result
 198       * @return string Portion of string specified by $bytes
 199       * @since Moodle 3.1
 200       */
 201      public static function str_max_bytes($string, $bytes) {
 202          return mb_strcut($string ?? '', 0, $bytes, 'UTF-8');
 203      }
 204  
 205      /**
 206       * Finds the last occurrence of a character in a string within another.
 207       * UTF-8 ONLY safe mb_strrchr().
 208       *
 209       * @param string $haystack The string from which to get the last occurrence of needle.
 210       * @param string $needle The string to find in haystack.
 211       * @param boolean $part If true, returns the portion before needle, else return the portion after (including needle).
 212       * @return string|false False when not found.
 213       * @since Moodle 2.4.6, 2.5.2, 2.6
 214       */
 215      public static function strrchr($haystack, $needle, $part = false) {
 216          if (is_null($haystack)) {
 217              // Compatibility with behavior in PHP before version 8.1.
 218              return false;
 219          }
 220          return mb_strrchr($haystack, $needle, $part, 'UTF-8');
 221      }
 222  
 223      /**
 224       * Multibyte safe strlen() function, uses mbstring or iconv
 225       *
 226       * @param string $text input string
 227       * @param string $charset encoding of the text
 228       * @return int number of characters
 229       */
 230      public static function strlen($text, $charset='utf-8') {
 231          $charset = self::parse_charset($charset);
 232  
 233          if (self::is_charset_supported($charset)) {
 234              return mb_strlen($text ?? '', $charset);
 235          }
 236  
 237          return iconv_strlen($text ?? '', $charset);
 238      }
 239  
 240      /**
 241       * Multibyte safe strtolower() function, uses mbstring.
 242       *
 243       * @param string $text input string
 244       * @param string $charset encoding of the text (may not work for all encodings)
 245       * @return string lower case text
 246       */
 247      public static function strtolower($text, $charset='utf-8') {
 248          $charset = self::parse_charset($charset);
 249  
 250          // Confirm mbstring can handle the charset.
 251          if (self::is_charset_supported($charset)) {
 252              return mb_strtolower($text ?? '', $charset);
 253          }
 254  
 255          // The mbstring extension cannot handle the charset. Convert to UTF-8.
 256          $convertedtext = self::convert($text, $charset, 'utf-8');
 257          $result = mb_strtolower($convertedtext);
 258          $result = self::convert($result, 'utf-8', $charset);
 259          return $result;
 260      }
 261  
 262      /**
 263       * Multibyte safe strtoupper() function, uses mbstring.
 264       *
 265       * @param string $text input string
 266       * @param string $charset encoding of the text (may not work for all encodings)
 267       * @return string upper case text
 268       */
 269      public static function strtoupper($text, $charset='utf-8') {
 270          $charset = self::parse_charset($charset);
 271  
 272          // Confirm mbstring can handle the charset.
 273          if (self::is_charset_supported($charset)) {
 274              return mb_strtoupper($text ?? '', $charset);
 275          }
 276  
 277          // The mbstring extension cannot handle the charset. Convert to UTF-8.
 278          $convertedtext = self::convert($text, $charset, 'utf-8');
 279          $result = mb_strtoupper($convertedtext);
 280          $result = self::convert($result, 'utf-8', $charset);
 281          return $result;
 282      }
 283  
 284      /**
 285       * Find the position of the first occurrence of a substring in a string.
 286       * UTF-8 ONLY safe strpos(), uses mbstring
 287       *
 288       * @param string $haystack the string to search in
 289       * @param string $needle one or more charachters to search for
 290       * @param int $offset offset from begining of string
 291       * @return int the numeric position of the first occurrence of needle in haystack.
 292       */
 293      public static function strpos($haystack, $needle, $offset=0) {
 294          return mb_strpos($haystack ?? '', $needle, $offset, 'UTF-8');
 295      }
 296  
 297      /**
 298       * Find the position of the last occurrence of a substring in a string
 299       * UTF-8 ONLY safe strrpos(), uses mbstring
 300       *
 301       * @param string $haystack the string to search in
 302       * @param string $needle one or more charachters to search for
 303       * @return int the numeric position of the last occurrence of needle in haystack
 304       */
 305      public static function strrpos($haystack, $needle) {
 306          if (is_null($haystack)) {
 307              // Compatibility with behavior in PHP before version 8.1.
 308              return false;
 309          }
 310          return mb_strrpos($haystack, $needle, 0, 'UTF-8');
 311      }
 312  
 313      /**
 314       * Reverse UTF-8 multibytes character sets (used for RTL languages)
 315       * (We only do this because there is no mb_strrev or iconv_strrev)
 316       *
 317       * @param string $str the multibyte string to reverse
 318       * @return string the reversed multi byte string
 319       */
 320      public static function strrev($str) {
 321          preg_match_all('/./us', $str ?? '', $ar);
 322          return join('', array_reverse($ar[0]));
 323      }
 324  
 325      /**
 326       * Try to convert upper unicode characters to plain ascii,
 327       * the returned string may contain unconverted unicode characters.
 328       *
 329       * With the removal of typo3, iconv conversions was found to be the best alternative to Typo3's function.
 330       * However using the standard iconv call
 331       *      iconv($charset, 'ASCII//TRANSLIT//IGNORE', (string) $text);
 332       * resulted in invalid strings with special character from Russian/Japanese. To solve this, the transliterator was
 333       * used but this resulted in empty strings for certain strings in our test. It was decided to use a combo of the 2
 334       * to cover all our bases. Refer MDL-53544 for further information.
 335       *
 336       * @param string $text input string
 337       * @param string $charset encoding of the text
 338       * @return string converted ascii string
 339       */
 340      public static function specialtoascii($text, $charset='utf-8') {
 341          $charset = self::parse_charset($charset);
 342          $oldlevel = error_reporting(E_PARSE);
 343  
 344          // Always convert to utf-8, so transliteration can do its work always.
 345          if ($charset !== 'utf-8') {
 346              $text = iconv($charset, 'utf-8'.'//TRANSLIT', $text);
 347          }
 348          $text = transliterator_transliterate('Any-Latin; Latin-ASCII', (string) $text);
 349  
 350          // Still, apply iconv because some chars are not handled by transliterate.
 351          $result = iconv('utf-8', 'ASCII//TRANSLIT//IGNORE', (string) $text);
 352  
 353          error_reporting($oldlevel);
 354          return $result;
 355      }
 356  
 357      /**
 358       * Generate a correct base64 encoded header to be used in MIME mail messages.
 359       * This function seems to be 100% compliant with RFC1342. Credits go to:
 360       * paravoid (http://www.php.net/manual/en/function.mb-encode-mimeheader.php#60283).
 361       *
 362       * @param string $text input string
 363       * @param string $charset encoding of the text
 364       * @return string base64 encoded header
 365       */
 366      public static function encode_mimeheader($text, $charset='utf-8') {
 367          if (empty($text)) {
 368              return (string)$text;
 369          }
 370          // Normalize charset
 371          $charset = self::parse_charset($charset);
 372          // If the text is pure ASCII, we don't need to encode it
 373          if (self::convert($text, $charset, 'ascii') == $text) {
 374              return $text;
 375          }
 376          // Although RFC says that line feed should be \r\n, it seems that
 377          // some mailers double convert \r, so we are going to use \n alone
 378          $linefeed="\n";
 379          // Define start and end of every chunk
 380          $start = "=?$charset?B?";
 381          $end = "?=";
 382          // Accumulate results
 383          $encoded = '';
 384          // Max line length is 75 (including start and end)
 385          $length = 75 - strlen($start) - strlen($end);
 386          // Multi-byte ratio
 387          $multilength = self::strlen($text, $charset);
 388          // Detect if strlen and friends supported
 389          if ($multilength === false) {
 390              if ($charset == 'GB18030' or $charset == 'gb18030') {
 391                  while (strlen($text)) {
 392                      // try to encode first 22 chars - we expect most chars are two bytes long
 393                      if (preg_match('/^(([\x00-\x7f])|([\x81-\xfe][\x40-\x7e])|([\x81-\xfe][\x80-\xfe])|([\x81-\xfe][\x30-\x39]..)){1,22}/m', $text, $matches)) {
 394                          $chunk = $matches[0];
 395                          $encchunk = base64_encode($chunk);
 396                          if (strlen($encchunk) > $length) {
 397                              // find first 11 chars - each char in 4 bytes - worst case scenario
 398                              preg_match('/^(([\x00-\x7f])|([\x81-\xfe][\x40-\x7e])|([\x81-\xfe][\x80-\xfe])|([\x81-\xfe][\x30-\x39]..)){1,11}/m', $text, $matches);
 399                              $chunk = $matches[0];
 400                              $encchunk = base64_encode($chunk);
 401                          }
 402                          $text = substr($text, strlen($chunk));
 403                          $encoded .= ' '.$start.$encchunk.$end.$linefeed;
 404                      } else {
 405                          break;
 406                      }
 407                  }
 408                  $encoded = trim($encoded);
 409                  return $encoded;
 410              } else {
 411                  return false;
 412              }
 413          }
 414          $ratio = $multilength / strlen($text);
 415          // Base64 ratio
 416          $magic = $avglength = floor(3 * $length * $ratio / 4);
 417          // basic infinite loop protection
 418          $maxiterations = strlen($text)*2;
 419          $iteration = 0;
 420          // Iterate over the string in magic chunks
 421          for ($i=0; $i <= $multilength; $i+=$magic) {
 422              if ($iteration++ > $maxiterations) {
 423                  return false; // probably infinite loop
 424              }
 425              $magic = $avglength;
 426              $offset = 0;
 427              // Ensure the chunk fits in length, reducing magic if necessary
 428              do {
 429                  $magic -= $offset;
 430                  $chunk = self::substr($text, $i, $magic, $charset);
 431                  $chunk = base64_encode($chunk);
 432                  $offset++;
 433              } while (strlen($chunk) > $length);
 434              // This chunk doesn't break any multi-byte char. Use it.
 435              if ($chunk)
 436                  $encoded .= ' '.$start.$chunk.$end.$linefeed;
 437          }
 438          // Strip the first space and the last linefeed
 439          $encoded = substr($encoded, 1, -strlen($linefeed));
 440  
 441          return $encoded;
 442      }
 443  
 444      /**
 445       * Returns HTML entity transliteration table.
 446       * @return array with (html entity => utf-8) elements
 447       */
 448      protected static function get_entities_table() {
 449          static $trans_tbl = null;
 450  
 451          // Generate/create $trans_tbl
 452          if (!isset($trans_tbl)) {
 453              if (version_compare(phpversion(), '5.3.4') < 0) {
 454                  $trans_tbl = array();
 455                  foreach (get_html_translation_table(HTML_ENTITIES, ENT_COMPAT) as $val=>$key) {
 456                      $trans_tbl[$key] = self::convert($val, 'ISO-8859-1', 'utf-8');
 457                  }
 458  
 459              } else if (version_compare(phpversion(), '5.4.0') < 0) {
 460                  $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8');
 461                  $trans_tbl = array_flip($trans_tbl);
 462  
 463              } else {
 464                  $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT | ENT_HTML401, 'UTF-8');
 465                  $trans_tbl = array_flip($trans_tbl);
 466              }
 467          }
 468  
 469          return $trans_tbl;
 470      }
 471  
 472      /**
 473       * Converts all the numeric entities &#nnnn; or &#xnnn; to UTF-8
 474       * Original from laurynas dot butkus at gmail at:
 475       * http://php.net/manual/en/function.html-entity-decode.php#75153
 476       * with some custom mods to provide more functionality
 477       *
 478       * @param string $str input string
 479       * @param boolean $htmlent convert also html entities (defaults to true)
 480       * @return string encoded UTF-8 string
 481       */
 482      public static function entities_to_utf8($str, $htmlent=true) {
 483          static $callback1 = null ;
 484          static $callback2 = null ;
 485  
 486          if (!$callback1 or !$callback2) {
 487              $callback1 = function($matches) {
 488                  return core_text::code2utf8(hexdec($matches[1]));
 489              };
 490              $callback2 = function($matches) {
 491                  return core_text::code2utf8($matches[1]);
 492              };
 493          }
 494  
 495          $result = (string)$str;
 496          $result = preg_replace_callback('/&#x([0-9a-f]+);/i', $callback1, $result);
 497          $result = preg_replace_callback('/&#([0-9]+);/', $callback2, $result);
 498  
 499          // Replace literal entities (if desired)
 500          if ($htmlent) {
 501              $trans_tbl = self::get_entities_table();
 502              // It should be safe to search for ascii strings and replace them with utf-8 here.
 503              $result = strtr($result, $trans_tbl);
 504          }
 505          // Return utf8-ised string
 506          return $result;
 507      }
 508  
 509      /**
 510       * Converts all Unicode chars > 127 to numeric entities &#nnnn; or &#xnnn;.
 511       *
 512       * @param string $str input string
 513       * @param boolean $dec output decadic only number entities
 514       * @param boolean $nonnum remove all non-numeric entities
 515       * @return string converted string
 516       */
 517      public static function utf8_to_entities($str, $dec=false, $nonnum=false) {
 518          static $callback = null ;
 519  
 520          if ($nonnum) {
 521              $str = self::entities_to_utf8($str, true);
 522          }
 523  
 524          $result = mb_strtolower(mb_encode_numericentity($str ?? '', [0xa0, 0xffff, 0, 0xffff], 'UTF-8', true));
 525  
 526          // We cannot use the decimal equivalent of the above call due to the unit test and our allowance for
 527          // entities to be entered within the provided $str. Refer to the correspond unit test for examples.
 528          if ($dec) {
 529              if (!$callback) {
 530                  $callback = function($matches) {
 531                      return '&#' . hexdec($matches[1]) . ';';
 532                  };
 533              }
 534              $result = preg_replace_callback('/&#x([0-9a-f]+);/i', $callback, $result);
 535          }
 536  
 537          return $result;
 538      }
 539  
 540      /**
 541       * Removes the BOM from unicode string {@link http://unicode.org/faq/utf_bom.html}
 542       *
 543       * @param string $str input string
 544       * @return string
 545       */
 546      public static function trim_utf8_bom($str) {
 547          if (is_null($str)) {
 548              return null;
 549          }
 550          $bom = self::UTF8_BOM;
 551          if (strpos($str, $bom) === 0) {
 552              return substr($str, strlen($bom));
 553          }
 554          return $str;
 555      }
 556  
 557      /**
 558       * There are a number of Unicode non-characters including the byte-order mark (which may appear
 559       * multiple times in a string) and also other ranges. These can cause problems for some
 560       * processing.
 561       *
 562       * This function removes the characters using string replace, so that the rest of the string
 563       * remains unchanged.
 564       *
 565       * @param string $value Input string
 566       * @return string Cleaned string value
 567       * @since Moodle 3.5
 568       */
 569      public static function remove_unicode_non_characters($value) {
 570          // Set up list of all Unicode non-characters for fast replacing.
 571          if (!self::$noncharacters) {
 572              self::$noncharacters = [];
 573              // This list of characters is based on the Unicode standard. It includes the last two
 574              // characters of each code planes 0-16 inclusive...
 575              for ($plane = 0; $plane <= 16; $plane++) {
 576                  $base = ($plane === 0 ? '' : dechex($plane));
 577                  self::$noncharacters[] = html_entity_decode('&#x' . $base . 'fffe;', ENT_COMPAT);
 578                  self::$noncharacters[] = html_entity_decode('&#x' . $base . 'ffff;', ENT_COMPAT);
 579              }
 580              // ...And the character range U+FDD0 to U+FDEF.
 581              for ($char = 0xfdd0; $char <= 0xfdef; $char++) {
 582                  self::$noncharacters[] = html_entity_decode('&#x' . dechex($char) . ';', ENT_COMPAT);
 583              }
 584          }
 585  
 586          // Do character replacement.
 587          return str_replace(self::$noncharacters, '', $value);
 588      }
 589  
 590      /**
 591       * Returns encoding options for select boxes, utf-8 and platform encoding first
 592       *
 593       * @return array encodings
 594       */
 595      public static function get_encodings() {
 596          $encodings = array();
 597          $encodings['UTF-8'] = 'UTF-8';
 598          $winenc = strtoupper(get_string('localewincharset', 'langconfig'));
 599          if ($winenc != '') {
 600              $encodings[$winenc] = $winenc;
 601          }
 602          $nixenc = strtoupper(get_string('oldcharset', 'langconfig'));
 603          $encodings[$nixenc] = $nixenc;
 604  
 605          $listedencodings = mb_list_encodings();
 606          foreach ($listedencodings as $enc) {
 607              $enc = strtoupper($enc);
 608              $encodings[$enc] = $enc;
 609          }
 610          return $encodings;
 611      }
 612  
 613      /**
 614       * Returns the utf8 string corresponding to the unicode value
 615       * (from php.net, courtesy - romans@void.lv)
 616       *
 617       * @param  int    $num one unicode value
 618       * @return string the UTF-8 char corresponding to the unicode value
 619       */
 620      public static function code2utf8($num) {
 621          if ($num < 128) {
 622              return chr($num);
 623          }
 624          if ($num < 2048) {
 625              return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
 626          }
 627          if ($num < 65536) {
 628              return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
 629          }
 630          if ($num < 2097152) {
 631              return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
 632          }
 633          return '';
 634      }
 635  
 636      /**
 637       * Returns the code of the given UTF-8 character
 638       *
 639       * @param  string $utf8char one UTF-8 character
 640       * @return int    the code of the given character
 641       */
 642      public static function utf8ord($utf8char) {
 643          if ($utf8char == '') {
 644              return 0;
 645          }
 646          $ord0 = ord($utf8char[0]);
 647          if ($ord0 >= 0 && $ord0 <= 127) {
 648              return $ord0;
 649          }
 650          $ord1 = ord($utf8char[1]);
 651          if ($ord0 >= 192 && $ord0 <= 223) {
 652              return ($ord0 - 192) * 64 + ($ord1 - 128);
 653          }
 654          $ord2 = ord($utf8char[2]);
 655          if ($ord0 >= 224 && $ord0 <= 239) {
 656              return ($ord0 - 224) * 4096 + ($ord1 - 128) * 64 + ($ord2 - 128);
 657          }
 658          $ord3 = ord($utf8char[3]);
 659          if ($ord0 >= 240 && $ord0 <= 247) {
 660              return ($ord0 - 240) * 262144 + ($ord1 - 128 )* 4096 + ($ord2 - 128) * 64 + ($ord3 - 128);
 661          }
 662          return false;
 663      }
 664  
 665      /**
 666       * Makes first letter of each word capital - words must be separated by spaces.
 667       * Use with care, this function does not work properly in many locales!!!
 668       *
 669       * @param string $text input string
 670       * @return string
 671       */
 672      public static function strtotitle($text) {
 673          if (empty($text)) {
 674              return $text;
 675          }
 676  
 677          return mb_convert_case($text, MB_CASE_TITLE, 'UTF-8');
 678      }
 679  }