Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.11.x will end 14 Nov 2022 (12 months plus 6 months extension).
  • Bug fixes for security issues in 3.11.x will end 13 Nov 2023 (18 months plus 12 months extension).
  • PHP version: minimum PHP 7.3.0. Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is supported too.

Differences Between: [Versions 311 and 401] [Versions 311 and 402] [Versions 311 and 403]

   1  <?php
   2  /**
   3   * Provides static methods for charset and locale safe string manipulation.
   4   *
   5   * Copyright 2003-2017 Horde LLC (http://www.horde.org/)
   6   *
   7   * See the enclosed file LICENSE for license information (LGPL). If you
   8   * did not receive this file, see http://www.horde.org/licenses/lgpl21.
   9   *
  10   * @todo Split up in Horde_String_Multibyte for multibyte-safe methods and
  11   *       Horde_String_Locale for locale-safe methods.
  12   *
  13   * @author   Jan Schneider <jan@horde.org>
  14   * @category Horde
  15   * @license  http://www.horde.org/licenses/lgpl21 LGPL 2.1
  16   * @package  Util
  17   */
  18  class Horde_String
  19  {
  20      /**
  21       * lower() cache.
  22       *
  23       * @var array
  24       */
  25      protected static $_lowers = array();
  26  
  27      /**
  28       * upper() cache.
  29       *
  30       * @var array
  31       */
  32      protected static $_uppers = array();
  33  
  34      /**
  35       * Converts a string from one charset to another.
  36       *
  37       * Uses the iconv or the mbstring extensions.
  38       * The original string is returned if conversion failed or none
  39       * of the extensions were available.
  40       *
  41       * @param mixed $input    The data to be converted. If $input is an an
  42       *                        array, the array's values get converted
  43       *                        recursively.
  44       * @param string $from    The string's current charset.
  45       * @param string $to      The charset to convert the string to.
  46       * @param boolean $force  Force conversion?
  47       *
  48       * @return mixed  The converted input data.
  49       */
  50      public static function convertCharset($input, $from, $to, $force = false)
  51      {
  52          /* Don't bother converting numbers. */
  53          if (is_numeric($input)) {
  54              return $input;
  55          }
  56  
  57          /* If the from and to character sets are identical, return now. */
  58          if (!$force && $from == $to) {
  59              return $input;
  60          }
  61          $from = self::lower($from);
  62          $to = self::lower($to);
  63          if (!$force && $from == $to) {
  64              return $input;
  65          }
  66  
  67          if (is_array($input)) {
  68              $tmp = array();
  69              foreach ($input as $key => $val) {
  70                  $tmp[self::_convertCharset($key, $from, $to)] = self::convertCharset($val, $from, $to, $force);
  71              }
  72              return $tmp;
  73          }
  74  
  75          if (is_object($input)) {
  76              // PEAR_Error/Exception objects are almost guaranteed to contain
  77              // recursion, which will cause a segfault in PHP. We should never
  78              // reach this line, but add a check.
  79              if (($input instanceof Exception) ||
  80                  ($input instanceof PEAR_Error)) {
  81                  return '';
  82              }
  83  
  84              $input = clone $input;
  85              $vars = get_object_vars($input);
  86              foreach ($vars as $key => $val) {
  87                  $input->$key = self::convertCharset($val, $from, $to, $force);
  88              }
  89              return $input;
  90          }
  91  
  92          if (!is_string($input)) {
  93              return $input;
  94          }
  95  
  96          return self::_convertCharset($input, $from, $to);
  97      }
  98  
  99      /**
 100       * Internal function used to do charset conversion.
 101       *
 102       * @param string $input  See self::convertCharset().
 103       * @param string $from   See self::convertCharset().
 104       * @param string $to     See self::convertCharset().
 105       *
 106       * @return string  The converted string.
 107       */
 108      protected static function _convertCharset($input, $from, $to)
 109      {
 110          /* Use utf8_[en|de]code() if possible and if the string isn't too
 111           * large (less than 16 MB = 16 * 1024 * 1024 = 16777216 bytes) - these
 112           * functions use more memory. */
 113          if (Horde_Util::extensionExists('xml') &&
 114              ((strlen($input) < 16777216) ||
 115               !Horde_Util::extensionExists('iconv') ||
 116               !Horde_Util::extensionExists('mbstring'))) {
 117              if (($to == 'utf-8') &&
 118                  in_array($from, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
 119                  return utf8_encode($input);
 120              }
 121  
 122              if (($from == 'utf-8') &&
 123                  in_array($to, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
 124                  return utf8_decode($input);
 125              }
 126          }
 127  
 128          /* Try UTF7-IMAP conversions. */
 129          if (($from == 'utf7-imap') || ($to == 'utf7-imap')) {
 130              try {
 131                  if ($from == 'utf7-imap') {
 132                      return self::convertCharset(Horde_Imap_Client_Utf7imap::Utf7ImapToUtf8($input), 'UTF-8', $to);
 133                  } else {
 134                      if ($from == 'utf-8') {
 135                          $conv = $input;
 136                      } else {
 137                          $conv = self::convertCharset($input, $from, 'UTF-8');
 138                      }
 139                      return Horde_Imap_Client_Utf7imap::Utf8ToUtf7Imap($conv);
 140                  }
 141              } catch (Horde_Imap_Client_Exception $e) {
 142                  return $input;
 143              }
 144          }
 145  
 146          /* Try iconv with transliteration. */
 147          if (Horde_Util::extensionExists('iconv')) {
 148              unset($php_errormsg);
 149              ini_set('track_errors', 1);
 150              $out = @iconv($from, $to . '//TRANSLIT', $input);
 151              $errmsg = isset($php_errormsg);
 152              ini_restore('track_errors');
 153              if (!$errmsg && $out !== false) {
 154                  return $out;
 155              }
 156          }
 157  
 158          /* Try mbstring. */
 159          if (Horde_Util::extensionExists('mbstring')) {
 160              $out = @mb_convert_encoding($input, $to, self::_mbstringCharset($from));
 161              if (!empty($out)) {
 162                  return $out;
 163              }
 164          }
 165  
 166          return $input;
 167      }
 168  
 169      /**
 170       * Makes a string lowercase.
 171       *
 172       * @param string $string   The string to be converted.
 173       * @param boolean $locale  If true the string will be converted based on
 174       *                         a given charset, locale independent else.
 175       * @param string $charset  If $locale is true, the charset to use when
 176       *                         converting.
 177       *
 178       * @return string  The string with lowercase characters.
 179       */
 180      public static function lower($string, $locale = false, $charset = null)
 181      {
 182          if ($locale) {
 183              if (Horde_Util::extensionExists('mbstring')) {
 184                  if (is_null($charset)) {
 185                      throw new InvalidArgumentException('$charset argument must not be null');
 186                  }
 187                  $ret = @mb_strtolower($string, self::_mbstringCharset($charset));
 188                  if (!empty($ret)) {
 189                      return $ret;
 190                  }
 191              }
 192              return strtolower($string);
 193          }
 194  
 195          if (!isset(self::$_lowers[$string])) {
 196              $language = setlocale(LC_CTYPE, 0);
 197              setlocale(LC_CTYPE, 'C');
 198              self::$_lowers[$string] = strtolower($string);
 199              setlocale(LC_CTYPE, $language);
 200          }
 201  
 202          return self::$_lowers[$string];
 203      }
 204  
 205      /**
 206       * Makes a string uppercase.
 207       *
 208       * @param string $string   The string to be converted.
 209       * @param boolean $locale  If true the string will be converted based on a
 210       *                         given charset, locale independent else.
 211       * @param string $charset  If $locale is true, the charset to use when
 212       *                         converting. If not provided the current charset.
 213       *
 214       * @return string  The string with uppercase characters.
 215       */
 216      public static function upper($string, $locale = false, $charset = null)
 217      {
 218          if ($locale) {
 219              if (Horde_Util::extensionExists('mbstring')) {
 220                  if (is_null($charset)) {
 221                      throw new InvalidArgumentException('$charset argument must not be null');
 222                  }
 223                  $ret = @mb_strtoupper($string, self::_mbstringCharset($charset));
 224                  if (!empty($ret)) {
 225                      return $ret;
 226                  }
 227              }
 228              return strtoupper($string);
 229          }
 230  
 231          if (!isset(self::$_uppers[$string])) {
 232              $language = setlocale(LC_CTYPE, 0);
 233              setlocale(LC_CTYPE, 'C');
 234              self::$_uppers[$string] = strtoupper($string);
 235              setlocale(LC_CTYPE, $language);
 236          }
 237  
 238          return self::$_uppers[$string];
 239      }
 240  
 241      /**
 242       * Returns a string with the first letter capitalized if it is
 243       * alphabetic.
 244       *
 245       * @param string $string   The string to be capitalized.
 246       * @param boolean $locale  If true the string will be converted based on a
 247       *                         given charset, locale independent else.
 248       * @param string $charset  The charset to use, defaults to current charset.
 249       *
 250       * @return string  The capitalized string.
 251       */
 252      public static function ucfirst($string, $locale = false, $charset = null)
 253      {
 254          if ($locale) {
 255              if (is_null($charset)) {
 256                  throw new InvalidArgumentException('$charset argument must not be null');
 257              }
 258              $first = self::substr($string, 0, 1, $charset);
 259              if (self::isAlpha($first, $charset)) {
 260                  $string = self::upper($first, true, $charset) . self::substr($string, 1, null, $charset);
 261              }
 262          } else {
 263              $string = self::upper(substr($string, 0, 1), false) . substr($string, 1);
 264          }
 265  
 266          return $string;
 267      }
 268  
 269      /**
 270       * Returns a string with the first letter of each word capitalized if it is
 271       * alphabetic.
 272       *
 273       * Sentences are splitted into words at whitestrings.
 274       *
 275       * @param string $string   The string to be capitalized.
 276       * @param boolean $locale  If true the string will be converted based on a
 277       *                         given charset, locale independent else.
 278       * @param string $charset  The charset to use, defaults to current charset.
 279       *
 280       * @return string  The capitalized string.
 281       */
 282      public static function ucwords($string, $locale = false, $charset = null)
 283      {
 284          $words = preg_split('/(\s+)/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
 285          for ($i = 0, $c = count($words); $i < $c; $i += 2) {
 286              $words[$i] = self::ucfirst($words[$i], $locale, $charset);
 287          }
 288          return implode('', $words);
 289      }
 290  
 291      /**
 292       * Returns part of a string.
 293       *
 294       * @param string $string   The string to be converted.
 295       * @param integer $start   The part's start position, zero based.
 296       * @param integer $length  The part's length.
 297       * @param string $charset  The charset to use when calculating the part's
 298       *                         position and length, defaults to current
 299       *                         charset.
 300       *
 301       * @return string  The string's part.
 302       */
 303      public static function substr($string, $start, $length = null,
 304                                    $charset = 'UTF-8')
 305      {
 306          if (is_null($length)) {
 307              $length = self::length($string, $charset) - $start;
 308          }
 309  
 310          if ($length === 0) {
 311              return '';
 312          }
 313  
 314          $error = false;
 315  
 316          /* Try mbstring. */
 317          if (Horde_Util::extensionExists('mbstring')) {
 318              $ret = @mb_substr($string, $start, $length, self::_mbstringCharset($charset));
 319  
 320              /* mb_substr() returns empty string on failure. */
 321              if (strlen($ret)) {
 322                  return $ret;
 323              }
 324              $error = true;
 325          }
 326  
 327          /* Try iconv. */
 328          if (Horde_Util::extensionExists('iconv')) {
 329              $ret = @iconv_substr($string, $start, $length, $charset);
 330  
 331              /* iconv_substr() returns false on failure. */
 332              if ($ret !== false) {
 333                  return $ret;
 334              }
 335              $error = true;
 336          }
 337  
 338          /* Try intl. */
 339          if (Horde_Util::extensionExists('intl')) {
 340              $ret = self::convertCharset(
 341                  @grapheme_substr(
 342                      self::convertCharset($string, $charset, 'UTF-8'),
 343                      $start,
 344                      $length
 345                  ),
 346                  'UTF-8',
 347                  $charset
 348              );
 349  
 350              /* grapheme_substr() returns false on failure. */
 351              if ($ret !== false) {
 352                  return $ret;
 353              }
 354              $error = true;
 355          }
 356  
 357          return $error
 358              ? ''
 359              : substr($string, $start, $length);
 360      }
 361  
 362      /**
 363       * Returns the character (not byte) length of a string.
 364       *
 365       * @param string $string  The string to return the length of.
 366       * @param string $charset The charset to use when calculating the string's
 367       *                        length.
 368       *
 369       * @return integer  The string's length.
 370       */
 371      public static function length($string, $charset = 'UTF-8')
 372      {
 373          $charset = self::lower($charset);
 374  
 375          if ($charset == 'utf-8' || $charset == 'utf8') {
 376              return strlen(utf8_decode($string));
 377          }
 378  
 379          if (Horde_Util::extensionExists('mbstring')) {
 380              $ret = @mb_strlen($string, self::_mbstringCharset($charset));
 381              if (!empty($ret)) {
 382                  return $ret;
 383              }
 384          }
 385          if (Horde_Util::extensionExists('intl')) {
 386              return grapheme_strlen(
 387                  self::convertCharset($string, $charset, 'UTF-8')
 388              );
 389          }
 390  
 391          return strlen($string);
 392      }
 393  
 394      /**
 395       * Returns the numeric position of the first occurrence of $needle
 396       * in the $haystack string.
 397       *
 398       * @param string $haystack  The string to search through.
 399       * @param string $needle    The string to search for.
 400       * @param integer $offset   Character in $haystack to start searching at.
 401       * @param string $charset   Charset of $needle.
 402       *
 403       * @return integer  The position of first occurrence.
 404       */
 405      public static function pos(
 406          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 407      )
 408      {
 409          return self::_pos($haystack, $needle, $offset, $charset, 'strpos');
 410      }
 411  
 412      /**
 413       * Returns the numeric position of the first case-insensitive occurrence
 414       * of $needle in the $haystack string.
 415       *
 416       * @since 2.5.0
 417       *
 418       * @param string $haystack  The string to search through.
 419       * @param string $needle    The string to search for.
 420       * @param integer $offset   Character in $haystack to start searching at.
 421       * @param string $charset   Charset of $needle.
 422       *
 423       * @return integer  The position of first case-insensitive occurrence.
 424       */
 425      public static function ipos(
 426          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 427      )
 428      {
 429          return self::_pos($haystack, $needle, $offset, $charset, 'stripos');
 430      }
 431  
 432      /**
 433       * Returns the numeric position of the last occurrence of $needle
 434       * in the $haystack string.
 435       *
 436       * @param string $haystack  The string to search through.
 437       * @param string $needle    The string to search for.
 438       * @param integer $offset   Character in $haystack to start searching at.
 439       * @param string $charset   Charset of $needle.
 440       *
 441       * @return integer  The position of last occurrence.
 442       */
 443      public static function rpos(
 444          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 445      )
 446      {
 447          return self::_pos($haystack, $needle, $offset, $charset, 'strrpos');
 448      }
 449  
 450      /**
 451       * Returns the numeric position of the last case-insensitive occurrence of
 452       * $needle in the $haystack string.
 453       *
 454       * @since 2.5.0
 455       *
 456       * @param string $haystack  The string to search through.
 457       * @param string $needle    The string to search for.
 458       * @param integer $offset   Character in $haystack to start searching at.
 459       * @param string $charset   Charset of $needle.
 460       *
 461       * @return integer  The position of last case-insensitive occurrence.
 462       */
 463      public static function ripos(
 464          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 465      )
 466      {
 467          return self::_pos($haystack, $needle, $offset, $charset, 'strripos');
 468      }
 469  
 470      /**
 471       * Perform string position searches.
 472       *
 473       * @param string $haystack  The string to search through.
 474       * @param string $needle    The string to search for.
 475       * @param integer $offset   Character in $haystack to start searching at.
 476       * @param string $charset   Charset of $needle.
 477       * @param string $func      Function to use.
 478       *
 479       * @return integer  The position of occurrence.
 480       *
 481       */
 482      protected static function _pos(
 483          $haystack, $needle, $offset, $charset, $func
 484      )
 485      {
 486          if (Horde_Util::extensionExists('mbstring')) {
 487              unset($php_errormsg);
 488              $track_errors = ini_set('track_errors', 1);
 489              $ret = @call_user_func('mb_' . $func, $haystack, $needle, $offset, self::_mbstringCharset($charset));
 490              ini_set('track_errors', $track_errors);
 491              if (!isset($php_errormsg)) {
 492                  return $ret;
 493              }
 494          }
 495  
 496          if (Horde_Util::extensionExists('intl')) {
 497              unset($php_errormsg);
 498              $track_errors = ini_set('track_errors', 1);
 499              $ret = self::convertCharset(
 500                  @call_user_func(
 501                      'grapheme_' . $func,
 502                      self::convertCharset($haystack, $charset, 'UTF-8'),
 503                      self::convertCharset($needle, $charset, 'UTF-8'),
 504                      $offset
 505                  ),
 506                  'UTF-8',
 507                  $charset
 508              );
 509              ini_set('track_errors', $track_errors);
 510              if (!isset($php_errormsg)) {
 511                  return $ret;
 512              }
 513          }
 514  
 515          return $func($haystack, $needle, $offset);
 516      }
 517  
 518      /**
 519       * Returns a string padded to a certain length with another string.
 520       * This method behaves exactly like str_pad() but is multibyte safe.
 521       *
 522       * @param string $input    The string to be padded.
 523       * @param integer $length  The length of the resulting string.
 524       * @param string $pad      The string to pad the input string with. Must
 525       *                         be in the same charset like the input string.
 526       * @param const $type      The padding type. One of STR_PAD_LEFT,
 527       *                         STR_PAD_RIGHT, or STR_PAD_BOTH.
 528       * @param string $charset  The charset of the input and the padding
 529       *                         strings.
 530       *
 531       * @return string  The padded string.
 532       */
 533      public static function pad($input, $length, $pad = ' ',
 534                                 $type = STR_PAD_RIGHT, $charset = 'UTF-8')
 535      {
 536          $mb_length = self::length($input, $charset);
 537          $sb_length = strlen($input);
 538          $pad_length = self::length($pad, $charset);
 539  
 540          /* Return if we already have the length. */
 541          if ($mb_length >= $length) {
 542              return $input;
 543          }
 544  
 545          /* Shortcut for single byte strings. */
 546          if ($mb_length == $sb_length && $pad_length == strlen($pad)) {
 547              return str_pad($input, $length, $pad, $type);
 548          }
 549  
 550          switch ($type) {
 551          case STR_PAD_LEFT:
 552              $left = $length - $mb_length;
 553              $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) . $input;
 554              break;
 555  
 556          case STR_PAD_BOTH:
 557              $left = floor(($length - $mb_length) / 2);
 558              $right = ceil(($length - $mb_length) / 2);
 559              $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) .
 560                  $input .
 561                  self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
 562              break;
 563  
 564          case STR_PAD_RIGHT:
 565              $right = $length - $mb_length;
 566              $output = $input . self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
 567              break;
 568          }
 569  
 570          return $output;
 571      }
 572  
 573      /**
 574       * Wraps the text of a message.
 575       *
 576       * @param string $string         String containing the text to wrap.
 577       * @param integer $width         Wrap the string at this number of
 578       *                               characters.
 579       * @param string $break          Character(s) to use when breaking lines.
 580       * @param boolean $cut           Whether to cut inside words if a line
 581       *                               can't be wrapped.
 582       * @param boolean $line_folding  Whether to apply line folding rules per
 583       *                               RFC 822 or similar. The correct break
 584       *                               characters including leading whitespace
 585       *                               have to be specified too.
 586       *
 587       * @return string  String containing the wrapped text.
 588       */
 589      public static function wordwrap($string, $width = 75, $break = "\n",
 590                                      $cut = false, $line_folding = false)
 591      {
 592          $breakRegex = '(?:' . preg_quote($break) . ')';
 593          $rpos = self::rpos($break, "\n");
 594          if ($rpos === false) {
 595              $rpos = 0;
 596          } else {
 597              $rpos++;
 598          }
 599          $wrapped = '';
 600          $hasWrapped = false;
 601  
 602          while (self::length($string, 'UTF-8') > $width) {
 603              $line = self::substr($string, 0, $width + ($hasWrapped ? $rpos : 0), 'UTF-8');
 604              $string = self::substr($string, self::length($line, 'UTF-8'), null, 'UTF-8');
 605  
 606              // Make sure we didn't cut a word, unless we want hard breaks
 607              // anyway.
 608              if (!$cut && preg_match('/^(.+?)((\s|\r?\n).*)/us', $string, $match)) {
 609                  $line .= $match[1];
 610                  $string = $match[2];
 611              }
 612  
 613              // Wrap at existing line breaks.
 614              $regex = '/^(' . ($hasWrapped ? $breakRegex : '') . '.*?)(\r?\n)(.*)$/us';
 615              if (preg_match($regex, $line, $match)) {
 616                  $wrapped .= $match[1] . $match[2];
 617                  $string = $match[3] . $string;
 618                  $hasWrapped = false;
 619                  continue;
 620              }
 621  
 622              // Wrap at the last colon or semicolon followed by a whitespace if
 623              // doing line folding.
 624              if ($line_folding &&
 625                  preg_match('/^(.*?)(;|:)(\s+.*)$/us', $line, $match)) {
 626                  $wrapped .= $match[1] . $match[2];
 627                  $string = $break . $match[3] . $string;
 628                  $hasWrapped = true;
 629                  continue;
 630              }
 631  
 632              // Wrap at the last whitespace of $line.
 633              $sub = $line_folding
 634                  ? '(' . ($hasWrapped ? $breakRegex : '') . '.+[^\s])'
 635                  : '(' . ($hasWrapped ? $breakRegex : '') . '.*)';
 636  
 637              if (preg_match('/^' . $sub . '(\s+)(.*)$/u', $line, $match)) {
 638                  $wrapped .= $match[1];
 639                  $string = $break . ($line_folding ? $match[2] : '')
 640                      . $match[3] . $string;
 641                  $hasWrapped = true;
 642                  continue;
 643              }
 644  
 645              // Hard wrap if necessary.
 646              if ($cut) {
 647                  $wrapped .= $line;
 648                  $string = $break . $string;
 649                  $hasWrapped = true;
 650                  continue;
 651              }
 652  
 653              $wrapped .= $line;
 654              $hasWrapped = false;
 655          }
 656  
 657          return $wrapped . $string;
 658      }
 659  
 660      /**
 661       * Wraps the text of a message.
 662       *
 663       * @param string $text        String containing the text to wrap.
 664       * @param integer $length     Wrap $text at this number of characters.
 665       * @param string $break_char  Character(s) to use when breaking lines.
 666       * @param boolean $quote      Ignore lines that are wrapped with the '>'
 667       *                            character (RFC 2646)? If true, we don't
 668       *                            remove any padding whitespace at the end of
 669       *                            the string.
 670       *
 671       * @return string  String containing the wrapped text.
 672       */
 673      public static function wrap($text, $length = 80, $break_char = "\n",
 674                                  $quote = false)
 675      {
 676          $paragraphs = array();
 677  
 678          foreach (preg_split('/\r?\n/', $text) as $input) {
 679              if ($quote && (strpos($input, '>') === 0)) {
 680                  $line = $input;
 681              } else {
 682                  /* We need to handle the Usenet-style signature line
 683                   * separately; since the space after the two dashes is
 684                   * REQUIRED, we don't want to trim the line. */
 685                  if ($input != '-- ') {
 686                      $input = rtrim($input);
 687                  }
 688                  $line = self::wordwrap($input, $length, $break_char);
 689              }
 690  
 691              $paragraphs[] = $line;
 692          }
 693  
 694          return implode($break_char, $paragraphs);
 695      }
 696  
 697      /**
 698       * Return a truncated string, suitable for notifications.
 699       *
 700       * @param string $text     The original string.
 701       * @param integer $length  The maximum length.
 702       *
 703       * @return string  The truncated string, if longer than $length.
 704       */
 705      public static function truncate($text, $length = 100)
 706      {
 707          return (self::length($text) > $length)
 708              ? rtrim(self::substr($text, 0, $length - 3)) . '...'
 709              : $text;
 710      }
 711  
 712      /**
 713       * Return an abbreviated string, with characters in the middle of the
 714       * excessively long string replaced by '...'.
 715       *
 716       * @param string $text     The original string.
 717       * @param integer $length  The length at which to abbreviate.
 718       *
 719       * @return string  The abbreviated string, if longer than $length.
 720       */
 721      public static function abbreviate($text, $length = 20)
 722      {
 723          return (self::length($text) > $length)
 724              ? rtrim(self::substr($text, 0, round(($length - 3) / 2))) . '...' . ltrim(self::substr($text, (($length - 3) / 2) * -1))
 725              : $text;
 726      }
 727  
 728      /**
 729       * Returns the common leading part of two strings.
 730       *
 731       * @param string $str1  A string.
 732       * @param string $str2  Another string.
 733       *
 734       * @return string  The start of $str1 and $str2 that is identical in both.
 735       */
 736      public static function common($str1, $str2)
 737      {
 738          for ($result = '', $i = 0;
 739               isset($str1[$i]) && isset($str2[$i]) && $str1[$i] == $str2[$i];
 740               $i++) {
 741              $result .= $str1[$i];
 742          }
 743          return $result;
 744      }
 745  
 746      /**
 747       * Returns true if the every character in the parameter is an alphabetic
 748       * character.
 749       *
 750       * @param string $string   The string to test.
 751       * @param string $charset  The charset to use when testing the string.
 752       *
 753       * @return boolean  True if the parameter was alphabetic only.
 754       */
 755      public static function isAlpha($string, $charset)
 756      {
 757          if (!Horde_Util::extensionExists('mbstring')) {
 758              return ctype_alpha($string);
 759          }
 760  
 761          $charset = self::_mbstringCharset($charset);
 762          $old_charset = mb_regex_encoding();
 763  
 764          if ($charset != $old_charset) {
 765              @mb_regex_encoding($charset);
 766          }
 767          $alpha = !@mb_ereg_match('[^[:alpha:]]', $string);
 768          if ($charset != $old_charset) {
 769              @mb_regex_encoding($old_charset);
 770          }
 771  
 772          return $alpha;
 773      }
 774  
 775      /**
 776       * Returns true if ever character in the parameter is a lowercase letter in
 777       * the current locale.
 778       *
 779       * @param string $string   The string to test.
 780       * @param string $charset  The charset to use when testing the string.
 781       *
 782       * @return boolean  True if the parameter was lowercase.
 783       */
 784      public static function isLower($string, $charset)
 785      {
 786          return ((self::lower($string, true, $charset) === $string) &&
 787                  self::isAlpha($string, $charset));
 788      }
 789  
 790      /**
 791       * Returns true if every character in the parameter is an uppercase letter
 792       * in the current locale.
 793       *
 794       * @param string $string   The string to test.
 795       * @param string $charset  The charset to use when testing the string.
 796       *
 797       * @return boolean  True if the parameter was uppercase.
 798       */
 799      public static function isUpper($string, $charset)
 800      {
 801          return ((self::upper($string, true, $charset) === $string) &&
 802                  self::isAlpha($string, $charset));
 803      }
 804  
 805      /**
 806       * Performs a multibyte safe regex match search on the text provided.
 807       *
 808       * @param string $text     The text to search.
 809       * @param array $regex     The regular expressions to use, without perl
 810       *                         regex delimiters (e.g. '/' or '|').
 811       * @param string $charset  The character set of the text.
 812       *
 813       * @return array  The matches array from the first regex that matches.
 814       */
 815      public static function regexMatch($text, $regex, $charset = null)
 816      {
 817          if (!empty($charset)) {
 818              $regex = self::convertCharset($regex, $charset, 'utf-8');
 819              $text = self::convertCharset($text, $charset, 'utf-8');
 820          }
 821  
 822          $matches = array();
 823          foreach ($regex as $val) {
 824              if (preg_match('/' . $val . '/u', $text, $matches)) {
 825                  break;
 826              }
 827          }
 828  
 829          if (!empty($charset)) {
 830              $matches = self::convertCharset($matches, 'utf-8', $charset);
 831          }
 832  
 833          return $matches;
 834      }
 835  
 836      /**
 837       * Check to see if a string is valid UTF-8.
 838       *
 839       * @param string $text  The text to check.
 840       *
 841       * @return boolean  True if valid UTF-8.
 842       */
 843      public static function validUtf8($text)
 844      {
 845          $text = strval($text);
 846  
 847          // First check for illegal surrogate pair sequences. See RFC 3629.
 848          if (preg_match('/\xE0[\x80-\x9F][\x80-\xBF]|\xED[\xA0-\xBF][\x80-\xBF]/S', $text)) {
 849              return false;
 850          }
 851  
 852          for ($i = 0, $len = strlen($text); $i < $len; ++$i) {
 853              $c = ord($text[$i]);
 854              if ($c > 128) {
 855                  if ($c > 247) {
 856                      // STD 63 (RFC 3629) eliminates 5 & 6-byte characters.
 857                      return false;
 858                  } elseif ($c > 239) {
 859                      $j = 3;
 860                  } elseif ($c > 223) {
 861                      $j = 2;
 862                  } elseif ($c > 191) {
 863                      $j = 1;
 864                  } else {
 865                      return false;
 866                  }
 867  
 868                  if (($i + $j) > $len) {
 869                      return false;
 870                  }
 871  
 872                  do {
 873                      $c = ord($text[++$i]);
 874                      if (($c < 128) || ($c > 191)) {
 875                          return false;
 876                      }
 877                  } while (--$j);
 878              }
 879          }
 880  
 881          return true;
 882      }
 883  
 884      /**
 885       * Workaround charsets that don't work with mbstring functions.
 886       *
 887       * @param string $charset  The original charset.
 888       *
 889       * @return string  The charset to use with mbstring functions.
 890       */
 891      protected static function _mbstringCharset($charset)
 892      {
 893          /* mbstring functions do not handle the 'ks_c_5601-1987' &
 894           * 'ks_c_5601-1989' charsets. However, these charsets are used, for
 895           * example, by various versions of Outlook to send Korean characters.
 896           * Use UHC (CP949) encoding instead. See, e.g.,
 897           * http://lists.w3.org/Archives/Public/ietf-charsets/2001AprJun/0030.html */
 898          return in_array(self::lower($charset), array('ks_c_5601-1987', 'ks_c_5601-1989'))
 899              ? 'UHC'
 900              : $charset;
 901      }
 902  
 903      /**
 904       * Strip UTF-8 byte order mark (BOM) from string data.
 905       *
 906       * @param string $str  Input string (UTF-8).
 907       *
 908       * @return string  Stripped string (UTF-8).
 909       */
 910      public static function trimUtf8Bom($str)
 911      {
 912          return (substr($str, 0, 3) == pack('CCC', 239, 187, 191))
 913              ? substr($str, 3)
 914              : $str;
 915      }
 916  
 917  }