Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is supported too.

Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401] [Versions 400 and 401] [Versions 401 and 402] [Versions 401 and 403]

   1  <?php
   2  /**
   3   * Provides static methods for charset and locale safe string manipulation.
   4   *
   5   * Copyright 2003-2017 Horde LLC (http://www.horde.org/)
   6   *
   7   * See the enclosed file LICENSE for license information (LGPL). If you
   8   * did not receive this file, see http://www.horde.org/licenses/lgpl21.
   9   *
  10   * @todo Split up in Horde_String_Multibyte for multibyte-safe methods and
  11   *       Horde_String_Locale for locale-safe methods.
  12   *
  13   * @author   Jan Schneider <jan@horde.org>
  14   * @category Horde
  15   * @license  http://www.horde.org/licenses/lgpl21 LGPL 2.1
  16   * @package  Util
  17   */
  18  class Horde_String
  19  {
  20      /**
  21       * lower() cache.
  22       *
  23       * @var array
  24       */
  25      protected static $_lowers = array();
  26  
  27      /**
  28       * upper() cache.
  29       *
  30       * @var array
  31       */
  32      protected static $_uppers = array();
  33  
  34      /**
  35       * Converts a string from one charset to another.
  36       *
  37       * Uses the iconv or the mbstring extensions.
  38       * The original string is returned if conversion failed or none
  39       * of the extensions were available.
  40       *
  41       * @param mixed $input    The data to be converted. If $input is an an
  42       *                        array, the array's values get converted
  43       *                        recursively.
  44       * @param string $from    The string's current charset.
  45       * @param string $to      The charset to convert the string to.
  46       * @param boolean $force  Force conversion?
  47       *
  48       * @return mixed  The converted input data.
  49       */
  50      public static function convertCharset($input, $from, $to, $force = false)
  51      {
  52          /* Don't bother converting numbers. */
  53          if (is_numeric($input)) {
  54              return $input;
  55          }
  56  
  57          /* If the from and to character sets are identical, return now. */
  58          if (!$force && $from == $to) {
  59              return $input;
  60          }
  61          $from = self::lower($from);
  62          $to = self::lower($to);
  63          if (!$force && $from == $to) {
  64              return $input;
  65          }
  66  
  67          if (is_array($input)) {
  68              $tmp = array();
  69              foreach ($input as $key => $val) {
  70                  $tmp[self::_convertCharset($key, $from, $to)] = self::convertCharset($val, $from, $to, $force);
  71              }
  72              return $tmp;
  73          }
  74  
  75          if (is_object($input)) {
  76              // PEAR_Error/Exception objects are almost guaranteed to contain
  77              // recursion, which will cause a segfault in PHP. We should never
  78              // reach this line, but add a check.
  79              if (($input instanceof Exception) ||
  80                  ($input instanceof PEAR_Error)) {
  81                  return '';
  82              }
  83  
  84              $input = clone $input;
  85              $vars = get_object_vars($input);
  86              foreach ($vars as $key => $val) {
  87                  $input->$key = self::convertCharset($val, $from, $to, $force);
  88              }
  89              return $input;
  90          }
  91  
  92          if (!is_string($input)) {
  93              return $input;
  94          }
  95  
  96          return self::_convertCharset($input, $from, $to);
  97      }
  98  
  99      /**
 100       * Internal function used to do charset conversion.
 101       *
 102       * @param string $input  See self::convertCharset().
 103       * @param string $from   See self::convertCharset().
 104       * @param string $to     See self::convertCharset().
 105       *
 106       * @return string  The converted string.
 107       */
 108      protected static function _convertCharset($input, $from, $to)
 109      {
 110          /* Use utf8_[en|de]code() if possible and if the string isn't too
 111           * large (less than 16 MB = 16 * 1024 * 1024 = 16777216 bytes) - these
 112           * functions use more memory. */
 113          if (Horde_Util::extensionExists('xml') &&
 114              ((strlen($input) < 16777216) ||
 115               !Horde_Util::extensionExists('iconv') ||
 116               !Horde_Util::extensionExists('mbstring'))) {
 117              if (($to == 'utf-8') &&
 118                  in_array($from, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
 119                  return utf8_encode($input);
 120              }
 121  
 122              if (($from == 'utf-8') &&
 123                  in_array($to, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
 124                  return utf8_decode($input);
 125              }
 126          }
 127  
 128          /* Try UTF7-IMAP conversions. */
 129          if (($from == 'utf7-imap') || ($to == 'utf7-imap')) {
 130              try {
 131                  if ($from == 'utf7-imap') {
 132                      return self::convertCharset(Horde_Imap_Client_Utf7imap::Utf7ImapToUtf8($input), 'UTF-8', $to);
 133                  } else {
 134                      if ($from == 'utf-8') {
 135                          $conv = $input;
 136                      } else {
 137                          $conv = self::convertCharset($input, $from, 'UTF-8');
 138                      }
 139                      return Horde_Imap_Client_Utf7imap::Utf8ToUtf7Imap($conv);
 140                  }
 141              } catch (Horde_Imap_Client_Exception $e) {
 142                  return $input;
 143              }
 144          }
 145  
 146          /* Try iconv with transliteration. */
 147          if (Horde_Util::extensionExists('iconv')) {
 148              unset($php_errormsg);
 149              ini_set('track_errors', 1);
 150              $out = @iconv($from, $to . '//TRANSLIT', $input);
 151              $errmsg = isset($php_errormsg);
 152              ini_restore('track_errors');
 153              if (!$errmsg && $out !== false) {
 154                  return $out;
 155              }
 156          }
 157  
 158          /* Try mbstring. */
 159          if (Horde_Util::extensionExists('mbstring')) {
 160              try {
 161                  $out = @mb_convert_encoding($input, $to, self::_mbstringCharset($from));
 162                  if (!empty($out)) {
 163                      return $out;
 164                  }
 165              } catch (ValueError $e) {
 166                  // catch error thrown under PHP 8.0, if mbstring does not support the encoding
 167              }
 168          }
 169  
 170          return $input;
 171      }
 172  
 173      /**
 174       * Makes a string lowercase.
 175       *
 176       * @param string $string   The string to be converted.
 177       * @param boolean $locale  If true the string will be converted based on
 178       *                         a given charset, locale independent else.
 179       * @param string $charset  If $locale is true, the charset to use when
 180       *                         converting.
 181       *
 182       * @return string  The string with lowercase characters.
 183       */
 184      public static function lower($string, $locale = false, $charset = null)
 185      {
 186          if ($locale) {
 187              if (Horde_Util::extensionExists('mbstring')) {
 188                  if (is_null($charset)) {
 189                      throw new InvalidArgumentException('$charset argument must not be null');
 190                  }
 191                  $ret = @mb_strtolower($string, self::_mbstringCharset($charset));
 192                  if (!empty($ret)) {
 193                      return $ret;
 194                  }
 195              }
 196              return strtolower($string);
 197          }
 198  
 199          if (!isset(self::$_lowers[$string])) {
 200              $language = setlocale(LC_CTYPE, 0);
 201              setlocale(LC_CTYPE, 'C');
 202              if ($string === null) {
 203                  self::$_lowers[$string] = '';
 204              } else {
 205                  self::$_lowers[$string] = strtolower($string);
 206              }
 207              setlocale(LC_CTYPE, $language);
 208          }
 209  
 210          return self::$_lowers[$string];
 211      }
 212  
 213      /**
 214       * Makes a string uppercase.
 215       *
 216       * @param string $string   The string to be converted.
 217       * @param boolean $locale  If true the string will be converted based on a
 218       *                         given charset, locale independent else.
 219       * @param string $charset  If $locale is true, the charset to use when
 220       *                         converting. If not provided the current charset.
 221       *
 222       * @return string  The string with uppercase characters.
 223       */
 224      public static function upper($string, $locale = false, $charset = null)
 225      {
 226          if ($locale) {
 227              if (Horde_Util::extensionExists('mbstring')) {
 228                  if (is_null($charset)) {
 229                      throw new InvalidArgumentException('$charset argument must not be null');
 230                  }
 231                  $ret = @mb_strtoupper($string, self::_mbstringCharset($charset));
 232                  if (!empty($ret)) {
 233                      return $ret;
 234                  }
 235              }
 236              return strtoupper($string);
 237          }
 238  
 239          if (!isset(self::$_uppers[$string])) {
 240              $language = setlocale(LC_CTYPE, 0);
 241              setlocale(LC_CTYPE, 'C');
 242              self::$_uppers[$string] = strtoupper($string);
 243              setlocale(LC_CTYPE, $language);
 244          }
 245  
 246          return self::$_uppers[$string];
 247      }
 248  
 249      /**
 250       * Returns a string with the first letter capitalized if it is
 251       * alphabetic.
 252       *
 253       * @param string $string   The string to be capitalized.
 254       * @param boolean $locale  If true the string will be converted based on a
 255       *                         given charset, locale independent else.
 256       * @param string $charset  The charset to use, defaults to current charset.
 257       *
 258       * @return string  The capitalized string.
 259       */
 260      public static function ucfirst($string, $locale = false, $charset = null)
 261      {
 262          if ($locale) {
 263              if (is_null($charset)) {
 264                  throw new InvalidArgumentException('$charset argument must not be null');
 265              }
 266              $first = self::substr($string, 0, 1, $charset);
 267              if (self::isAlpha($first, $charset)) {
 268                  $string = self::upper($first, true, $charset) . self::substr($string, 1, null, $charset);
 269              }
 270          } else {
 271              $string = self::upper(substr($string, 0, 1), false) . substr($string, 1);
 272          }
 273  
 274          return $string;
 275      }
 276  
 277      /**
 278       * Returns a string with the first letter of each word capitalized if it is
 279       * alphabetic.
 280       *
 281       * Sentences are splitted into words at whitestrings.
 282       *
 283       * @param string $string   The string to be capitalized.
 284       * @param boolean $locale  If true the string will be converted based on a
 285       *                         given charset, locale independent else.
 286       * @param string $charset  The charset to use, defaults to current charset.
 287       *
 288       * @return string  The capitalized string.
 289       */
 290      public static function ucwords($string, $locale = false, $charset = null)
 291      {
 292          $words = preg_split('/(\s+)/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
 293          for ($i = 0, $c = count($words); $i < $c; $i += 2) {
 294              $words[$i] = self::ucfirst($words[$i], $locale, $charset);
 295          }
 296          return implode('', $words);
 297      }
 298  
 299      /**
 300       * Returns part of a string.
 301       *
 302       * @param string $string   The string to be converted.
 303       * @param integer $start   The part's start position, zero based.
 304       * @param integer $length  The part's length.
 305       * @param string $charset  The charset to use when calculating the part's
 306       *                         position and length, defaults to current
 307       *                         charset.
 308       *
 309       * @return string  The string's part.
 310       */
 311      public static function substr($string, $start, $length = null,
 312                                    $charset = 'UTF-8')
 313      {
 314          if (is_null($length)) {
 315              $length = self::length($string, $charset) - $start;
 316          }
 317  
 318          if ($length === 0) {
 319              return '';
 320          }
 321  
 322          $error = false;
 323  
 324          /* Try mbstring. */
 325          if (Horde_Util::extensionExists('mbstring')) {
 326              $ret = @mb_substr($string, $start, $length, self::_mbstringCharset($charset));
 327  
 328              /* mb_substr() returns empty string on failure. */
 329              if (strlen($ret)) {
 330                  return $ret;
 331              }
 332              $error = true;
 333          }
 334  
 335          /* Try iconv. */
 336          if (Horde_Util::extensionExists('iconv')) {
 337              $ret = @iconv_substr($string, $start, $length, $charset);
 338  
 339              /* iconv_substr() returns false on failure. */
 340              if ($ret !== false) {
 341                  return $ret;
 342              }
 343              $error = true;
 344          }
 345  
 346          /* Try intl. */
 347          if (Horde_Util::extensionExists('intl')) {
 348              $ret = self::convertCharset(
 349                  @grapheme_substr(
 350                      self::convertCharset($string, $charset, 'UTF-8'),
 351                      $start,
 352                      $length
 353                  ),
 354                  'UTF-8',
 355                  $charset
 356              );
 357  
 358              /* grapheme_substr() returns false on failure. */
 359              if ($ret !== false) {
 360                  return $ret;
 361              }
 362              $error = true;
 363          }
 364  
 365          return $error
 366              ? ''
 367              : substr($string, $start, $length);
 368      }
 369  
 370      /**
 371       * Returns the character (not byte) length of a string.
 372       *
 373       * @param string $string  The string to return the length of.
 374       * @param string $charset The charset to use when calculating the string's
 375       *                        length.
 376       *
 377       * @return integer  The string's length.
 378       */
 379      public static function length($string, $charset = 'UTF-8')
 380      {
 381          $charset = self::lower($charset);
 382  
 383          if ($charset == 'utf-8' || $charset == 'utf8') {
 384              return strlen(utf8_decode($string));
 385          }
 386  
 387          if (Horde_Util::extensionExists('mbstring')) {
 388              $ret = @mb_strlen($string, self::_mbstringCharset($charset));
 389              if (!empty($ret)) {
 390                  return $ret;
 391              }
 392          }
 393          if (Horde_Util::extensionExists('intl')) {
 394              return grapheme_strlen(
 395                  self::convertCharset($string, $charset, 'UTF-8')
 396              );
 397          }
 398  
 399          return strlen($string);
 400      }
 401  
 402      /**
 403       * Returns the numeric position of the first occurrence of $needle
 404       * in the $haystack string.
 405       *
 406       * @param string $haystack  The string to search through.
 407       * @param string $needle    The string to search for.
 408       * @param integer $offset   Character in $haystack to start searching at.
 409       * @param string $charset   Charset of $needle.
 410       *
 411       * @return integer  The position of first occurrence.
 412       */
 413      public static function pos(
 414          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 415      )
 416      {
 417          return self::_pos($haystack, $needle, $offset, $charset, 'strpos');
 418      }
 419  
 420      /**
 421       * Returns the numeric position of the first case-insensitive occurrence
 422       * of $needle in the $haystack string.
 423       *
 424       * @since 2.5.0
 425       *
 426       * @param string $haystack  The string to search through.
 427       * @param string $needle    The string to search for.
 428       * @param integer $offset   Character in $haystack to start searching at.
 429       * @param string $charset   Charset of $needle.
 430       *
 431       * @return integer  The position of first case-insensitive occurrence.
 432       */
 433      public static function ipos(
 434          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 435      )
 436      {
 437          return self::_pos($haystack, $needle, $offset, $charset, 'stripos');
 438      }
 439  
 440      /**
 441       * Returns the numeric position of the last occurrence of $needle
 442       * in the $haystack string.
 443       *
 444       * @param string $haystack  The string to search through.
 445       * @param string $needle    The string to search for.
 446       * @param integer $offset   Character in $haystack to start searching at.
 447       * @param string $charset   Charset of $needle.
 448       *
 449       * @return integer  The position of last occurrence.
 450       */
 451      public static function rpos(
 452          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 453      )
 454      {
 455          return self::_pos($haystack, $needle, $offset, $charset, 'strrpos');
 456      }
 457  
 458      /**
 459       * Returns the numeric position of the last case-insensitive occurrence of
 460       * $needle in the $haystack string.
 461       *
 462       * @since 2.5.0
 463       *
 464       * @param string $haystack  The string to search through.
 465       * @param string $needle    The string to search for.
 466       * @param integer $offset   Character in $haystack to start searching at.
 467       * @param string $charset   Charset of $needle.
 468       *
 469       * @return integer  The position of last case-insensitive occurrence.
 470       */
 471      public static function ripos(
 472          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 473      )
 474      {
 475          return self::_pos($haystack, $needle, $offset, $charset, 'strripos');
 476      }
 477  
 478      /**
 479       * Perform string position searches.
 480       *
 481       * @param string $haystack  The string to search through.
 482       * @param string $needle    The string to search for.
 483       * @param integer $offset   Character in $haystack to start searching at.
 484       * @param string $charset   Charset of $needle.
 485       * @param string $func      Function to use.
 486       *
 487       * @return integer  The position of occurrence.
 488       *
 489       */
 490      protected static function _pos(
 491          $haystack, $needle, $offset, $charset, $func
 492      )
 493      {
 494          if (Horde_Util::extensionExists('mbstring')) {
 495              unset($php_errormsg);
 496              $track_errors = ini_set('track_errors', 1);
 497              $ret = @call_user_func('mb_' . $func, $haystack, $needle, $offset, self::_mbstringCharset($charset));
 498              ini_set('track_errors', $track_errors);
 499              if (!isset($php_errormsg)) {
 500                  return $ret;
 501              }
 502          }
 503  
 504          if (Horde_Util::extensionExists('intl')) {
 505              unset($php_errormsg);
 506              $track_errors = ini_set('track_errors', 1);
 507              $ret = self::convertCharset(
 508                  @call_user_func(
 509                      'grapheme_' . $func,
 510                      self::convertCharset($haystack, $charset, 'UTF-8'),
 511                      self::convertCharset($needle, $charset, 'UTF-8'),
 512                      $offset
 513                  ),
 514                  'UTF-8',
 515                  $charset
 516              );
 517              ini_set('track_errors', $track_errors);
 518              if (!isset($php_errormsg)) {
 519                  return $ret;
 520              }
 521          }
 522  
 523          return $func($haystack, $needle, $offset);
 524      }
 525  
 526      /**
 527       * Returns a string padded to a certain length with another string.
 528       * This method behaves exactly like str_pad() but is multibyte safe.
 529       *
 530       * @param string $input    The string to be padded.
 531       * @param integer $length  The length of the resulting string.
 532       * @param string $pad      The string to pad the input string with. Must
 533       *                         be in the same charset like the input string.
 534       * @param const $type      The padding type. One of STR_PAD_LEFT,
 535       *                         STR_PAD_RIGHT, or STR_PAD_BOTH.
 536       * @param string $charset  The charset of the input and the padding
 537       *                         strings.
 538       *
 539       * @return string  The padded string.
 540       */
 541      public static function pad($input, $length, $pad = ' ',
 542                                 $type = STR_PAD_RIGHT, $charset = 'UTF-8')
 543      {
 544          $mb_length = self::length($input, $charset);
 545          $sb_length = strlen($input);
 546          $pad_length = self::length($pad, $charset);
 547  
 548          /* Return if we already have the length. */
 549          if ($mb_length >= $length) {
 550              return $input;
 551          }
 552  
 553          /* Shortcut for single byte strings. */
 554          if ($mb_length == $sb_length && $pad_length == strlen($pad)) {
 555              return str_pad($input, $length, $pad, $type);
 556          }
 557  
 558          switch ($type) {
 559          case STR_PAD_LEFT:
 560              $left = $length - $mb_length;
 561              $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) . $input;
 562              break;
 563  
 564          case STR_PAD_BOTH:
 565              $left = floor(($length - $mb_length) / 2);
 566              $right = ceil(($length - $mb_length) / 2);
 567              $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) .
 568                  $input .
 569                  self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
 570              break;
 571  
 572          case STR_PAD_RIGHT:
 573              $right = $length - $mb_length;
 574              $output = $input . self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
 575              break;
 576          }
 577  
 578          return $output;
 579      }
 580  
 581      /**
 582       * Wraps the text of a message.
 583       *
 584       * @param string $string         String containing the text to wrap.
 585       * @param integer $width         Wrap the string at this number of
 586       *                               characters.
 587       * @param string $break          Character(s) to use when breaking lines.
 588       * @param boolean $cut           Whether to cut inside words if a line
 589       *                               can't be wrapped.
 590       * @param boolean $line_folding  Whether to apply line folding rules per
 591       *                               RFC 822 or similar. The correct break
 592       *                               characters including leading whitespace
 593       *                               have to be specified too.
 594       *
 595       * @return string  String containing the wrapped text.
 596       */
 597      public static function wordwrap($string, $width = 75, $break = "\n",
 598                                      $cut = false, $line_folding = false)
 599      {
 600          $breakRegex = '(?:' . preg_quote($break) . ')';
 601          $rpos = self::rpos($break, "\n");
 602          if ($rpos === false) {
 603              $rpos = 0;
 604          } else {
 605              $rpos++;
 606          }
 607          $wrapped = '';
 608          $hasWrapped = false;
 609  
 610          while (self::length($string, 'UTF-8') > $width) {
 611              $line = self::substr($string, 0, $width + ($hasWrapped ? $rpos : 0), 'UTF-8');
 612              $string = self::substr($string, self::length($line, 'UTF-8'), null, 'UTF-8');
 613  
 614              // Make sure we didn't cut a word, unless we want hard breaks
 615              // anyway.
 616              if (!$cut && preg_match('/^(.+?)((\s|\r?\n).*)/us', $string, $match)) {
 617                  $line .= $match[1];
 618                  $string = $match[2];
 619              }
 620  
 621              // Wrap at existing line breaks.
 622              $regex = '/^(' . ($hasWrapped ? $breakRegex : '') . '.*?)(\r?\n)(.*)$/us';
 623              if (preg_match($regex, $line, $match)) {
 624                  $wrapped .= $match[1] . $match[2];
 625                  $string = $match[3] . $string;
 626                  $hasWrapped = false;
 627                  continue;
 628              }
 629  
 630              // Wrap at the last colon or semicolon followed by a whitespace if
 631              // doing line folding.
 632              if ($line_folding &&
 633                  preg_match('/^(.*?)(;|:)(\s+.*)$/us', $line, $match)) {
 634                  $wrapped .= $match[1] . $match[2];
 635                  $string = $break . $match[3] . $string;
 636                  $hasWrapped = true;
 637                  continue;
 638              }
 639  
 640              // Wrap at the last whitespace of $line.
 641              $sub = $line_folding
 642                  ? '(' . ($hasWrapped ? $breakRegex : '') . '.+[^\s])'
 643                  : '(' . ($hasWrapped ? $breakRegex : '') . '.*)';
 644  
 645              if (preg_match('/^' . $sub . '(\s+)(.*)$/u', $line, $match)) {
 646                  $wrapped .= $match[1];
 647                  $string = $break . ($line_folding ? $match[2] : '')
 648                      . $match[3] . $string;
 649                  $hasWrapped = true;
 650                  continue;
 651              }
 652  
 653              // Hard wrap if necessary.
 654              if ($cut) {
 655                  $wrapped .= $line;
 656                  $string = $break . $string;
 657                  $hasWrapped = true;
 658                  continue;
 659              }
 660  
 661              $wrapped .= $line;
 662              $hasWrapped = false;
 663          }
 664  
 665          return $wrapped . $string;
 666      }
 667  
 668      /**
 669       * Wraps the text of a message.
 670       *
 671       * @param string $text        String containing the text to wrap.
 672       * @param integer $length     Wrap $text at this number of characters.
 673       * @param string $break_char  Character(s) to use when breaking lines.
 674       * @param boolean $quote      Ignore lines that are wrapped with the '>'
 675       *                            character (RFC 2646)? If true, we don't
 676       *                            remove any padding whitespace at the end of
 677       *                            the string.
 678       *
 679       * @return string  String containing the wrapped text.
 680       */
 681      public static function wrap($text, $length = 80, $break_char = "\n",
 682                                  $quote = false)
 683      {
 684          $paragraphs = array();
 685  
 686          foreach (preg_split('/\r?\n/', $text) as $input) {
 687              if ($quote && (strpos($input, '>') === 0)) {
 688                  $line = $input;
 689              } else {
 690                  /* We need to handle the Usenet-style signature line
 691                   * separately; since the space after the two dashes is
 692                   * REQUIRED, we don't want to trim the line. */
 693                  if ($input != '-- ') {
 694                      $input = rtrim($input);
 695                  }
 696                  $line = self::wordwrap($input, $length, $break_char);
 697              }
 698  
 699              $paragraphs[] = $line;
 700          }
 701  
 702          return implode($break_char, $paragraphs);
 703      }
 704  
 705      /**
 706       * Return a truncated string, suitable for notifications.
 707       *
 708       * @param string $text     The original string.
 709       * @param integer $length  The maximum length.
 710       *
 711       * @return string  The truncated string, if longer than $length.
 712       */
 713      public static function truncate($text, $length = 100)
 714      {
 715          return (self::length($text) > $length)
 716              ? rtrim(self::substr($text, 0, $length - 3)) . '...'
 717              : $text;
 718      }
 719  
 720      /**
 721       * Return an abbreviated string, with characters in the middle of the
 722       * excessively long string replaced by '...'.
 723       *
 724       * @param string $text     The original string.
 725       * @param integer $length  The length at which to abbreviate.
 726       *
 727       * @return string  The abbreviated string, if longer than $length.
 728       */
 729      public static function abbreviate($text, $length = 20)
 730      {
 731          return (self::length($text) > $length)
 732              ? rtrim(self::substr($text, 0, round(($length - 3) / 2))) . '...' . ltrim(self::substr($text, (($length - 3) / 2) * -1))
 733              : $text;
 734      }
 735  
 736      /**
 737       * Returns the common leading part of two strings.
 738       *
 739       * @param string $str1  A string.
 740       * @param string $str2  Another string.
 741       *
 742       * @return string  The start of $str1 and $str2 that is identical in both.
 743       */
 744      public static function common($str1, $str2)
 745      {
 746          for ($result = '', $i = 0;
 747               isset($str1[$i]) && isset($str2[$i]) && $str1[$i] == $str2[$i];
 748               $i++) {
 749              $result .= $str1[$i];
 750          }
 751          return $result;
 752      }
 753  
 754      /**
 755       * Returns true if the every character in the parameter is an alphabetic
 756       * character.
 757       *
 758       * @param string $string   The string to test.
 759       * @param string $charset  The charset to use when testing the string.
 760       *
 761       * @return boolean  True if the parameter was alphabetic only.
 762       */
 763      public static function isAlpha($string, $charset)
 764      {
 765          if (!Horde_Util::extensionExists('mbstring')) {
 766              return ctype_alpha($string);
 767          }
 768  
 769          $charset = self::_mbstringCharset($charset);
 770          $old_charset = mb_regex_encoding();
 771  
 772          if ($charset != $old_charset) {
 773              @mb_regex_encoding($charset);
 774          }
 775          $alpha = !@mb_ereg_match('[^[:alpha:]]', $string);
 776          if ($charset != $old_charset) {
 777              @mb_regex_encoding($old_charset);
 778          }
 779  
 780          return $alpha;
 781      }
 782  
 783      /**
 784       * Returns true if ever character in the parameter is a lowercase letter in
 785       * the current locale.
 786       *
 787       * @param string $string   The string to test.
 788       * @param string $charset  The charset to use when testing the string.
 789       *
 790       * @return boolean  True if the parameter was lowercase.
 791       */
 792      public static function isLower($string, $charset)
 793      {
 794          return ((self::lower($string, true, $charset) === $string) &&
 795                  self::isAlpha($string, $charset));
 796      }
 797  
 798      /**
 799       * Returns true if every character in the parameter is an uppercase letter
 800       * in the current locale.
 801       *
 802       * @param string $string   The string to test.
 803       * @param string $charset  The charset to use when testing the string.
 804       *
 805       * @return boolean  True if the parameter was uppercase.
 806       */
 807      public static function isUpper($string, $charset)
 808      {
 809          return ((self::upper($string, true, $charset) === $string) &&
 810                  self::isAlpha($string, $charset));
 811      }
 812  
 813      /**
 814       * Performs a multibyte safe regex match search on the text provided.
 815       *
 816       * @param string $text     The text to search.
 817       * @param array $regex     The regular expressions to use, without perl
 818       *                         regex delimiters (e.g. '/' or '|').
 819       * @param string $charset  The character set of the text.
 820       *
 821       * @return array  The matches array from the first regex that matches.
 822       */
 823      public static function regexMatch($text, $regex, $charset = null)
 824      {
 825          if (!empty($charset)) {
 826              $regex = self::convertCharset($regex, $charset, 'utf-8');
 827              $text = self::convertCharset($text, $charset, 'utf-8');
 828          }
 829  
 830          $matches = array();
 831          foreach ($regex as $val) {
 832              if (preg_match('/' . $val . '/u', $text, $matches)) {
 833                  break;
 834              }
 835          }
 836  
 837          if (!empty($charset)) {
 838              $matches = self::convertCharset($matches, 'utf-8', $charset);
 839          }
 840  
 841          return $matches;
 842      }
 843  
 844      /**
 845       * Check to see if a string is valid UTF-8.
 846       *
 847       * @param string $text  The text to check.
 848       *
 849       * @return boolean  True if valid UTF-8.
 850       */
 851      public static function validUtf8($text)
 852      {
 853          $text = strval($text);
 854  
 855          // First check for illegal surrogate pair sequences. See RFC 3629.
 856          if (preg_match('/\xE0[\x80-\x9F][\x80-\xBF]|\xED[\xA0-\xBF][\x80-\xBF]/S', $text)) {
 857              return false;
 858          }
 859  
 860          for ($i = 0, $len = strlen($text); $i < $len; ++$i) {
 861              $c = ord($text[$i]);
 862              if ($c > 128) {
 863                  if ($c > 247) {
 864                      // STD 63 (RFC 3629) eliminates 5 & 6-byte characters.
 865                      return false;
 866                  } elseif ($c > 239) {
 867                      $j = 3;
 868                  } elseif ($c > 223) {
 869                      $j = 2;
 870                  } elseif ($c > 191) {
 871                      $j = 1;
 872                  } else {
 873                      return false;
 874                  }
 875  
 876                  if (($i + $j) > $len) {
 877                      return false;
 878                  }
 879  
 880                  do {
 881                      $c = ord($text[++$i]);
 882                      if (($c < 128) || ($c > 191)) {
 883                          return false;
 884                      }
 885                  } while (--$j);
 886              }
 887          }
 888  
 889          return true;
 890      }
 891  
 892      /**
 893       * Workaround charsets that don't work with mbstring functions.
 894       *
 895       * @param string $charset  The original charset.
 896       *
 897       * @return string  The charset to use with mbstring functions.
 898       */
 899      protected static function _mbstringCharset($charset)
 900      {
 901          /* mbstring functions do not handle the 'ks_c_5601-1987' &
 902           * 'ks_c_5601-1989' charsets. However, these charsets are used, for
 903           * example, by various versions of Outlook to send Korean characters.
 904           * Use UHC (CP949) encoding instead. See, e.g.,
 905           * http://lists.w3.org/Archives/Public/ietf-charsets/2001AprJun/0030.html */
 906          return in_array(self::lower($charset), array('ks_c_5601-1987', 'ks_c_5601-1989'))
 907              ? 'UHC'
 908              : $charset;
 909      }
 910  
 911      /**
 912       * Strip UTF-8 byte order mark (BOM) from string data.
 913       *
 914       * @param string $str  Input string (UTF-8).
 915       *
 916       * @return string  Stripped string (UTF-8).
 917       */
 918      public static function trimUtf8Bom($str)
 919      {
 920          return (substr($str, 0, 3) == pack('CCC', 239, 187, 191))
 921              ? substr($str, 3)
 922              : $str;
 923      }
 924  
 925  }