Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.2.x will end 22 April 2024 (12 months).
  • Bug fixes for security issues in 4.2.x will end 7 October 2024 (18 months).
  • PHP version: minimum PHP 8.0.0. Note: the minimum PHP version has increased since Moodle 4.1. PHP 8.1.x is supported too.

Differences Between: [Versions 310 and 402] [Versions 311 and 402] [Versions 39 and 402] [Versions 400 and 402] [Versions 401 and 402]

   1  <?php
   2  /**
   3   * Provides static methods for charset and locale safe string manipulation.
   4   *
   5   * Copyright 2003-2017 Horde LLC (http://www.horde.org/)
   6   *
   7   * See the enclosed file LICENSE for license information (LGPL). If you
   8   * did not receive this file, see http://www.horde.org/licenses/lgpl21.
   9   *
  10   * @todo Split up in Horde_String_Multibyte for multibyte-safe methods and
  11   *       Horde_String_Locale for locale-safe methods.
  12   *
  13   * @author   Jan Schneider <jan@horde.org>
  14   * @category Horde
  15   * @license  http://www.horde.org/licenses/lgpl21 LGPL 2.1
  16   * @package  Util
  17   */
  18  class Horde_String
  19  {
  20      /**
  21       * lower() cache.
  22       *
  23       * @var array
  24       */
  25      protected static $_lowers = array();
  26  
  27      /**
  28       * upper() cache.
  29       *
  30       * @var array
  31       */
  32      protected static $_uppers = array();
  33  
  34      /**
  35       * Converts a string from one charset to another.
  36       *
  37       * Uses the iconv or the mbstring extensions.
  38       * The original string is returned if conversion failed or none
  39       * of the extensions were available.
  40       *
  41       * @param mixed $input    The data to be converted. If $input is an an
  42       *                        array, the array's values get converted
  43       *                        recursively.
  44       * @param string $from    The string's current charset.
  45       * @param string $to      The charset to convert the string to.
  46       * @param boolean $force  Force conversion?
  47       *
  48       * @return mixed  The converted input data.
  49       */
  50      public static function convertCharset($input, $from, $to, $force = false)
  51      {
  52          /* Don't bother converting numbers. */
  53          if (is_numeric($input)) {
  54              return $input;
  55          }
  56  
  57          /* If the from and to character sets are identical, return now. */
  58          if (!$force && $from == $to) {
  59              return $input;
  60          }
  61          $from = self::lower($from);
  62          $to = self::lower($to);
  63          if (!$force && $from == $to) {
  64              return $input;
  65          }
  66  
  67          if (is_array($input)) {
  68              $tmp = array();
  69              foreach ($input as $key => $val) {
  70                  $tmp[self::_convertCharset($key, $from, $to)] = self::convertCharset($val, $from, $to, $force);
  71              }
  72              return $tmp;
  73          }
  74  
  75          if (is_object($input)) {
  76              // PEAR_Error/Exception objects are almost guaranteed to contain
  77              // recursion, which will cause a segfault in PHP. We should never
  78              // reach this line, but add a check.
  79              if (($input instanceof Exception) ||
  80                  ($input instanceof PEAR_Error)) {
  81                  return '';
  82              }
  83  
  84              $input = clone $input;
  85              $vars = get_object_vars($input);
  86              foreach ($vars as $key => $val) {
  87                  $input->$key = self::convertCharset($val, $from, $to, $force);
  88              }
  89              return $input;
  90          }
  91  
  92          if (!is_string($input)) {
  93              return $input;
  94          }
  95  
  96          return self::_convertCharset($input, $from, $to);
  97      }
  98  
  99      /**
 100       * Internal function used to do charset conversion.
 101       *
 102       * @param string $input  See self::convertCharset().
 103       * @param string $from   See self::convertCharset().
 104       * @param string $to     See self::convertCharset().
 105       *
 106       * @return string  The converted string.
 107       */
 108      protected static function _convertCharset($input, $from, $to)
 109      {
 110          /* Use utf8_[en|de]code() if possible and if the string isn't too
 111           * large (less than 16 MB = 16 * 1024 * 1024 = 16777216 bytes) - these
 112           * functions use more memory. */
 113          if (Horde_Util::extensionExists('xml') &&
 114              ((strlen($input) < 16777216) ||
 115               !Horde_Util::extensionExists('iconv') ||
 116               !Horde_Util::extensionExists('mbstring'))) {
 117              if (($to == 'utf-8') &&
 118                  function_exists('utf8_encode') &&
 119                  in_array($from, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
 120                  return @utf8_encode($input);
 121              }
 122  
 123              if (($from == 'utf-8') &&
 124                  function_exists('utf8_decode') &&
 125                  in_array($to, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
 126                  return @utf8_decode($input);
 127              }
 128          }
 129  
 130          /* Try UTF7-IMAP conversions. */
 131          if (($from == 'utf7-imap') || ($to == 'utf7-imap')) {
 132              try {
 133                  if ($from == 'utf7-imap') {
 134                      return self::convertCharset(Horde_Imap_Client_Utf7imap::Utf7ImapToUtf8($input), 'UTF-8', $to);
 135                  } else {
 136                      if ($from == 'utf-8') {
 137                          $conv = $input;
 138                      } else {
 139                          $conv = self::convertCharset($input, $from, 'UTF-8');
 140                      }
 141                      return Horde_Imap_Client_Utf7imap::Utf8ToUtf7Imap($conv);
 142                  }
 143              } catch (Horde_Imap_Client_Exception $e) {
 144                  return $input;
 145              }
 146          }
 147  
 148          /* Try iconv with transliteration. */
 149          if (Horde_Util::extensionExists('iconv')) {
 150              unset($php_errormsg);
 151              ini_set('track_errors', 1);
 152              $out = @iconv($from, $to . '//TRANSLIT', $input);
 153              $errmsg = isset($php_errormsg);
 154              ini_restore('track_errors');
 155              if (!$errmsg && $out !== false) {
 156                  return $out;
 157              }
 158          }
 159  
 160          /* Try mbstring. */
 161          if (Horde_Util::extensionExists('mbstring')) {
 162              try {
 163                  $out = @mb_convert_encoding($input, $to, self::_mbstringCharset($from));
 164                  if (!empty($out)) {
 165                      return $out;
 166                  }
 167              } catch (ValueError $e) {
 168                  // catch error thrown under PHP 8.0, if mbstring does not support the encoding
 169              }
 170          }
 171  
 172          return $input;
 173      }
 174  
 175      /**
 176       * Makes a string lowercase.
 177       *
 178       * @param string $string   The string to be converted.
 179       * @param boolean $locale  If true the string will be converted based on
 180       *                         a given charset, locale independent else.
 181       * @param string $charset  If $locale is true, the charset to use when
 182       *                         converting.
 183       *
 184       * @return string  The string with lowercase characters.
 185       */
 186      public static function lower($string, $locale = false, $charset = null)
 187      {
 188          if ($locale) {
 189              if (Horde_Util::extensionExists('mbstring')) {
 190                  if (is_null($charset)) {
 191                      throw new InvalidArgumentException('$charset argument must not be null');
 192                  }
 193                  $ret = @mb_strtolower($string, self::_mbstringCharset($charset));
 194                  if (!empty($ret)) {
 195                      return $ret;
 196                  }
 197              }
 198              return strtolower($string);
 199          }
 200  
 201          if (!isset(self::$_lowers[$string])) {
 202              $language = setlocale(LC_CTYPE, 0);
 203              setlocale(LC_CTYPE, 'C');
 204              if ($string === null) {
 205                  self::$_lowers[$string] = '';
 206              } else {
 207                  self::$_lowers[$string] = strtolower($string);
 208              }
 209              setlocale(LC_CTYPE, $language);
 210          }
 211  
 212          return self::$_lowers[$string];
 213      }
 214  
 215      /**
 216       * Makes a string uppercase.
 217       *
 218       * @param string $string   The string to be converted.
 219       * @param boolean $locale  If true the string will be converted based on a
 220       *                         given charset, locale independent else.
 221       * @param string $charset  If $locale is true, the charset to use when
 222       *                         converting. If not provided the current charset.
 223       *
 224       * @return string  The string with uppercase characters.
 225       */
 226      public static function upper($string, $locale = false, $charset = null)
 227      {
 228          if ($locale) {
 229              if (Horde_Util::extensionExists('mbstring')) {
 230                  if (is_null($charset)) {
 231                      throw new InvalidArgumentException('$charset argument must not be null');
 232                  }
 233                  $ret = @mb_strtoupper($string, self::_mbstringCharset($charset));
 234                  if (!empty($ret)) {
 235                      return $ret;
 236                  }
 237              }
 238              return strtoupper($string);
 239          }
 240  
 241          if (!isset(self::$_uppers[$string])) {
 242              $language = setlocale(LC_CTYPE, 0);
 243              setlocale(LC_CTYPE, 'C');
 244              self::$_uppers[$string] = strtoupper($string);
 245              setlocale(LC_CTYPE, $language);
 246          }
 247  
 248          return self::$_uppers[$string];
 249      }
 250  
 251      /**
 252       * Returns a string with the first letter capitalized if it is
 253       * alphabetic.
 254       *
 255       * @param string $string   The string to be capitalized.
 256       * @param boolean $locale  If true the string will be converted based on a
 257       *                         given charset, locale independent else.
 258       * @param string $charset  The charset to use, defaults to current charset.
 259       *
 260       * @return string  The capitalized string.
 261       */
 262      public static function ucfirst($string, $locale = false, $charset = null)
 263      {
 264          if ($locale) {
 265              if (is_null($charset)) {
 266                  throw new InvalidArgumentException('$charset argument must not be null');
 267              }
 268              $first = self::substr($string, 0, 1, $charset);
 269              if (self::isAlpha($first, $charset)) {
 270                  $string = self::upper($first, true, $charset) . self::substr($string, 1, null, $charset);
 271              }
 272          } else {
 273              $string = self::upper(substr($string, 0, 1), false) . substr($string, 1);
 274          }
 275  
 276          return $string;
 277      }
 278  
 279      /**
 280       * Returns a string with the first letter of each word capitalized if it is
 281       * alphabetic.
 282       *
 283       * Sentences are splitted into words at whitestrings.
 284       *
 285       * @param string $string   The string to be capitalized.
 286       * @param boolean $locale  If true the string will be converted based on a
 287       *                         given charset, locale independent else.
 288       * @param string $charset  The charset to use, defaults to current charset.
 289       *
 290       * @return string  The capitalized string.
 291       */
 292      public static function ucwords($string, $locale = false, $charset = null)
 293      {
 294          $words = preg_split('/(\s+)/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
 295          for ($i = 0, $c = count($words); $i < $c; $i += 2) {
 296              $words[$i] = self::ucfirst($words[$i], $locale, $charset);
 297          }
 298          return implode('', $words);
 299      }
 300  
 301      /**
 302       * Returns part of a string.
 303       *
 304       * @param string $string   The string to be converted.
 305       * @param integer $start   The part's start position, zero based.
 306       * @param integer $length  The part's length.
 307       * @param string $charset  The charset to use when calculating the part's
 308       *                         position and length, defaults to current
 309       *                         charset.
 310       *
 311       * @return string  The string's part.
 312       */
 313      public static function substr($string, $start, $length = null,
 314                                    $charset = 'UTF-8')
 315      {
 316          if (is_null($length)) {
 317              $length = self::length($string, $charset) - $start;
 318          }
 319  
 320          if ($length === 0) {
 321              return '';
 322          }
 323  
 324          $error = false;
 325  
 326          /* Try mbstring. */
 327          if (Horde_Util::extensionExists('mbstring')) {
 328              $ret = @mb_substr($string, $start, $length, self::_mbstringCharset($charset));
 329  
 330              /* mb_substr() returns empty string on failure. */
 331              if (strlen($ret)) {
 332                  return $ret;
 333              }
 334              $error = true;
 335          }
 336  
 337          /* Try iconv. */
 338          if (Horde_Util::extensionExists('iconv')) {
 339              $ret = @iconv_substr($string, $start, $length, $charset);
 340  
 341              /* iconv_substr() returns false on failure. */
 342              if ($ret !== false) {
 343                  return $ret;
 344              }
 345              $error = true;
 346          }
 347  
 348          /* Try intl. */
 349          if (Horde_Util::extensionExists('intl')) {
 350              $ret = self::convertCharset(
 351                  @grapheme_substr(
 352                      self::convertCharset($string, $charset, 'UTF-8'),
 353                      $start,
 354                      $length
 355                  ),
 356                  'UTF-8',
 357                  $charset
 358              );
 359  
 360              /* grapheme_substr() returns false on failure. */
 361              if ($ret !== false) {
 362                  return $ret;
 363              }
 364              $error = true;
 365          }
 366  
 367          return $error
 368              ? ''
 369              : substr($string, $start, $length);
 370      }
 371  
 372      /**
 373       * Returns the character (not byte) length of a string.
 374       *
 375       * @param string $string  The string to return the length of.
 376       * @param string $charset The charset to use when calculating the string's
 377       *                        length.
 378       *
 379       * @return integer  The string's length.
 380       */
 381      public static function length($string, $charset = 'UTF-8')
 382      {
 383          $charset = self::lower($charset);
 384  
 385          if ($charset == 'utf-8' || $charset == 'utf8') {
 386              if (Horde_Util::extensionExists('mbstring')) {
 387                  return strlen(mb_convert_encoding($string, 'ISO-8859-1', 'UTF-8'));
 388  
 389              } else if (function_exists('utf8_decode')) {
 390                  return strlen(@utf8_decode($string));
 391              }
 392          }
 393  
 394          if (Horde_Util::extensionExists('mbstring')) {
 395              $ret = @mb_strlen($string, self::_mbstringCharset($charset));
 396              if (!empty($ret)) {
 397                  return $ret;
 398              }
 399          }
 400          if (Horde_Util::extensionExists('intl')) {
 401              return grapheme_strlen(
 402                  self::convertCharset($string, $charset, 'UTF-8')
 403              );
 404          }
 405  
 406          return strlen($string);
 407      }
 408  
 409      /**
 410       * Returns the numeric position of the first occurrence of $needle
 411       * in the $haystack string.
 412       *
 413       * @param string $haystack  The string to search through.
 414       * @param string $needle    The string to search for.
 415       * @param integer $offset   Character in $haystack to start searching at.
 416       * @param string $charset   Charset of $needle.
 417       *
 418       * @return integer  The position of first occurrence.
 419       */
 420      public static function pos(
 421          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 422      )
 423      {
 424          return self::_pos($haystack, $needle, $offset, $charset, 'strpos');
 425      }
 426  
 427      /**
 428       * Returns the numeric position of the first case-insensitive occurrence
 429       * of $needle in the $haystack string.
 430       *
 431       * @since 2.5.0
 432       *
 433       * @param string $haystack  The string to search through.
 434       * @param string $needle    The string to search for.
 435       * @param integer $offset   Character in $haystack to start searching at.
 436       * @param string $charset   Charset of $needle.
 437       *
 438       * @return integer  The position of first case-insensitive occurrence.
 439       */
 440      public static function ipos(
 441          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 442      )
 443      {
 444          return self::_pos($haystack, $needle, $offset, $charset, 'stripos');
 445      }
 446  
 447      /**
 448       * Returns the numeric position of the last occurrence of $needle
 449       * in the $haystack string.
 450       *
 451       * @param string $haystack  The string to search through.
 452       * @param string $needle    The string to search for.
 453       * @param integer $offset   Character in $haystack to start searching at.
 454       * @param string $charset   Charset of $needle.
 455       *
 456       * @return integer  The position of last occurrence.
 457       */
 458      public static function rpos(
 459          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 460      )
 461      {
 462          return self::_pos($haystack, $needle, $offset, $charset, 'strrpos');
 463      }
 464  
 465      /**
 466       * Returns the numeric position of the last case-insensitive occurrence of
 467       * $needle in the $haystack string.
 468       *
 469       * @since 2.5.0
 470       *
 471       * @param string $haystack  The string to search through.
 472       * @param string $needle    The string to search for.
 473       * @param integer $offset   Character in $haystack to start searching at.
 474       * @param string $charset   Charset of $needle.
 475       *
 476       * @return integer  The position of last case-insensitive occurrence.
 477       */
 478      public static function ripos(
 479          $haystack, $needle, $offset = 0, $charset = 'UTF-8'
 480      )
 481      {
 482          return self::_pos($haystack, $needle, $offset, $charset, 'strripos');
 483      }
 484  
 485      /**
 486       * Perform string position searches.
 487       *
 488       * @param string $haystack  The string to search through.
 489       * @param string $needle    The string to search for.
 490       * @param integer $offset   Character in $haystack to start searching at.
 491       * @param string $charset   Charset of $needle.
 492       * @param string $func      Function to use.
 493       *
 494       * @return integer  The position of occurrence.
 495       *
 496       */
 497      protected static function _pos(
 498          $haystack, $needle, $offset, $charset, $func
 499      )
 500      {
 501          if (Horde_Util::extensionExists('mbstring')) {
 502              unset($php_errormsg);
 503              $track_errors = ini_set('track_errors', 1);
 504              $ret = @call_user_func('mb_' . $func, $haystack, $needle, $offset, self::_mbstringCharset($charset));
 505              ini_set('track_errors', $track_errors);
 506              if (!isset($php_errormsg)) {
 507                  return $ret;
 508              }
 509          }
 510  
 511          if (Horde_Util::extensionExists('intl')) {
 512              unset($php_errormsg);
 513              $track_errors = ini_set('track_errors', 1);
 514              $ret = self::convertCharset(
 515                  @call_user_func(
 516                      'grapheme_' . $func,
 517                      self::convertCharset($haystack, $charset, 'UTF-8'),
 518                      self::convertCharset($needle, $charset, 'UTF-8'),
 519                      $offset
 520                  ),
 521                  'UTF-8',
 522                  $charset
 523              );
 524              ini_set('track_errors', $track_errors);
 525              if (!isset($php_errormsg)) {
 526                  return $ret;
 527              }
 528          }
 529  
 530          return $func($haystack, $needle, $offset);
 531      }
 532  
 533      /**
 534       * Returns a string padded to a certain length with another string.
 535       * This method behaves exactly like str_pad() but is multibyte safe.
 536       *
 537       * @param string $input    The string to be padded.
 538       * @param integer $length  The length of the resulting string.
 539       * @param string $pad      The string to pad the input string with. Must
 540       *                         be in the same charset like the input string.
 541       * @param const $type      The padding type. One of STR_PAD_LEFT,
 542       *                         STR_PAD_RIGHT, or STR_PAD_BOTH.
 543       * @param string $charset  The charset of the input and the padding
 544       *                         strings.
 545       *
 546       * @return string  The padded string.
 547       */
 548      public static function pad($input, $length, $pad = ' ',
 549                                 $type = STR_PAD_RIGHT, $charset = 'UTF-8')
 550      {
 551          $mb_length = self::length($input, $charset);
 552          $sb_length = strlen($input);
 553          $pad_length = self::length($pad, $charset);
 554  
 555          /* Return if we already have the length. */
 556          if ($mb_length >= $length) {
 557              return $input;
 558          }
 559  
 560          /* Shortcut for single byte strings. */
 561          if ($mb_length == $sb_length && $pad_length == strlen($pad)) {
 562              return str_pad($input, $length, $pad, $type);
 563          }
 564  
 565          switch ($type) {
 566          case STR_PAD_LEFT:
 567              $left = $length - $mb_length;
 568              $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) . $input;
 569              break;
 570  
 571          case STR_PAD_BOTH:
 572              $left = floor(($length - $mb_length) / 2);
 573              $right = ceil(($length - $mb_length) / 2);
 574              $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) .
 575                  $input .
 576                  self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
 577              break;
 578  
 579          case STR_PAD_RIGHT:
 580              $right = $length - $mb_length;
 581              $output = $input . self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
 582              break;
 583          }
 584  
 585          return $output;
 586      }
 587  
 588      /**
 589       * Wraps the text of a message.
 590       *
 591       * @param string $string         String containing the text to wrap.
 592       * @param integer $width         Wrap the string at this number of
 593       *                               characters.
 594       * @param string $break          Character(s) to use when breaking lines.
 595       * @param boolean $cut           Whether to cut inside words if a line
 596       *                               can't be wrapped.
 597       * @param boolean $line_folding  Whether to apply line folding rules per
 598       *                               RFC 822 or similar. The correct break
 599       *                               characters including leading whitespace
 600       *                               have to be specified too.
 601       *
 602       * @return string  String containing the wrapped text.
 603       */
 604      public static function wordwrap($string, $width = 75, $break = "\n",
 605                                      $cut = false, $line_folding = false)
 606      {
 607          $breakRegex = '(?:' . preg_quote($break) . ')';
 608          $rpos = self::rpos($break, "\n");
 609          if ($rpos === false) {
 610              $rpos = 0;
 611          } else {
 612              $rpos++;
 613          }
 614          $wrapped = '';
 615          $hasWrapped = false;
 616  
 617          while (self::length($string, 'UTF-8') > $width) {
 618              $line = self::substr($string, 0, $width + ($hasWrapped ? $rpos : 0), 'UTF-8');
 619              $string = self::substr($string, self::length($line, 'UTF-8'), null, 'UTF-8');
 620  
 621              // Make sure we didn't cut a word, unless we want hard breaks
 622              // anyway.
 623              if (!$cut && preg_match('/^(.+?)((\s|\r?\n).*)/us', $string, $match)) {
 624                  $line .= $match[1];
 625                  $string = $match[2];
 626              }
 627  
 628              // Wrap at existing line breaks.
 629              $regex = '/^(' . ($hasWrapped ? $breakRegex : '') . '.*?)(\r?\n)(.*)$/us';
 630              if (preg_match($regex, $line, $match)) {
 631                  $wrapped .= $match[1] . $match[2];
 632                  $string = $match[3] . $string;
 633                  $hasWrapped = false;
 634                  continue;
 635              }
 636  
 637              // Wrap at the last colon or semicolon followed by a whitespace if
 638              // doing line folding.
 639              if ($line_folding &&
 640                  preg_match('/^(.*?)(;|:)(\s+.*)$/us', $line, $match)) {
 641                  $wrapped .= $match[1] . $match[2];
 642                  $string = $break . $match[3] . $string;
 643                  $hasWrapped = true;
 644                  continue;
 645              }
 646  
 647              // Wrap at the last whitespace of $line.
 648              $sub = $line_folding
 649                  ? '(' . ($hasWrapped ? $breakRegex : '') . '.+[^\s])'
 650                  : '(' . ($hasWrapped ? $breakRegex : '') . '.*)';
 651  
 652              if (preg_match('/^' . $sub . '(\s+)(.*)$/u', $line, $match)) {
 653                  $wrapped .= $match[1];
 654                  $string = $break . ($line_folding ? $match[2] : '')
 655                      . $match[3] . $string;
 656                  $hasWrapped = true;
 657                  continue;
 658              }
 659  
 660              // Hard wrap if necessary.
 661              if ($cut) {
 662                  $wrapped .= $line;
 663                  $string = $break . $string;
 664                  $hasWrapped = true;
 665                  continue;
 666              }
 667  
 668              $wrapped .= $line;
 669              $hasWrapped = false;
 670          }
 671  
 672          return $wrapped . $string;
 673      }
 674  
 675      /**
 676       * Wraps the text of a message.
 677       *
 678       * @param string $text        String containing the text to wrap.
 679       * @param integer $length     Wrap $text at this number of characters.
 680       * @param string $break_char  Character(s) to use when breaking lines.
 681       * @param boolean $quote      Ignore lines that are wrapped with the '>'
 682       *                            character (RFC 2646)? If true, we don't
 683       *                            remove any padding whitespace at the end of
 684       *                            the string.
 685       *
 686       * @return string  String containing the wrapped text.
 687       */
 688      public static function wrap($text, $length = 80, $break_char = "\n",
 689                                  $quote = false)
 690      {
 691          $paragraphs = array();
 692  
 693          foreach (preg_split('/\r?\n/', $text) as $input) {
 694              if ($quote && (strpos($input, '>') === 0)) {
 695                  $line = $input;
 696              } else {
 697                  /* We need to handle the Usenet-style signature line
 698                   * separately; since the space after the two dashes is
 699                   * REQUIRED, we don't want to trim the line. */
 700                  if ($input != '-- ') {
 701                      $input = rtrim($input);
 702                  }
 703                  $line = self::wordwrap($input, $length, $break_char);
 704              }
 705  
 706              $paragraphs[] = $line;
 707          }
 708  
 709          return implode($break_char, $paragraphs);
 710      }
 711  
 712      /**
 713       * Return a truncated string, suitable for notifications.
 714       *
 715       * @param string $text     The original string.
 716       * @param integer $length  The maximum length.
 717       *
 718       * @return string  The truncated string, if longer than $length.
 719       */
 720      public static function truncate($text, $length = 100)
 721      {
 722          return (self::length($text) > $length)
 723              ? rtrim(self::substr($text, 0, $length - 3)) . '...'
 724              : $text;
 725      }
 726  
 727      /**
 728       * Return an abbreviated string, with characters in the middle of the
 729       * excessively long string replaced by '...'.
 730       *
 731       * @param string $text     The original string.
 732       * @param integer $length  The length at which to abbreviate.
 733       *
 734       * @return string  The abbreviated string, if longer than $length.
 735       */
 736      public static function abbreviate($text, $length = 20)
 737      {
 738          return (self::length($text) > $length)
 739              ? rtrim(self::substr($text, 0, round(($length - 3) / 2))) . '...' . ltrim(self::substr($text, (($length - 3) / 2) * -1))
 740              : $text;
 741      }
 742  
 743      /**
 744       * Returns the common leading part of two strings.
 745       *
 746       * @param string $str1  A string.
 747       * @param string $str2  Another string.
 748       *
 749       * @return string  The start of $str1 and $str2 that is identical in both.
 750       */
 751      public static function common($str1, $str2)
 752      {
 753          for ($result = '', $i = 0;
 754               isset($str1[$i]) && isset($str2[$i]) && $str1[$i] == $str2[$i];
 755               $i++) {
 756              $result .= $str1[$i];
 757          }
 758          return $result;
 759      }
 760  
 761      /**
 762       * Returns true if the every character in the parameter is an alphabetic
 763       * character.
 764       *
 765       * @param string $string   The string to test.
 766       * @param string $charset  The charset to use when testing the string.
 767       *
 768       * @return boolean  True if the parameter was alphabetic only.
 769       */
 770      public static function isAlpha($string, $charset)
 771      {
 772          if (!Horde_Util::extensionExists('mbstring')) {
 773              return ctype_alpha($string);
 774          }
 775  
 776          $charset = self::_mbstringCharset($charset);
 777          $old_charset = mb_regex_encoding();
 778  
 779          if ($charset != $old_charset) {
 780              @mb_regex_encoding($charset);
 781          }
 782          $alpha = !@mb_ereg_match('[^[:alpha:]]', $string);
 783          if ($charset != $old_charset) {
 784              @mb_regex_encoding($old_charset);
 785          }
 786  
 787          return $alpha;
 788      }
 789  
 790      /**
 791       * Returns true if every character in the parameter is a lowercase letter in
 792       * the current locale.
 793       *
 794       * @param string $string   The string to test.
 795       * @param string $charset  The charset to use when testing the string.
 796       *
 797       * @return boolean  True if the parameter was lowercase.
 798       */
 799      public static function isLower($string, $charset)
 800      {
 801          return ((self::lower($string, true, $charset) === $string) &&
 802                  self::isAlpha($string, $charset));
 803      }
 804  
 805      /**
 806       * Returns true if every character in the parameter is an uppercase letter
 807       * in the current locale.
 808       *
 809       * @param string $string   The string to test.
 810       * @param string $charset  The charset to use when testing the string.
 811       *
 812       * @return boolean  True if the parameter was uppercase.
 813       */
 814      public static function isUpper($string, $charset)
 815      {
 816          return ((self::upper($string, true, $charset) === $string) &&
 817                  self::isAlpha($string, $charset));
 818      }
 819  
 820      /**
 821       * Performs a multibyte safe regex match search on the text provided.
 822       *
 823       * @param string $text     The text to search.
 824       * @param array $regex     The regular expressions to use, without perl
 825       *                         regex delimiters (e.g. '/' or '|').
 826       * @param string $charset  The character set of the text.
 827       *
 828       * @return array  The matches array from the first regex that matches.
 829       */
 830      public static function regexMatch($text, $regex, $charset = null)
 831      {
 832          if (!empty($charset)) {
 833              $regex = self::convertCharset($regex, $charset, 'utf-8');
 834              $text = self::convertCharset($text, $charset, 'utf-8');
 835          }
 836  
 837          $matches = array();
 838          foreach ($regex as $val) {
 839              if (preg_match('/' . $val . '/u', $text, $matches)) {
 840                  break;
 841              }
 842          }
 843  
 844          if (!empty($charset)) {
 845              $matches = self::convertCharset($matches, 'utf-8', $charset);
 846          }
 847  
 848          return $matches;
 849      }
 850  
 851      /**
 852       * Check to see if a string is valid UTF-8.
 853       *
 854       * @param string $text  The text to check.
 855       *
 856       * @return boolean  True if valid UTF-8.
 857       */
 858      public static function validUtf8($text)
 859      {
 860          $text = strval($text);
 861  
 862          // First check for illegal surrogate pair sequences. See RFC 3629.
 863          if (preg_match('/\xE0[\x80-\x9F][\x80-\xBF]|\xED[\xA0-\xBF][\x80-\xBF]/S', $text)) {
 864              return false;
 865          }
 866  
 867          for ($i = 0, $len = strlen($text); $i < $len; ++$i) {
 868              $c = ord($text[$i]);
 869              if ($c > 128) {
 870                  if ($c > 247) {
 871                      // STD 63 (RFC 3629) eliminates 5 & 6-byte characters.
 872                      return false;
 873                  } elseif ($c > 239) {
 874                      $j = 3;
 875                  } elseif ($c > 223) {
 876                      $j = 2;
 877                  } elseif ($c > 191) {
 878                      $j = 1;
 879                  } else {
 880                      return false;
 881                  }
 882  
 883                  if (($i + $j) > $len) {
 884                      return false;
 885                  }
 886  
 887                  do {
 888                      $c = ord($text[++$i]);
 889                      if (($c < 128) || ($c > 191)) {
 890                          return false;
 891                      }
 892                  } while (--$j);
 893              }
 894          }
 895  
 896          return true;
 897      }
 898  
 899      /**
 900       * Workaround charsets that don't work with mbstring functions.
 901       *
 902       * @param string $charset  The original charset.
 903       *
 904       * @return string  The charset to use with mbstring functions.
 905       */
 906      protected static function _mbstringCharset($charset)
 907      {
 908          /* mbstring functions do not handle the 'ks_c_5601-1987' &
 909           * 'ks_c_5601-1989' charsets. However, these charsets are used, for
 910           * example, by various versions of Outlook to send Korean characters.
 911           * Use UHC (CP949) encoding instead. See, e.g.,
 912           * http://lists.w3.org/Archives/Public/ietf-charsets/2001AprJun/0030.html */
 913          return in_array(self::lower($charset), array('ks_c_5601-1987', 'ks_c_5601-1989'))
 914              ? 'UHC'
 915              : $charset;
 916      }
 917  
 918      /**
 919       * Strip UTF-8 byte order mark (BOM) from string data.
 920       *
 921       * @param string $str  Input string (UTF-8).
 922       *
 923       * @return string  Stripped string (UTF-8).
 924       */
 925      public static function trimUtf8Bom($str)
 926      {
 927          return (substr($str, 0, 3) == pack('CCC', 239, 187, 191))
 928              ? substr($str, 3)
 929              : $str;
 930      }
 931  
 932  }