See Release Notes
Long Term Support Release
Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401] [Versions 400 and 401] [Versions 401 and 402] [Versions 401 and 403]
<?php
/**
 * Provides static methods for charset and locale safe string manipulation.
 *
 * Copyright 2003-2017 Horde LLC (http://www.horde.org/)
 *
 * See the enclosed file LICENSE for license information (LGPL). If you
 * did not receive this file, see http://www.horde.org/licenses/lgpl21.
 *
 * @todo Split up in Horde_String_Multibyte for multibyte-safe methods and
 *       Horde_String_Locale for locale-safe methods.
 *
 * @author   Jan Schneider <jan@horde.org>
 * @category Horde
 * @license  http://www.horde.org/licenses/lgpl21 LGPL 2.1
 * @package  Util
 */
class Horde_String
{
    /**
     * lower() cache.
     *
     * @var array
     */
    protected static $_lowers = array();

    /**
     * upper() cache.
     *
     * @var array
     */
    protected static $_uppers = array();

    /**
     * Converts a string from one charset to another.
     *
     * Uses the iconv or the mbstring extensions.
     * The original string is returned if conversion failed or none
     * of the extensions were available.
     *
     * @param mixed $input    The data to be converted. If $input is an
     *                        array, the array's keys and values get
     *                        converted recursively. Objects are cloned and
     *                        their accessible properties converted.
     * @param string $from    The string's current charset.
     * @param string $to      The charset to convert the string to.
     * @param boolean $force  Force conversion?
     *
     * @return mixed  The converted input data.
     */
    public static function convertCharset($input, $from, $to, $force = false)
    {
        /* Don't bother converting numbers. */
        if (is_numeric($input)) {
            return $input;
        }

        /* If the from and to character sets are identical, return now. */
        if (!$force && $from == $to) {
            return $input;
        }
        /* Compare again case-insensitively. */
        $from = self::lower($from);
        $to = self::lower($to);
        if (!$force && $from == $to) {
            return $input;
        }

        if (is_array($input)) {
            $tmp = array();
            foreach ($input as $key => $val) {
                $tmp[self::_convertCharset($key, $from, $to)] = self::convertCharset($val, $from, $to, $force);
            }
            return $tmp;
        }

        if (is_object($input)) {
            // PEAR_Error/Exception objects are almost guaranteed to contain
            // recursion, which will cause a segfault in PHP. We should never
            // reach this line, but add a check.
            if (($input instanceof Exception) ||
                ($input instanceof PEAR_Error)) {
                return '';
            }

            /* Work on a copy so the caller's object is left untouched. */
            $input = clone $input;
            $vars = get_object_vars($input);
            foreach ($vars as $key => $val) {
                $input->$key = self::convertCharset($val, $from, $to, $force);
            }
            return $input;
        }

        if (!is_string($input)) {
            return $input;
        }

        return self::_convertCharset($input, $from, $to);
    }

    /**
     * Internal function used to do charset conversion.
     *
     * Tries, in order: utf8_encode()/utf8_decode(), the UTF7-IMAP
     * converter, iconv with transliteration, and mbstring. Returns the
     * input unchanged if none of them succeeds.
     *
     * @param string $input  See self::convertCharset().
     * @param string $from   See self::convertCharset(). Expected to be
     *                       lowercased already by the caller.
     * @param string $to     See self::convertCharset(). Expected to be
     *                       lowercased already by the caller.
     *
     * @return string  The converted string.
     */
    protected static function _convertCharset($input, $from, $to)
    {
        /* Use utf8_[en|de]code() if possible and if the string isn't too
         * large (less than 16 MB = 16 * 1024 * 1024 = 16777216 bytes) - these
         * functions use more memory. */
        if (Horde_Util::extensionExists('xml') &&
            ((strlen($input) < 16777216) ||
             !Horde_Util::extensionExists('iconv') ||
             !Horde_Util::extensionExists('mbstring'))) {
            if (($to == 'utf-8') &&
                in_array($from, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
                return utf8_encode($input);
            }

            if (($from == 'utf-8') &&
                in_array($to, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
                return utf8_decode($input);
            }
        }

        /* Try UTF7-IMAP conversions. */
        if (($from == 'utf7-imap') || ($to == 'utf7-imap')) {
            try {
                if ($from == 'utf7-imap') {
                    return self::convertCharset(Horde_Imap_Client_Utf7imap::Utf7ImapToUtf8($input), 'UTF-8', $to);
                } else {
                    if ($from == 'utf-8') {
                        $conv = $input;
                    } else {
                        $conv = self::convertCharset($input, $from, 'UTF-8');
                    }
                    return Horde_Imap_Client_Utf7imap::Utf8ToUtf7Imap($conv);
                }
            } catch (Horde_Imap_Client_Exception $e) {
                return $input;
            }
        }

        /* Try iconv with transliteration. */
        if (Horde_Util::extensionExists('iconv')) {
            unset($php_errormsg);
            ini_set('track_errors', 1);
            $out = @iconv($from, $to . '//TRANSLIT', $input);
            $errmsg = isset($php_errormsg);
            ini_restore('track_errors');
            if (!$errmsg && $out !== false) {
                return $out;
            }
        }

        /* Try mbstring. */
        if (Horde_Util::extensionExists('mbstring')) {
            try {
                $out = @mb_convert_encoding($input, $to, self::_mbstringCharset($from));
                if (!empty($out)) {
                    return $out;
                }
            } catch (ValueError $e) {
                // catch error thrown under PHP 8.0, if mbstring does not support the encoding
            }
        }

        return $input;
    }

    /**
     * Makes a string lowercase.
     *
     * @param string $string   The string to be converted.
     * @param boolean $locale  If true the string will be converted based on
     *                         a given charset, locale independent else.
     * @param string $charset  If $locale is true, the charset to use when
     *                         converting.
     *
     * @return string  The string with lowercase characters.
     *
     * @throws InvalidArgumentException  If $locale is true, mbstring is
     *                                   available and $charset is null.
     */
    public static function lower($string, $locale = false, $charset = null)
    {
        if ($locale) {
            if (Horde_Util::extensionExists('mbstring')) {
                if (is_null($charset)) {
                    throw new InvalidArgumentException('$charset argument must not be null');
                }
                $ret = @mb_strtolower($string, self::_mbstringCharset($charset));
                if (!empty($ret)) {
                    return $ret;
                }
            }
            return strtolower($string);
        }

        /* Nothing to convert; also avoids using null as a cache key. */
        if ($string === null) {
            return '';
        }

        if (!isset(self::$_lowers[$string])) {
            /* Temporarily switch to the C locale so the conversion only
             * touches ASCII letters, then restore the previous locale. */
            $language = setlocale(LC_CTYPE, 0);
            setlocale(LC_CTYPE, 'C');
            self::$_lowers[$string] = strtolower($string);
            setlocale(LC_CTYPE, $language);
        }

        return self::$_lowers[$string];
    }

    /**
     * Makes a string uppercase.
     *
     * @param string $string   The string to be converted.
     * @param boolean $locale  If true the string will be converted based on a
     *                         given charset, locale independent else.
     * @param string $charset  If $locale is true, the charset to use when
     *                         converting.
     *
     * @return string  The string with uppercase characters.
     *
     * @throws InvalidArgumentException  If $locale is true, mbstring is
     *                                   available and $charset is null.
     */
    public static function upper($string, $locale = false, $charset = null)
    {
        if ($locale) {
            if (Horde_Util::extensionExists('mbstring')) {
                if (is_null($charset)) {
                    throw new InvalidArgumentException('$charset argument must not be null');
                }
                $ret = @mb_strtoupper($string, self::_mbstringCharset($charset));
                if (!empty($ret)) {
                    return $ret;
                }
            }
            return strtoupper($string);
        }

        /* Mirror lower(): a null input yields an empty string without
         * passing null to strtoupper() or using it as a cache key. */
        if ($string === null) {
            return '';
        }

        if (!isset(self::$_uppers[$string])) {
            /* Temporarily switch to the C locale so the conversion only
             * touches ASCII letters, then restore the previous locale. */
            $language = setlocale(LC_CTYPE, 0);
            setlocale(LC_CTYPE, 'C');
            self::$_uppers[$string] = strtoupper($string);
            setlocale(LC_CTYPE, $language);
        }

        return self::$_uppers[$string];
    }

    /**
     * Returns a string with the first letter capitalized if it is
     * alphabetic.
     *
     * @param string $string   The string to be capitalized.
     * @param boolean $locale  If true the string will be converted based on a
     *                         given charset, locale independent else.
     * @param string $charset  The charset to use. Required if $locale is
     *                         true.
     *
     * @return string  The capitalized string.
     *
     * @throws InvalidArgumentException  If $locale is true and $charset is
     *                                   null.
     */
    public static function ucfirst($string, $locale = false, $charset = null)
    {
        if ($locale) {
            if (is_null($charset)) {
                throw new InvalidArgumentException('$charset argument must not be null');
            }
            $first = self::substr($string, 0, 1, $charset);
            if (self::isAlpha($first, $charset)) {
                $string = self::upper($first, true, $charset) . self::substr($string, 1, null, $charset);
            }
        } else {
            $string = self::upper(substr($string, 0, 1), false) . substr($string, 1);
        }

        return $string;
    }

    /**
     * Returns a string with the first letter of each word capitalized if it is
     * alphabetic.
     *
     * Sentences are split into words at whitespace.
     *
     * @param string $string   The string to be capitalized.
     * @param boolean $locale  If true the string will be converted based on a
     *                         given charset, locale independent else.
     * @param string $charset  The charset to use. Required if $locale is
     *                         true.
     *
     * @return string  The capitalized string.
     */
    public static function ucwords($string, $locale = false, $charset = null)
    {
        /* Even indexes hold words, odd indexes the captured whitespace. */
        $words = preg_split('/(\s+)/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
        for ($i = 0, $c = count($words); $i < $c; $i += 2) {
            $words[$i] = self::ucfirst($words[$i], $locale, $charset);
        }
        return implode('', $words);
    }

    /**
     * Returns part of a string.
     *
     * @param string $string   The string to be converted.
     * @param integer $start   The part's start position, zero based.
     * @param integer $length  The part's length. If null, everything from
     *                         $start on is returned.
     * @param string $charset  The charset to use when calculating the part's
     *                         position and length.
     *
     * @return string  The string's part.
     */
    public static function substr($string, $start, $length = null,
                                  $charset = 'UTF-8')
    {
        if (is_null($length)) {
            $length = self::length($string, $charset) - $start;
        }

        if ($length === 0) {
            return '';
        }

        $error = false;

        /* Try mbstring. */
        if (Horde_Util::extensionExists('mbstring')) {
            $ret = @mb_substr($string, $start, $length, self::_mbstringCharset($charset));

            /* mb_substr() returns empty string on failure. */
            if (strlen($ret)) {
                return $ret;
            }
            $error = true;
        }

        /* Try iconv. */
        if (Horde_Util::extensionExists('iconv')) {
            $ret = @iconv_substr($string, $start, $length, $charset);

            /* iconv_substr() returns false on failure. */
            if ($ret !== false) {
                return $ret;
            }
            $error = true;
        }

        /* Try intl. */
        if (Horde_Util::extensionExists('intl')) {
            $ret = self::convertCharset(
                @grapheme_substr(
                    self::convertCharset($string, $charset, 'UTF-8'),
                    $start,
                    $length
                ),
                'UTF-8',
                $charset
            );

            /* grapheme_substr() returns false on failure. */
            if ($ret !== false) {
                return $ret;
            }
            $error = true;
        }

        /* Only fall back to the byte-wise substr() if no multibyte-aware
         * extension was available at all. */
        return $error
            ? ''
            : substr($string, $start, $length);
    }

    /**
     * Returns the character (not byte) length of a string.
     *
     * @param string $string   The string to return the length of.
     * @param string $charset  The charset to use when calculating the string's
     *                         length.
     *
     * @return integer  The string's length.
     */
    public static function length($string, $charset = 'UTF-8')
    {
        $charset = self::lower($charset);

        if ($charset == 'utf-8' || $charset == 'utf8') {
            /* utf8_decode() produces one byte per decoded character, so
             * the byte length of its result is the character count. */
            return strlen(utf8_decode($string));
        }

        if (Horde_Util::extensionExists('mbstring')) {
            $ret = @mb_strlen($string, self::_mbstringCharset($charset));
            if (!empty($ret)) {
                return $ret;
            }
        }
        if (Horde_Util::extensionExists('intl')) {
            return grapheme_strlen(
                self::convertCharset($string, $charset, 'UTF-8')
            );
        }

        return strlen($string);
    }

    /**
     * Returns the numeric position of the first occurrence of $needle
     * in the $haystack string.
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of first occurrence.
     */
    public static function pos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'strpos');
    }

    /**
     * Returns the numeric position of the first case-insensitive occurrence
     * of $needle in the $haystack string.
     *
     * @since 2.5.0
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of first case-insensitive occurrence.
     */
    public static function ipos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'stripos');
    }

    /**
     * Returns the numeric position of the last occurrence of $needle
     * in the $haystack string.
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of last occurrence.
     */
    public static function rpos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'strrpos');
    }

    /**
     * Returns the numeric position of the last case-insensitive occurrence of
     * $needle in the $haystack string.
     *
     * @since 2.5.0
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of last case-insensitive occurrence.
     */
    public static function ripos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'strripos');
    }

    /**
     * Perform string position searches.
     *
     * Dispatches to the 'mb_' variant of $func, then the 'grapheme_'
     * variant, and finally to the plain byte-wise function.
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     * @param string $func      Function to use (e.g. 'strpos').
     *
     * @return integer  The position of occurrence.
     */
    protected static function _pos(
        $haystack, $needle, $offset, $charset, $func
    )
    {
        if (Horde_Util::extensionExists('mbstring')) {
            unset($php_errormsg);
            $track_errors = ini_set('track_errors', 1);
            $ret = @call_user_func('mb_' . $func, $haystack, $needle, $offset, self::_mbstringCharset($charset));
            ini_set('track_errors', $track_errors);
            if (!isset($php_errormsg)) {
                return $ret;
            }
        }

        if (Horde_Util::extensionExists('intl')) {
            unset($php_errormsg);
            $track_errors = ini_set('track_errors', 1);
            $ret = self::convertCharset(
                @call_user_func(
                    'grapheme_' . $func,
                    self::convertCharset($haystack, $charset, 'UTF-8'),
                    self::convertCharset($needle, $charset, 'UTF-8'),
                    $offset
                ),
                'UTF-8',
                $charset
            );
            ini_set('track_errors', $track_errors);
            if (!isset($php_errormsg)) {
                return $ret;
            }
        }

        return $func($haystack, $needle, $offset);
    }

    /**
     * Returns a string padded to a certain length with another string.
     * This method behaves like str_pad() but is multibyte safe.
     *
     * @param string $input    The string to be padded.
     * @param integer $length  The length of the resulting string.
     * @param string $pad      The string to pad the input string with. Must
     *                         be in the same charset like the input string.
     * @param const $type      The padding type. One of STR_PAD_LEFT,
     *                         STR_PAD_RIGHT, or STR_PAD_BOTH.
     * @param string $charset  The charset of the input and the padding
     *                         strings.
     *
     * @return string  The padded string.
     */
    public static function pad($input, $length, $pad = ' ',
                               $type = STR_PAD_RIGHT, $charset = 'UTF-8')
    {
        $mb_length = self::length($input, $charset);
        $sb_length = strlen($input);
        $pad_length = self::length($pad, $charset);

        /* Return if we already have the length. */
        if ($mb_length >= $length) {
            return $input;
        }

        /* Shortcut for single byte strings. */
        if ($mb_length == $sb_length && $pad_length == strlen($pad)) {
            return str_pad($input, $length, $pad, $type);
        }

        /* floor()/ceil() return floats; cast to int so that the
         * "$length === 0" shortcut in substr() applies and no float is
         * handed to the string functions. */
        switch ($type) {
        case STR_PAD_LEFT:
            $left = (int) ($length - $mb_length);
            $output = self::substr(str_repeat($pad, (int) ceil($left / $pad_length)), 0, $left, $charset) . $input;
            break;

        case STR_PAD_BOTH:
            $left = (int) floor(($length - $mb_length) / 2);
            $right = (int) ceil(($length - $mb_length) / 2);
            $output = self::substr(str_repeat($pad, (int) ceil($left / $pad_length)), 0, $left, $charset) .
                $input .
                self::substr(str_repeat($pad, (int) ceil($right / $pad_length)), 0, $right, $charset);
            break;

        case STR_PAD_RIGHT:
            $right = (int) ($length - $mb_length);
            $output = $input . self::substr(str_repeat($pad, (int) ceil($right / $pad_length)), 0, $right, $charset);
            break;
        }

        return $output;
    }

    /**
     * Wraps the text of a message.
     *
     * @param string $string         String containing the text to wrap.
     * @param integer $width         Wrap the string at this number of
     *                               characters.
     * @param string $break          Character(s) to use when breaking lines.
     * @param boolean $cut           Whether to cut inside words if a line
     *                               can't be wrapped.
     * @param boolean $line_folding  Whether to apply line folding rules per
     *                               RFC 822 or similar. The correct break
     *                               characters including leading whitespace
     *                               have to be specified too.
     *
     * @return string  String containing the wrapped text.
     */
    public static function wordwrap($string, $width = 75, $break = "\n",
                                    $cut = false, $line_folding = false)
    {
        /* Quote with the delimiter used by the patterns below, so a break
         * string containing '/' cannot terminate the regex early. */
        $breakRegex = '(?:' . preg_quote($break, '/') . ')';
        $rpos = self::rpos($break, "\n");
        if ($rpos === false) {
            $rpos = 0;
        } else {
            $rpos++;
        }
        $wrapped = '';
        $hasWrapped = false;

        while (self::length($string, 'UTF-8') > $width) {
            $line = self::substr($string, 0, $width + ($hasWrapped ? $rpos : 0), 'UTF-8');
            $string = self::substr($string, self::length($line, 'UTF-8'), null, 'UTF-8');

            // Make sure we didn't cut a word, unless we want hard breaks
            // anyway.
            if (!$cut && preg_match('/^(.+?)((\s|\r?\n).*)/us', $string, $match)) {
                $line .= $match[1];
                $string = $match[2];
            }

            // Wrap at existing line breaks.
            $regex = '/^(' . ($hasWrapped ? $breakRegex : '') . '.*?)(\r?\n)(.*)$/us';
            if (preg_match($regex, $line, $match)) {
                $wrapped .= $match[1] . $match[2];
                $string = $match[3] . $string;
                $hasWrapped = false;
                continue;
            }

            // Wrap at the last colon or semicolon followed by a whitespace if
            // doing line folding.
            if ($line_folding &&
                preg_match('/^(.*?)(;|:)(\s+.*)$/us', $line, $match)) {
                $wrapped .= $match[1] . $match[2];
                $string = $break . $match[3] . $string;
                $hasWrapped = true;
                continue;
            }

            // Wrap at the last whitespace of $line.
            $sub = $line_folding
                ? '(' . ($hasWrapped ? $breakRegex : '') . '.+[^\s])'
                : '(' . ($hasWrapped ? $breakRegex : '') . '.*)';

            if (preg_match('/^' . $sub . '(\s+)(.*)$/u', $line, $match)) {
                $wrapped .= $match[1];
                $string = $break . ($line_folding ? $match[2] : '')
                    . $match[3] . $string;
                $hasWrapped = true;
                continue;
            }

            // Hard wrap if necessary.
            if ($cut) {
                $wrapped .= $line;
                $string = $break . $string;
                $hasWrapped = true;
                continue;
            }

            $wrapped .= $line;
            $hasWrapped = false;
        }

        return $wrapped . $string;
    }

    /**
     * Wraps the text of a message.
     *
     * @param string $text        String containing the text to wrap.
     * @param integer $length     Wrap $text at this number of characters.
     * @param string $break_char  Character(s) to use when breaking lines.
     * @param boolean $quote      Ignore lines that are wrapped with the '>'
     *                            character (RFC 2646)? If true, we don't
     *                            remove any padding whitespace at the end of
     *                            the string.
     *
     * @return string  String containing the wrapped text.
     */
    public static function wrap($text, $length = 80, $break_char = "\n",
                                $quote = false)
    {
        $paragraphs = array();

        foreach (preg_split('/\r?\n/', $text) as $input) {
            if ($quote && (strpos($input, '>') === 0)) {
                $line = $input;
            } else {
                /* We need to handle the Usenet-style signature line
                 * separately; since the space after the two dashes is
                 * REQUIRED, we don't want to trim the line. */
                if ($input != '-- ') {
                    $input = rtrim($input);
                }
                $line = self::wordwrap($input, $length, $break_char);
            }

            $paragraphs[] = $line;
        }

        return implode($break_char, $paragraphs);
    }

    /**
     * Return a truncated string, suitable for notifications.
     *
     * @param string $text     The original string.
     * @param integer $length  The maximum length.
     *
     * @return string  The truncated string, if longer than $length.
     */
    public static function truncate($text, $length = 100)
    {
        return (self::length($text) > $length)
            ? rtrim(self::substr($text, 0, $length - 3)) . '...'
            : $text;
    }

    /**
     * Return an abbreviated string, with characters in the middle of the
     * excessively long string replaced by '...'.
     *
     * @param string $text     The original string.
     * @param integer $length  The length at which to abbreviate.
     *
     * @return string  The abbreviated string, if longer than $length.
     */
    public static function abbreviate($text, $length = 20)
    {
        if (self::length($text) <= $length) {
            return $text;
        }

        /* Split the room left after the 3-character ellipsis between head
         * and tail; the head gets the extra character for odd remainders.
         * Integer offsets avoid passing floats to the substring functions. */
        $head = max(0, (int) round(($length - 3) / 2));
        $tail = max(0, (int) (($length - 3) / 2));

        /* A tail offset of 0 would select the whole string rather than
         * nothing, so it needs to be handled explicitly. */
        return rtrim(self::substr($text, 0, $head))
            . '...'
            . ($tail > 0 ? ltrim(self::substr($text, -$tail)) : '');
    }

    /**
     * Returns the common leading part of two strings.
     *
     * The comparison is done byte by byte.
     *
     * @param string $str1  A string.
     * @param string $str2  Another string.
     *
     * @return string  The start of $str1 and $str2 that is identical in both.
     */
    public static function common($str1, $str2)
    {
        for ($result = '', $i = 0;
             isset($str1[$i]) && isset($str2[$i]) && $str1[$i] == $str2[$i];
             $i++) {
            $result .= $str1[$i];
        }
        return $result;
    }

    /**
     * Returns true if every character in the parameter is an alphabetic
     * character.
     *
     * @param string $string   The string to test.
     * @param string $charset  The charset to use when testing the string.
     *
     * @return boolean  True if the parameter was alphabetic only.
     */
    public static function isAlpha($string, $charset)
    {
        if (!Horde_Util::extensionExists('mbstring')) {
            return ctype_alpha($string);
        }

        $charset = self::_mbstringCharset($charset);
        $old_charset = mb_regex_encoding();

        if ($charset != $old_charset) {
            @mb_regex_encoding($charset);
        }
        /* Search the whole string for a non-alphabetic character.
         * mb_ereg_match() only matches at the beginning of the string and
         * would therefore test the first character only. */
        $alpha = !@mb_ereg('[^[:alpha:]]', $string);
        if ($charset != $old_charset) {
            @mb_regex_encoding($old_charset);
        }

        return $alpha;
    }

    /**
     * Returns true if every character in the parameter is a lowercase letter
     * in the current locale.
     *
     * @param string $string   The string to test.
     * @param string $charset  The charset to use when testing the string.
     *
     * @return boolean  True if the parameter was lowercase.
     */
    public static function isLower($string, $charset)
    {
        return ((self::lower($string, true, $charset) === $string) &&
                self::isAlpha($string, $charset));
    }

    /**
     * Returns true if every character in the parameter is an uppercase letter
     * in the current locale.
     *
     * @param string $string   The string to test.
     * @param string $charset  The charset to use when testing the string.
     *
     * @return boolean  True if the parameter was uppercase.
     */
    public static function isUpper($string, $charset)
    {
        return ((self::upper($string, true, $charset) === $string) &&
                self::isAlpha($string, $charset));
    }

    /**
     * Performs a multibyte safe regex match search on the text provided.
     *
     * @param string $text     The text to search.
     * @param array $regex     The regular expressions to use, without perl
     *                         regex delimiters (e.g. '/' or '|').
     * @param string $charset  The character set of the text.
     *
     * @return array  The matches array from the first regex that matches.
     */
    public static function regexMatch($text, $regex, $charset = null)
    {
        if (!empty($charset)) {
            $regex = self::convertCharset($regex, $charset, 'utf-8');
            $text = self::convertCharset($text, $charset, 'utf-8');
        }

        $matches = array();
        foreach ($regex as $val) {
            if (preg_match('/' . $val . '/u', $text, $matches)) {
                break;
            }
        }

        if (!empty($charset)) {
            $matches = self::convertCharset($matches, 'utf-8', $charset);
        }

        return $matches;
    }

    /**
     * Check to see if a string is valid UTF-8.
     *
     * @param string $text  The text to check.
     *
     * @return boolean  True if valid UTF-8.
     */
    public static function validUtf8($text)
    {
        $text = strval($text);

        // First check for illegal surrogate pair sequences. See RFC 3629.
        if (preg_match('/\xE0[\x80-\x9F][\x80-\xBF]|\xED[\xA0-\xBF][\x80-\xBF]/S', $text)) {
            return false;
        }

        for ($i = 0, $len = strlen($text); $i < $len; ++$i) {
            $c = ord($text[$i]);
            // Every byte with the high bit set (0x80 and up) must be part
            // of a multibyte sequence.
            if ($c >= 128) {
                // $j is the number of continuation bytes that must follow.
                if ($c > 247) {
                    // STD 63 (RFC 3629) eliminates 5 & 6-byte characters.
                    return false;
                } elseif ($c > 239) {
                    $j = 3;
                } elseif ($c > 223) {
                    $j = 2;
                } elseif ($c > 191) {
                    $j = 1;
                } else {
                    // Continuation byte without a preceding lead byte.
                    return false;
                }

                // The last continuation byte sits at index $i + $j, which
                // has to be inside the string.
                if (($i + $j) >= $len) {
                    return false;
                }

                do {
                    $c = ord($text[++$i]);
                    if (($c < 128) || ($c > 191)) {
                        return false;
                    }
                } while (--$j);
            }
        }

        return true;
    }

    /**
     * Workaround charsets that don't work with mbstring functions.
     *
     * @param string $charset  The original charset.
     *
     * @return string  The charset to use with mbstring functions.
     */
    protected static function _mbstringCharset($charset)
    {
        /* mbstring functions do not handle the 'ks_c_5601-1987' &
         * 'ks_c_5601-1989' charsets. However, these charsets are used, for
         * example, by various versions of Outlook to send Korean characters.
         * Use UHC (CP949) encoding instead. See, e.g.,
         * http://lists.w3.org/Archives/Public/ietf-charsets/2001AprJun/0030.html */
        return in_array(self::lower($charset), array('ks_c_5601-1987', 'ks_c_5601-1989'))
            ? 'UHC'
            : $charset;
    }

    /**
     * Strip UTF-8 byte order mark (BOM) from string data.
     *
     * @param string $str  Input string (UTF-8).
     *
     * @return string  Stripped string (UTF-8).
     */
    public static function trimUtf8Bom($str)
    {
        return (substr($str, 0, 3) == pack('CCC', 239, 187, 191))
            ? substr($str, 3)
            : $str;
    }

}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body