See Release Notes
Long Term Support Release
Differences Between: [Versions 39 and 401] [Versions 39 and 402] [Versions 39 and 403]
<?php
/**
 * Provides static methods for charset and locale safe string manipulation.
 *
 * Copyright 2003-2017 Horde LLC (http://www.horde.org/)
 *
 * See the enclosed file LICENSE for license information (LGPL). If you
 * did not receive this file, see http://www.horde.org/licenses/lgpl21.
 *
 * @todo Split up in Horde_String_Multibyte for multibyte-safe methods and
 *       Horde_String_Locale for locale-safe methods.
 *
 * @author   Jan Schneider <jan@horde.org>
 * @category Horde
 * @license  http://www.horde.org/licenses/lgpl21 LGPL 2.1
 * @package  Util
 */
class Horde_String
{
    /**
     * lower() cache, keyed by the original string.
     *
     * @var array
     */
    protected static $_lowers = array();

    /**
     * upper() cache, keyed by the original string.
     *
     * @var array
     */
    protected static $_uppers = array();

    /**
     * Converts a string from one charset to another.
     *
     * Uses the iconv or the mbstring extensions.
     * The original string is returned if conversion failed or none
     * of the extensions were available.
     *
     * @param mixed $input    The data to be converted. If $input is an
     *                        array, the array's keys and values get
     *                        converted recursively. If it is an object, a
     *                        clone with converted accessible properties is
     *                        returned.
     * @param string $from    The string's current charset.
     * @param string $to      The charset to convert the string to.
     * @param boolean $force  Force conversion even if $from equals $to?
     *
     * @return mixed  The converted input data.
     */
    public static function convertCharset($input, $from, $to, $force = false)
    {
        /* Don't bother converting numbers. */
        if (is_numeric($input)) {
            return $input;
        }

        /* If the from and to character sets are identical, return now. */
        if (!$force && $from == $to) {
            return $input;
        }
        $from = self::lower($from);
        $to = self::lower($to);
        /* Check again now that both names are case-normalized. */
        if (!$force && $from == $to) {
            return $input;
        }

        if (is_array($input)) {
            $tmp = array();
            foreach ($input as $key => $val) {
                $tmp[self::_convertCharset($key, $from, $to)] = self::convertCharset($val, $from, $to, $force);
            }
            return $tmp;
        }

        if (is_object($input)) {
            // PEAR_Error/Exception objects are almost guaranteed to contain
            // recursion, which will cause a segfault in PHP. We should never
            // reach this line, but add a check.
            if (($input instanceof Exception) ||
                ($input instanceof PEAR_Error)) {
                return '';
            }

            // Work on a copy so the caller's object is left untouched.
            $input = clone $input;
            $vars = get_object_vars($input);
            foreach ($vars as $key => $val) {
                $input->$key = self::convertCharset($val, $from, $to, $force);
            }
            return $input;
        }

        if (!is_string($input)) {
            return $input;
        }

        return self::_convertCharset($input, $from, $to);
    }

    /**
     * Internal function used to do charset conversion.
     *
     * Tries, in order: utf8_encode()/utf8_decode(), UTF7-IMAP conversion,
     * iconv with transliteration, and mbstring. The input is returned
     * unchanged if none of them produced a result.
     *
     * @param string $input  See self::convertCharset().
     * @param string $from   See self::convertCharset(). Expected lowercase.
     * @param string $to     See self::convertCharset(). Expected lowercase.
     *
     * @return string  The converted string.
     */
    protected static function _convertCharset($input, $from, $to)
    {
        /* Use utf8_[en|de]code() if possible and if the string isn't too
         * large (less than 16 MB = 16 * 1024 * 1024 = 16777216 bytes) - these
         * functions use more memory.
         * NOTE(review): utf8_encode()/utf8_decode() are deprecated as of
         * PHP 8.2; kept here because no drop-in replacement treats malformed
         * input identically. */
        if (Horde_Util::extensionExists('xml') &&
            ((strlen($input) < 16777216) ||
             !Horde_Util::extensionExists('iconv') ||
             !Horde_Util::extensionExists('mbstring'))) {
            if (($to == 'utf-8') &&
                in_array($from, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
                return utf8_encode($input);
            }

            if (($from == 'utf-8') &&
                in_array($to, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
                return utf8_decode($input);
            }
        }

        /* Try UTF7-IMAP conversions. */
        if (($from == 'utf7-imap') || ($to == 'utf7-imap')) {
            try {
                if ($from == 'utf7-imap') {
                    return self::convertCharset(Horde_Imap_Client_Utf7imap::Utf7ImapToUtf8($input), 'UTF-8', $to);
                } else {
                    if ($from == 'utf-8') {
                        $conv = $input;
                    } else {
                        $conv = self::convertCharset($input, $from, 'UTF-8');
                    }
                    return Horde_Imap_Client_Utf7imap::Utf8ToUtf7Imap($conv);
                }
            } catch (Horde_Imap_Client_Exception $e) {
                return $input;
            }
        }

        /* Try iconv with transliteration. Any error raised by iconv() makes
         * us fall through to mbstring. */
        if (Horde_Util::extensionExists('iconv')) {
            $baseline = self::_errorBaseline();
            $out = @iconv($from, $to . '//TRANSLIT', $input);
            if ((error_get_last() === $baseline) && $out !== false) {
                return $out;
            }
        }

        /* Try mbstring. */
        if (Horde_Util::extensionExists('mbstring')) {
            $out = @mb_convert_encoding($input, $to, self::_mbstringCharset($from));
            if (!empty($out)) {
                return $out;
            }
        }

        return $input;
    }

    /**
     * Returns a reference value for detecting errors raised by a following
     * (possibly @-silenced) call.
     *
     * Replaces the former track_errors/$php_errormsg mechanism, which was
     * deprecated in PHP 7.2 and removed in PHP 8.0. Compare the return value
     * with error_get_last() after the call: if they differ, the call raised
     * an error.
     *
     * @return array|null  The last recorded error after clearing it where
     *                     error_clear_last() is available (i.e. null), or the
     *                     currently recorded error otherwise.
     */
    protected static function _errorBaseline()
    {
        if (function_exists('error_clear_last')) {
            error_clear_last();
        }

        return error_get_last();
    }

    /**
     * Makes a string lowercase.
     *
     * @param string $string   The string to be converted.
     * @param boolean $locale  If true the string will be converted based on
     *                         a given charset, locale independent else.
     * @param string $charset  If $locale is true, the charset to use when
     *                         converting.
     *
     * @return string  The string with lowercase characters.
     * @throws InvalidArgumentException  If $locale is true, mbstring is
     *                                   available and $charset is null.
     */
    public static function lower($string, $locale = false, $charset = null)
    {
        if ($locale) {
            if (Horde_Util::extensionExists('mbstring')) {
                if (is_null($charset)) {
                    throw new InvalidArgumentException('$charset argument must not be null');
                }
                $ret = @mb_strtolower($string, self::_mbstringCharset($charset));
                if (!empty($ret)) {
                    return $ret;
                }
            }
            return strtolower($string);
        }

        if (!isset(self::$_lowers[$string])) {
            /* Temporarily switch to the "C" locale so that only ASCII
             * letters are affected, then restore the previous locale. */
            $language = setlocale(LC_CTYPE, 0);
            setlocale(LC_CTYPE, 'C');
            self::$_lowers[$string] = strtolower($string);
            setlocale(LC_CTYPE, $language);
        }

        return self::$_lowers[$string];
    }

    /**
     * Makes a string uppercase.
     *
     * @param string $string   The string to be converted.
     * @param boolean $locale  If true the string will be converted based on a
     *                         given charset, locale independent else.
     * @param string $charset  If $locale is true, the charset to use when
     *                         converting.
     *
     * @return string  The string with uppercase characters.
     * @throws InvalidArgumentException  If $locale is true, mbstring is
     *                                   available and $charset is null.
     */
    public static function upper($string, $locale = false, $charset = null)
    {
        if ($locale) {
            if (Horde_Util::extensionExists('mbstring')) {
                if (is_null($charset)) {
                    throw new InvalidArgumentException('$charset argument must not be null');
                }
                $ret = @mb_strtoupper($string, self::_mbstringCharset($charset));
                if (!empty($ret)) {
                    return $ret;
                }
            }
            return strtoupper($string);
        }

        if (!isset(self::$_uppers[$string])) {
            /* Temporarily switch to the "C" locale so that only ASCII
             * letters are affected, then restore the previous locale. */
            $language = setlocale(LC_CTYPE, 0);
            setlocale(LC_CTYPE, 'C');
            self::$_uppers[$string] = strtoupper($string);
            setlocale(LC_CTYPE, $language);
        }

        return self::$_uppers[$string];
    }

    /**
     * Returns a string with the first letter capitalized if it is
     * alphabetic.
     *
     * @param string $string   The string to be capitalized.
     * @param boolean $locale  If true the string will be converted based on a
     *                         given charset, locale independent else.
     * @param string $charset  The charset to use. Required if $locale is
     *                         true.
     *
     * @return string  The capitalized string.
     * @throws InvalidArgumentException  If $locale is true and $charset is
     *                                   null.
     */
    public static function ucfirst($string, $locale = false, $charset = null)
    {
        if ($locale) {
            if (is_null($charset)) {
                throw new InvalidArgumentException('$charset argument must not be null');
            }
            $first = self::substr($string, 0, 1, $charset);
            if (self::isAlpha($first, $charset)) {
                $string = self::upper($first, true, $charset) . self::substr($string, 1, null, $charset);
            }
        } else {
            $string = self::upper(substr($string, 0, 1), false) . substr($string, 1);
        }

        return $string;
    }

    /**
     * Returns a string with the first letter of each word capitalized if it is
     * alphabetic.
     *
     * Sentences are split into words at whitespace.
     *
     * @param string $string   The string to be capitalized.
     * @param boolean $locale  If true the string will be converted based on a
     *                         given charset, locale independent else.
     * @param string $charset  The charset to use. Required if $locale is
     *                         true.
     *
     * @return string  The capitalized string.
     */
    public static function ucwords($string, $locale = false, $charset = null)
    {
        /* The delimiters are captured too, so words sit at even indexes and
         * the original whitespace at odd indexes. */
        $words = preg_split('/(\s+)/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
        for ($i = 0, $c = count($words); $i < $c; $i += 2) {
            $words[$i] = self::ucfirst($words[$i], $locale, $charset);
        }
        return implode('', $words);
    }

    /**
     * Returns part of a string.
     *
     * @param string $string   The string to be converted.
     * @param integer $start   The part's start position, zero based.
     * @param integer $length  The part's length. If null, everything from
     *                         $start to the end of the string.
     * @param string $charset  The charset to use when calculating the part's
     *                         position and length.
     *
     * @return string  The string's part.
     */
    public static function substr($string, $start, $length = null,
                                  $charset = 'UTF-8')
    {
        if (is_null($length)) {
            $length = self::length($string, $charset) - $start;
        }

        if ($length === 0) {
            return '';
        }

        $error = false;

        /* Try mbstring. */
        if (Horde_Util::extensionExists('mbstring')) {
            $ret = @mb_substr($string, $start, $length, self::_mbstringCharset($charset));

            /* mb_substr() returns empty string on failure. */
            if (strlen($ret)) {
                return $ret;
            }
            $error = true;
        }

        /* Try iconv. */
        if (Horde_Util::extensionExists('iconv')) {
            $ret = @iconv_substr($string, $start, $length, $charset);

            /* iconv_substr() returns false on failure. */
            if ($ret !== false) {
                return $ret;
            }
            $error = true;
        }

        /* Try intl. */
        if (Horde_Util::extensionExists('intl')) {
            $ret = self::convertCharset(
                @grapheme_substr(
                    self::convertCharset($string, $charset, 'UTF-8'),
                    $start,
                    $length
                ),
                'UTF-8',
                $charset
            );

            /* grapheme_substr() returns false on failure. */
            if ($ret !== false) {
                return $ret;
            }
            $error = true;
        }

        /* Only fall back to the byte-based substr() if no multibyte-aware
         * extension was available at all. */
        return $error
            ? ''
            : substr($string, $start, $length);
    }

    /**
     * Returns the character (not byte) length of a string.
     *
     * @param string $string   The string to return the length of.
     * @param string $charset  The charset to use when calculating the string's
     *                         length.
     *
     * @return integer  The string's length.
     */
    public static function length($string, $charset = 'UTF-8')
    {
        $charset = self::lower($charset);

        if ($charset == 'utf-8' || $charset == 'utf8') {
            /* utf8_decode() emits one byte per decoded character.
             * NOTE(review): deprecated as of PHP 8.2. */
            return strlen(utf8_decode($string));
        }

        if (Horde_Util::extensionExists('mbstring')) {
            $ret = @mb_strlen($string, self::_mbstringCharset($charset));
            if (!empty($ret)) {
                return $ret;
            }
        }
        if (Horde_Util::extensionExists('intl')) {
            return grapheme_strlen(
                self::convertCharset($string, $charset, 'UTF-8')
            );
        }

        return strlen($string);
    }

    /**
     * Returns the numeric position of the first occurrence of $needle
     * in the $haystack string.
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of first occurrence.
     */
    public static function pos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'strpos');
    }

    /**
     * Returns the numeric position of the first case-insensitive occurrence
     * of $needle in the $haystack string.
     *
     * @since 2.5.0
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of first case-insensitive occurrence.
     */
    public static function ipos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'stripos');
    }

    /**
     * Returns the numeric position of the last occurrence of $needle
     * in the $haystack string.
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of last occurrence.
     */
    public static function rpos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'strrpos');
    }

    /**
     * Returns the numeric position of the last case-insensitive occurrence of
     * $needle in the $haystack string.
     *
     * @since 2.5.0
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of last case-insensitive occurrence.
     */
    public static function ripos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'strripos');
    }

    /**
     * Perform string position searches.
     *
     * Tries the mbstring variant of $func first, then the intl (grapheme)
     * variant, and finally the native byte-based function. A backend's
     * result is only used if the call raised no error.
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     * @param string $func      Function to use: 'strpos', 'stripos',
     *                          'strrpos' or 'strripos'.
     *
     * @return integer  The position of occurrence.
     */
    protected static function _pos(
        $haystack, $needle, $offset, $charset, $func
    )
    {
        if (Horde_Util::extensionExists('mbstring')) {
            $baseline = self::_errorBaseline();
            $ret = @call_user_func('mb_' . $func, $haystack, $needle, $offset, self::_mbstringCharset($charset));
            if (error_get_last() === $baseline) {
                return $ret;
            }
        }

        if (Horde_Util::extensionExists('intl')) {
            $baseline = self::_errorBaseline();
            $ret = self::convertCharset(
                @call_user_func(
                    'grapheme_' . $func,
                    self::convertCharset($haystack, $charset, 'UTF-8'),
                    self::convertCharset($needle, $charset, 'UTF-8'),
                    $offset
                ),
                'UTF-8',
                $charset
            );
            if (error_get_last() === $baseline) {
                return $ret;
            }
        }

        return $func($haystack, $needle, $offset);
    }

    /**
     * Returns a string padded to a certain length with another string.
     * This method behaves exactly like str_pad() but is multibyte safe.
     *
     * @param string $input    The string to be padded.
     * @param integer $length  The length of the resulting string.
     * @param string $pad      The string to pad the input string with. Must
     *                         be in the same charset like the input string.
     * @param const $type      The padding type. One of STR_PAD_LEFT,
     *                         STR_PAD_RIGHT, or STR_PAD_BOTH.
     * @param string $charset  The charset of the input and the padding
     *                         strings.
     *
     * @return string  The padded string.
     */
    public static function pad($input, $length, $pad = ' ',
                               $type = STR_PAD_RIGHT, $charset = 'UTF-8')
    {
        $mb_length = self::length($input, $charset);
        $sb_length = strlen($input);
        $pad_length = self::length($pad, $charset);

        /* Return if we already have the length. */
        if ($mb_length >= $length) {
            return $input;
        }

        /* Shortcut for single byte strings. An empty padding string is also
         * handed to str_pad() so that it is reported the native way instead
         * of triggering a division by zero below. */
        if (($pad_length == 0) ||
            ($mb_length == $sb_length && $pad_length == strlen($pad))) {
            return str_pad($input, $length, $pad, $type);
        }

        switch ($type) {
        case STR_PAD_LEFT:
            $left = $length - $mb_length;
            $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) . $input;
            break;

        case STR_PAD_BOTH:
            /* Like str_pad(), an odd remainder goes to the right side. */
            $left = floor(($length - $mb_length) / 2);
            $right = ceil(($length - $mb_length) / 2);
            $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) .
                $input .
                self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
            break;

        case STR_PAD_RIGHT:
            $right = $length - $mb_length;
            $output = $input . self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
            break;

        default:
            /* Unknown padding type: let str_pad() report it. */
            return str_pad($input, $length, $pad, $type);
        }

        return $output;
    }

    /**
     * Wraps the text of a message.
     *
     * @param string $string         String containing the text to wrap.
     * @param integer $width         Wrap the string at this number of
     *                               characters.
     * @param string $break          Character(s) to use when breaking lines.
     * @param boolean $cut           Whether to cut inside words if a line
     *                               can't be wrapped.
     * @param boolean $line_folding  Whether to apply line folding rules per
     *                               RFC 822 or similar. The correct break
     *                               characters including leading whitespace
     *                               have to be specified too.
     *
     * @return string  String containing the wrapped text.
     */
    public static function wordwrap($string, $width = 75, $break = "\n",
                                    $cut = false, $line_folding = false)
    {
        /* Quote the delimiter too, otherwise a break string containing "/"
         * (e.g. "<br />\n") yields an invalid pattern. */
        $breakRegex = '(?:' . preg_quote($break, '/') . ')';

        /* Number of characters of $break up to and including its last
         * newline; these don't count against the width of a wrapped line. */
        $rpos = self::rpos($break, "\n");
        if ($rpos === false) {
            $rpos = 0;
        } else {
            $rpos++;
        }
        $wrapped = '';
        $hasWrapped = false;

        while (self::length($string, 'UTF-8') > $width) {
            $line = self::substr($string, 0, $width + ($hasWrapped ? $rpos : 0), 'UTF-8');
            $string = self::substr($string, self::length($line, 'UTF-8'), null, 'UTF-8');

            // Make sure we didn't cut a word, unless we want hard breaks
            // anyway.
            if (!$cut && preg_match('/^(.+?)((\s|\r?\n).*)/us', $string, $match)) {
                $line .= $match[1];
                $string = $match[2];
            }

            // Wrap at existing line breaks.
            $regex = '/^(' . ($hasWrapped ? $breakRegex : '') . '.*?)(\r?\n)(.*)$/us';
            if (preg_match($regex, $line, $match)) {
                $wrapped .= $match[1] . $match[2];
                $string = $match[3] . $string;
                $hasWrapped = false;
                continue;
            }

            // Wrap at the last colon or semicolon followed by a whitespace if
            // doing line folding.
            if ($line_folding &&
                preg_match('/^(.*?)(;|:)(\s+.*)$/us', $line, $match)) {
                $wrapped .= $match[1] . $match[2];
                $string = $break . $match[3] . $string;
                $hasWrapped = true;
                continue;
            }

            // Wrap at the last whitespace of $line.
            $sub = $line_folding
                ? '(' . ($hasWrapped ? $breakRegex : '') . '.+[^\s])'
                : '(' . ($hasWrapped ? $breakRegex : '') . '.*)';

            if (preg_match('/^' . $sub . '(\s+)(.*)$/u', $line, $match)) {
                $wrapped .= $match[1];
                $string = $break . ($line_folding ? $match[2] : '')
                    . $match[3] . $string;
                $hasWrapped = true;
                continue;
            }

            // Hard wrap if necessary.
            if ($cut) {
                $wrapped .= $line;
                $string = $break . $string;
                $hasWrapped = true;
                continue;
            }

            $wrapped .= $line;
            $hasWrapped = false;
        }

        return $wrapped . $string;
    }

    /**
     * Wraps the text of a message.
     *
     * @param string $text        String containing the text to wrap.
     * @param integer $length     Wrap $text at this number of characters.
     * @param string $break_char  Character(s) to use when breaking lines.
     * @param boolean $quote      Ignore lines that are wrapped with the '>'
     *                            character (RFC 2646)? If true, we don't
     *                            remove any padding whitespace at the end of
     *                            the string.
     *
     * @return string  String containing the wrapped text.
     */
    public static function wrap($text, $length = 80, $break_char = "\n",
                                $quote = false)
    {
        $paragraphs = array();

        foreach (preg_split('/\r?\n/', $text) as $input) {
            if ($quote && (strpos($input, '>') === 0)) {
                $line = $input;
            } else {
                /* We need to handle the Usenet-style signature line
                 * separately; since the space after the two dashes is
                 * REQUIRED, we don't want to trim the line. */
                if ($input != '-- ') {
                    $input = rtrim($input);
                }
                $line = self::wordwrap($input, $length, $break_char);
            }

            $paragraphs[] = $line;
        }

        return implode($break_char, $paragraphs);
    }

    /**
     * Return a truncated string, suitable for notifications.
     *
     * @param string $text     The original string.
     * @param integer $length  The maximum length.
     *
     * @return string  The truncated string, if longer than $length.
     */
    public static function truncate($text, $length = 100)
    {
        return (self::length($text) > $length)
            ? rtrim(self::substr($text, 0, $length - 3)) . '...'
            : $text;
    }

    /**
     * Return an abbreviated string, with characters in the middle of the
     * excessively long string replaced by '...'.
     *
     * @param string $text     The original string.
     * @param integer $length  The length at which to abbreviate.
     *
     * @return string  The abbreviated string, if longer than $length.
     */
    public static function abbreviate($text, $length = 20)
    {
        if (self::length($text) <= $length) {
            return $text;
        }

        /* Use explicit integers: fractional offsets trigger an implicit
         * float-to-int deprecation on PHP 8.1+. If the budget is odd, the
         * head gets the extra character. */
        $half = ($length - 3) / 2;
        $head = max(0, (int) round($half));
        $tail = max(0, (int) $half);

        /* A tail offset of 0 would select the whole string, so skip the tail
         * entirely in that case. */
        return rtrim(self::substr($text, 0, $head))
            . '...'
            . (($tail > 0) ? ltrim(self::substr($text, -$tail)) : '');
    }

    /**
     * Returns the common leading part of two strings.
     *
     * The comparison is done byte by byte.
     *
     * @param string $str1  A string.
     * @param string $str2  Another string.
     *
     * @return string  The start of $str1 and $str2 that is identical in both.
     */
    public static function common($str1, $str2)
    {
        for ($result = '', $i = 0;
             isset($str1[$i]) && isset($str2[$i]) && $str1[$i] == $str2[$i];
             $i++) {
            $result .= $str1[$i];
        }
        return $result;
    }

    /**
     * Returns true if every character in the parameter is an alphabetic
     * character.
     *
     * @param string $string   The string to test.
     * @param string $charset  The charset to use when testing the string.
     *
     * @return boolean  True if the parameter was alphabetic only.
     */
    public static function isAlpha($string, $charset)
    {
        if (!Horde_Util::extensionExists('mbstring')) {
            return ctype_alpha($string);
        }

        $charset = self::_mbstringCharset($charset);
        $old_charset = mb_regex_encoding();

        if ($charset != $old_charset) {
            @mb_regex_encoding($charset);
        }
        /* Search the whole string for a non-alphabetic character.
         * mb_ereg_match() must not be used here: it only matches at the
         * start of the string and would test the first character only. */
        $alpha = !@mb_ereg('[^[:alpha:]]', $string);
        if ($charset != $old_charset) {
            @mb_regex_encoding($old_charset);
        }

        return $alpha;
    }

    /**
     * Returns true if every character in the parameter is a lowercase letter
     * in the given charset.
     *
     * @param string $string   The string to test.
     * @param string $charset  The charset to use when testing the string.
     *
     * @return boolean  True if the parameter was lowercase.
     */
    public static function isLower($string, $charset)
    {
        return ((self::lower($string, true, $charset) === $string) &&
                self::isAlpha($string, $charset));
    }

    /**
     * Returns true if every character in the parameter is an uppercase letter
     * in the given charset.
     *
     * @param string $string   The string to test.
     * @param string $charset  The charset to use when testing the string.
     *
     * @return boolean  True if the parameter was uppercase.
     */
    public static function isUpper($string, $charset)
    {
        return ((self::upper($string, true, $charset) === $string) &&
                self::isAlpha($string, $charset));
    }

    /**
     * Performs a multibyte safe regex match search on the text provided.
     *
     * @param string $text     The text to search.
     * @param array $regex     The regular expressions to use, without perl
     *                         regex delimiters (e.g. '/' or '|').
     * @param string $charset  The character set of the text.
     *
     * @return array  The matches array from the first regex that matches.
     */
    public static function regexMatch($text, $regex, $charset = null)
    {
        /* Matching is done in UTF-8 (the "u" modifier requires it). */
        if (!empty($charset)) {
            $regex = self::convertCharset($regex, $charset, 'utf-8');
            $text = self::convertCharset($text, $charset, 'utf-8');
        }

        $matches = array();
        foreach ($regex as $val) {
            if (preg_match('/' . $val . '/u', $text, $matches)) {
                break;
            }
        }

        if (!empty($charset)) {
            $matches = self::convertCharset($matches, 'utf-8', $charset);
        }

        return $matches;
    }

    /**
     * Check to see if a string is valid UTF-8.
     *
     * Follows the well-formed byte sequence table of STD 63 (RFC 3629):
     * overlong encodings, UTF-16 surrogates (U+D800..U+DFFF), code points
     * above U+10FFFF, stray continuation bytes and truncated sequences are
     * all rejected.
     *
     * @param string $text  The text to check.
     *
     * @return boolean  True if valid UTF-8.
     */
    public static function validUtf8($text)
    {
        $text = strval($text);

        for ($i = 0, $len = strlen($text); $i < $len; ++$i) {
            $c = ord($text[$i]);
            if ($c < 0x80) {
                // Plain ASCII.
                continue;
            }

            /* $j is the number of continuation bytes that must follow the
             * lead byte. $min/$max bound the FIRST continuation byte, which
             * is restricted for some lead bytes; all further continuation
             * bytes must be in 0x80..0xBF. */
            $min = 0x80;
            $max = 0xBF;
            if (($c >= 0xC2) && ($c <= 0xDF)) {
                // 0xC0 and 0xC1 would only encode overlong forms.
                $j = 1;
            } elseif (($c >= 0xE0) && ($c <= 0xEF)) {
                $j = 2;
                if ($c == 0xE0) {
                    // Exclude overlong 3-byte forms.
                    $min = 0xA0;
                } elseif ($c == 0xED) {
                    // Exclude UTF-16 surrogates.
                    $max = 0x9F;
                }
            } elseif (($c >= 0xF0) && ($c <= 0xF4)) {
                $j = 3;
                if ($c == 0xF0) {
                    // Exclude overlong 4-byte forms.
                    $min = 0x90;
                } elseif ($c == 0xF4) {
                    // Exclude code points above U+10FFFF.
                    $max = 0x8F;
                }
            } else {
                // Stray continuation byte, overlong lead (0xC0, 0xC1) or a
                // lead byte that RFC 3629 no longer allows (0xF5..0xFF).
                return false;
            }

            // The last continuation byte sits at index $i + $j.
            if (($i + $j) >= $len) {
                return false;
            }

            do {
                $c = ord($text[++$i]);
                if (($c < $min) || ($c > $max)) {
                    return false;
                }
                $min = 0x80;
                $max = 0xBF;
            } while (--$j);
        }

        return true;
    }

    /**
     * Workaround charsets that don't work with mbstring functions.
     *
     * @param string $charset  The original charset.
     *
     * @return string  The charset to use with mbstring functions.
     */
    protected static function _mbstringCharset($charset)
    {
        /* mbstring functions do not handle the 'ks_c_5601-1987' &
         * 'ks_c_5601-1989' charsets.  However, these charsets are used, for
         * example, by various versions of Outlook to send Korean characters.
         * Use UHC (CP949) encoding instead. See, e.g.,
         * http://lists.w3.org/Archives/Public/ietf-charsets/2001AprJun/0030.html */
        return in_array(self::lower($charset), array('ks_c_5601-1987', 'ks_c_5601-1989'))
            ? 'UHC'
            : $charset;
    }

    /**
     * Strip UTF-8 byte order mark (BOM) from string data.
     *
     * @param string $str  Input string (UTF-8).
     *
     * @return string  Stripped string (UTF-8).
     */
    public static function trimUtf8Bom($str)
    {
        return (substr($str, 0, 3) == pack('CCC', 239, 187, 191))
            ? substr($str, 3)
            : $str;
    }

}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body