Differences Between: [Versions 310 and 403] [Versions 311 and 403] [Versions 39 and 403] [Versions 400 and 403] [Versions 401 and 403]
<?php
/**
 * Provides static methods for charset and locale safe string manipulation.
 *
 * Copyright 2003-2017 Horde LLC (http://www.horde.org/)
 *
 * See the enclosed file LICENSE for license information (LGPL). If you
 * did not receive this file, see http://www.horde.org/licenses/lgpl21.
 *
 * @todo Split up in Horde_String_Multibyte for multibyte-safe methods and
 *       Horde_String_Locale for locale-safe methods.
 *
 * @author   Jan Schneider <jan@horde.org>
 * @category Horde
 * @license  http://www.horde.org/licenses/lgpl21 LGPL 2.1
 * @package  Util
 */
class Horde_String
{
    /**
     * lower() cache, keyed by the input string.
     *
     * @var array
     */
    protected static $_lowers = array();

    /**
     * upper() cache, keyed by the input string.
     *
     * @var array
     */
    protected static $_uppers = array();

    /**
     * Converts a string from one charset to another.
     *
     * Uses the iconv or the mbstring extensions.
     * The original string is returned if conversion failed or none
     * of the extensions were available.
     *
     * @param mixed $input    The data to be converted. If $input is an
     *                        array, the array's keys and values get
     *                        converted recursively. If it is an object, a
     *                        clone with converted public properties is
     *                        returned.
     * @param string $from    The string's current charset.
     * @param string $to      The charset to convert the string to.
     * @param boolean $force  Force conversion even if $from equals $to?
     *
     * @return mixed  The converted input data.
     */
    public static function convertCharset($input, $from, $to, $force = false)
    {
        /* Don't bother converting numbers. */
        if (is_numeric($input)) {
            return $input;
        }

        /* If the from and to character sets are identical, return now. */
        if (!$force && $from == $to) {
            return $input;
        }

        /* Compare again after normalizing the case of the charset names. */
        $from = self::lower($from);
        $to = self::lower($to);
        if (!$force && $from == $to) {
            return $input;
        }

        if (is_array($input)) {
            $tmp = array();
            foreach ($input as $key => $val) {
                $tmp[self::_convertCharset($key, $from, $to)] = self::convertCharset($val, $from, $to, $force);
            }
            return $tmp;
        }

        if (is_object($input)) {
            // PEAR_Error/Exception objects are almost guaranteed to contain
            // recursion, which will cause a segfault in PHP. We should never
            // reach this line, but add a check.
            if (($input instanceof Exception) ||
                ($input instanceof PEAR_Error)) {
                return '';
            }

            /* Work on a copy so the caller's object is left untouched. */
            $input = clone $input;
            $vars = get_object_vars($input);
            foreach ($vars as $key => $val) {
                $input->$key = self::convertCharset($val, $from, $to, $force);
            }
            return $input;
        }

        if (!is_string($input)) {
            return $input;
        }

        return self::_convertCharset($input, $from, $to);
    }

    /**
     * Internal function used to do charset conversion.
     *
     * Tries, in order: utf8_encode()/utf8_decode(), the UTF7-IMAP
     * converter, iconv with transliteration, and mbstring. The input is
     * returned unchanged if none of them succeeds.
     *
     * @param string $input  See self::convertCharset().
     * @param string $from   See self::convertCharset(). Expected to be
     *                       lowercase already.
     * @param string $to     See self::convertCharset(). Expected to be
     *                       lowercase already.
     *
     * @return string  The converted string.
     */
    protected static function _convertCharset($input, $from, $to)
    {
        /* Use utf8_[en|de]code() if possible and if the string isn't too
         * large (less than 16 MB = 16 * 1024 * 1024 = 16777216 bytes) - these
         * functions use more memory. */
        if (Horde_Util::extensionExists('xml') &&
            ((strlen($input) < 16777216) ||
             !Horde_Util::extensionExists('iconv') ||
             !Horde_Util::extensionExists('mbstring'))) {
            if (($to == 'utf-8') &&
                function_exists('utf8_encode') &&
                in_array($from, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
                return @utf8_encode($input);
            }

            if (($from == 'utf-8') &&
                function_exists('utf8_decode') &&
                in_array($to, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
                return @utf8_decode($input);
            }
        }

        /* Try UTF7-IMAP conversions. */
        if (($from == 'utf7-imap') || ($to == 'utf7-imap')) {
            try {
                if ($from == 'utf7-imap') {
                    return self::convertCharset(Horde_Imap_Client_Utf7imap::Utf7ImapToUtf8($input), 'UTF-8', $to);
                } else {
                    if ($from == 'utf-8') {
                        $conv = $input;
                    } else {
                        $conv = self::convertCharset($input, $from, 'UTF-8');
                    }
                    return Horde_Imap_Client_Utf7imap::Utf8ToUtf7Imap($conv);
                }
            } catch (Horde_Imap_Client_Exception $e) {
                return $input;
            }
        }

        /* Try iconv with transliteration. */
        if (Horde_Util::extensionExists('iconv')) {
            unset($php_errormsg);
            ini_set('track_errors', 1);
            $out = @iconv($from, $to . '//TRANSLIT', $input);
            $errmsg = isset($php_errormsg);
            ini_restore('track_errors');
            if (!$errmsg && $out !== false) {
                return $out;
            }
        }

        /* Try mbstring. */
        if (Horde_Util::extensionExists('mbstring')) {
            try {
                $out = @mb_convert_encoding($input, $to, self::_mbstringCharset($from));
                if (!empty($out)) {
                    return $out;
                }
            } catch (ValueError $e) {
                // catch error thrown under PHP 8.0, if mbstring does not
                // support the encoding
            }
        }

        return $input;
    }

    /**
     * Makes a string lowercase.
     *
     * @param string $string   The string to be converted.
     * @param boolean $locale  If true the string will be converted based on
     *                         a given charset, otherwise the conversion is
     *                         done with LC_CTYPE temporarily set to 'C'.
     * @param string $charset  If $locale is true, the charset to use when
     *                         converting.
     *
     * @return string  The string with lowercase characters.
     * @throws InvalidArgumentException  If $locale is true, mbstring is
     *                                   available and $charset is null.
     */
    public static function lower($string, $locale = false, $charset = null)
    {
        if ($locale) {
            if (Horde_Util::extensionExists('mbstring')) {
                if (is_null($charset)) {
                    throw new InvalidArgumentException('$charset argument must not be null');
                }
                try {
                    $ret = @mb_strtolower($string, self::_mbstringCharset($charset));
                    if (!empty($ret)) {
                        return $ret;
                    }
                } catch (ValueError $e) {
                    // PHP 8 throws if mbstring does not support the
                    // charset; fall back to strtolower() below.
                }
            }
            return strtolower($string);
        }

        /* null lowercases to the empty string; return before it is used as
         * a cache key or passed to strtolower(). */
        if ($string === null) {
            return '';
        }

        if (!isset(self::$_lowers[$string])) {
            /* Convert with the 'C' locale, then restore the previous one. */
            $language = setlocale(LC_CTYPE, 0);
            setlocale(LC_CTYPE, 'C');
            self::$_lowers[$string] = strtolower($string);
            setlocale(LC_CTYPE, $language);
        }

        return self::$_lowers[$string];
    }

    /**
     * Makes a string uppercase.
     *
     * @param string $string   The string to be converted.
     * @param boolean $locale  If true the string will be converted based on a
     *                         given charset, otherwise the conversion is done
     *                         with LC_CTYPE temporarily set to 'C'.
     * @param string $charset  If $locale is true, the charset to use when
     *                         converting.
     *
     * @return string  The string with uppercase characters.
     * @throws InvalidArgumentException  If $locale is true, mbstring is
     *                                   available and $charset is null.
     */
    public static function upper($string, $locale = false, $charset = null)
    {
        if ($locale) {
            if (Horde_Util::extensionExists('mbstring')) {
                if (is_null($charset)) {
                    throw new InvalidArgumentException('$charset argument must not be null');
                }
                try {
                    $ret = @mb_strtoupper($string, self::_mbstringCharset($charset));
                    if (!empty($ret)) {
                        return $ret;
                    }
                } catch (ValueError $e) {
                    // PHP 8 throws if mbstring does not support the
                    // charset; fall back to strtoupper() below.
                }
            }
            return strtoupper($string);
        }

        /* Same null handling as lower(): null uppercases to ''. */
        if ($string === null) {
            return '';
        }

        if (!isset(self::$_uppers[$string])) {
            /* Convert with the 'C' locale, then restore the previous one. */
            $language = setlocale(LC_CTYPE, 0);
            setlocale(LC_CTYPE, 'C');
            self::$_uppers[$string] = strtoupper($string);
            setlocale(LC_CTYPE, $language);
        }

        return self::$_uppers[$string];
    }

    /**
     * Returns a string with the first letter capitalized if it is
     * alphabetic.
     *
     * @param string $string   The string to be capitalized.
     * @param boolean $locale  If true the string will be converted based on a
     *                         given charset, otherwise byte-wise.
     * @param string $charset  The charset to use; required when $locale is
     *                         true.
     *
     * @return string  The capitalized string.
     * @throws InvalidArgumentException  If $locale is true and $charset is
     *                                   null.
     */
    public static function ucfirst($string, $locale = false, $charset = null)
    {
        if ($locale) {
            if (is_null($charset)) {
                throw new InvalidArgumentException('$charset argument must not be null');
            }
            $first = self::substr($string, 0, 1, $charset);
            if (self::isAlpha($first, $charset)) {
                $string = self::upper($first, true, $charset) . self::substr($string, 1, null, $charset);
            }
        } else {
            $string = self::upper(substr($string, 0, 1), false) . substr($string, 1);
        }

        return $string;
    }

    /**
     * Returns a string with the first letter of each word capitalized if it is
     * alphabetic.
     *
     * Sentences are split into words at whitespace; the whitespace itself
     * is preserved in the result.
     *
     * @param string $string   The string to be capitalized.
     * @param boolean $locale  If true the string will be converted based on a
     *                         given charset, otherwise byte-wise.
     * @param string $charset  The charset to use; required when $locale is
     *                         true.
     *
     * @return string  The capitalized string.
     */
    public static function ucwords($string, $locale = false, $charset = null)
    {
        /* With PREG_SPLIT_DELIM_CAPTURE the even indexes hold the words and
         * the odd indexes hold the whitespace separators. */
        $words = preg_split('/(\s+)/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
        for ($i = 0, $c = count($words); $i < $c; $i += 2) {
            $words[$i] = self::ucfirst($words[$i], $locale, $charset);
        }
        return implode('', $words);
    }

    /**
     * Returns part of a string.
     *
     * Tries mbstring, then iconv, then intl. If none of those extensions is
     * available the byte-wise substr() is used; if at least one was tried
     * and failed, an empty string is returned.
     *
     * @param string $string   The string to be converted.
     * @param integer $start   The part's start position, zero based.
     * @param integer $length  The part's length. If null, everything from
     *                         $start to the end of the string.
     * @param string $charset  The charset to use when calculating the part's
     *                         position and length.
     *
     * @return string  The string's part.
     */
    public static function substr($string, $start, $length = null,
                                  $charset = 'UTF-8')
    {
        if (is_null($length)) {
            $length = self::length($string, $charset) - $start;
        }

        if ($length === 0) {
            return '';
        }

        $error = false;

        /* Try mbstring. */
        if (Horde_Util::extensionExists('mbstring')) {
            try {
                $ret = @mb_substr($string, $start, $length, self::_mbstringCharset($charset));
            } catch (ValueError $e) {
                /* PHP 8 throws if mbstring does not support the charset;
                 * treat it like the pre-8.0 failure result. */
                $ret = '';
            }

            /* mb_substr() returns empty string on failure. */
            if (strlen($ret)) {
                return $ret;
            }
            $error = true;
        }

        /* Try iconv. */
        if (Horde_Util::extensionExists('iconv')) {
            $ret = @iconv_substr($string, $start, $length, $charset);

            /* iconv_substr() returns false on failure. */
            if ($ret !== false) {
                return $ret;
            }
            $error = true;
        }

        /* Try intl. */
        if (Horde_Util::extensionExists('intl')) {
            $ret = self::convertCharset(
                @grapheme_substr(
                    self::convertCharset($string, $charset, 'UTF-8'),
                    $start,
                    $length
                ),
                'UTF-8',
                $charset
            );

            /* grapheme_substr() returns false on failure. */
            if ($ret !== false) {
                return $ret;
            }
            $error = true;
        }

        return $error
            ? ''
            : substr($string, $start, $length);
    }

    /**
     * Returns the character (not byte) length of a string.
     *
     * @param string $string   The string to return the length of.
     * @param string $charset  The charset to use when calculating the string's
     *                         length.
     *
     * @return integer  The string's length.
     */
    public static function length($string, $charset = 'UTF-8')
    {
        $charset = self::lower($charset);

        /* Shortcut for UTF-8: count the bytes of the ISO-8859-1
         * conversion of the string. */
        if ($charset == 'utf-8' || $charset == 'utf8') {
            if (Horde_Util::extensionExists('mbstring')) {
                return strlen(mb_convert_encoding($string, 'ISO-8859-1', 'UTF-8'));
            } else if (function_exists('utf8_decode')) {
                return strlen(@utf8_decode($string));
            }
        }

        if (Horde_Util::extensionExists('mbstring')) {
            try {
                $ret = @mb_strlen($string, self::_mbstringCharset($charset));
                if (!empty($ret)) {
                    return $ret;
                }
            } catch (ValueError $e) {
                // PHP 8 throws if mbstring does not support the charset;
                // fall through to the next strategy.
            }
        }
        if (Horde_Util::extensionExists('intl')) {
            return grapheme_strlen(
                self::convertCharset($string, $charset, 'UTF-8')
            );
        }

        return strlen($string);
    }

    /**
     * Returns the numeric position of the first occurrence of $needle
     * in the $haystack string.
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of first occurrence.
     */
    public static function pos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'strpos');
    }

    /**
     * Returns the numeric position of the first case-insensitive occurrence
     * of $needle in the $haystack string.
     *
     * @since 2.5.0
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of first case-insensitive occurrence.
     */
    public static function ipos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'stripos');
    }

    /**
     * Returns the numeric position of the last occurrence of $needle
     * in the $haystack string.
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of last occurrence.
     */
    public static function rpos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'strrpos');
    }

    /**
     * Returns the numeric position of the last case-insensitive occurrence of
     * $needle in the $haystack string.
     *
     * @since 2.5.0
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     *
     * @return integer  The position of last case-insensitive occurrence.
     */
    public static function ripos(
        $haystack, $needle, $offset = 0, $charset = 'UTF-8'
    )
    {
        return self::_pos($haystack, $needle, $offset, $charset, 'strripos');
    }

    /**
     * Perform string position searches.
     *
     * Tries the mbstring variant of $func, then the intl (grapheme_*)
     * variant, and finally the native byte-wise function.
     *
     * @param string $haystack  The string to search through.
     * @param string $needle    The string to search for.
     * @param integer $offset   Character in $haystack to start searching at.
     * @param string $charset   Charset of $needle.
     * @param string $func      Function to use: 'strpos', 'stripos',
     *                          'strrpos' or 'strripos'.
     *
     * @return integer  The position of occurrence.
     */
    protected static function _pos(
        $haystack, $needle, $offset, $charset, $func
    )
    {
        if (Horde_Util::extensionExists('mbstring')) {
            unset($php_errormsg);
            $track_errors = ini_set('track_errors', 1);
            $failed = false;
            try {
                $ret = @call_user_func('mb_' . $func, $haystack, $needle, $offset, self::_mbstringCharset($charset));
            } catch (ValueError $e) {
                /* PHP 8 reports invalid arguments (e.g. an unsupported
                 * charset) by throwing instead of raising a warning. */
                $failed = true;
            }
            ini_set('track_errors', $track_errors);
            if (!$failed && !isset($php_errormsg)) {
                return $ret;
            }
        }

        if (Horde_Util::extensionExists('intl')) {
            unset($php_errormsg);
            $track_errors = ini_set('track_errors', 1);
            $failed = false;
            try {
                $ret = self::convertCharset(
                    @call_user_func(
                        'grapheme_' . $func,
                        self::convertCharset($haystack, $charset, 'UTF-8'),
                        self::convertCharset($needle, $charset, 'UTF-8'),
                        $offset
                    ),
                    'UTF-8',
                    $charset
                );
            } catch (ValueError $e) {
                $failed = true;
            }
            ini_set('track_errors', $track_errors);
            if (!$failed && !isset($php_errormsg)) {
                return $ret;
            }
        }

        return $func($haystack, $needle, $offset);
    }

    /**
     * Returns a string padded to a certain length with another string.
     * This method behaves exactly like str_pad() but is multibyte safe.
     *
     * @param string $input    The string to be padded.
     * @param integer $length  The length of the resulting string.
     * @param string $pad      The string to pad the input string with. Must
     *                         be in the same charset like the input string.
     * @param const $type      The padding type. One of STR_PAD_LEFT,
     *                         STR_PAD_RIGHT, or STR_PAD_BOTH.
     * @param string $charset  The charset of the input and the padding
     *                         strings.
     *
     * @return string  The padded string.
     */
    public static function pad($input, $length, $pad = ' ',
                               $type = STR_PAD_RIGHT, $charset = 'UTF-8')
    {
        $mb_length = self::length($input, $charset);
        $sb_length = strlen($input);
        $pad_length = self::length($pad, $charset);

        /* Return if we already have the length. */
        if ($mb_length >= $length) {
            return $input;
        }

        /* Shortcut for single byte strings. */
        if ($mb_length == $sb_length && $pad_length == strlen($pad)) {
            return str_pad($input, $length, $pad, $type);
        }

        /* Repeat the pad string often enough to cover the gap, then cut it
         * to the exact number of characters. */
        switch ($type) {
        case STR_PAD_LEFT:
            $left = $length - $mb_length;
            $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) . $input;
            break;

        case STR_PAD_BOTH:
            $left = floor(($length - $mb_length) / 2);
            $right = ceil(($length - $mb_length) / 2);
            $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) .
                $input .
                self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
            break;

        case STR_PAD_RIGHT:
            $right = $length - $mb_length;
            $output = $input . self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
            break;
        }

        return $output;
    }

    /**
     * Wraps the text of a message.
     *
     * @param string $string         String containing the text to wrap.
     * @param integer $width         Wrap the string at this number of
     *                               characters.
     * @param string $break          Character(s) to use when breaking lines.
     * @param boolean $cut           Whether to cut inside words if a line
     *                               can't be wrapped.
     * @param boolean $line_folding  Whether to apply line folding rules per
     *                               RFC 822 or similar. The correct break
     *                               characters including leading whitespace
     *                               have to be specified too.
     *
     * @return string  String containing the wrapped text.
     */
    public static function wordwrap($string, $width = 75, $break = "\n",
                                    $cut = false, $line_folding = false)
    {
        /* Quote the '/' delimiter too: $breakRegex is embedded in
         * '/'-delimited patterns below. */
        $breakRegex = '(?:' . preg_quote($break, '/') . ')';

        /* Number of characters of $break up to and including its last
         * newline; added to the line width right after a wrap. */
        $rpos = self::rpos($break, "\n");
        if ($rpos === false) {
            $rpos = 0;
        } else {
            $rpos++;
        }
        $wrapped = '';
        $hasWrapped = false;

        while (self::length($string, 'UTF-8') > $width) {
            $line = self::substr($string, 0, $width + ($hasWrapped ? $rpos : 0), 'UTF-8');
            $string = self::substr($string, self::length($line, 'UTF-8'), null, 'UTF-8');

            // Make sure we didn't cut a word, unless we want hard breaks
            // anyway.
            if (!$cut && preg_match('/^(.+?)((\s|\r?\n).*)/us', $string, $match)) {
                $line .= $match[1];
                $string = $match[2];
            }

            // Wrap at existing line breaks.
            $regex = '/^(' . ($hasWrapped ? $breakRegex : '') . '.*?)(\r?\n)(.*)$/us';
            if (preg_match($regex, $line, $match)) {
                $wrapped .= $match[1] . $match[2];
                $string = $match[3] . $string;
                $hasWrapped = false;
                continue;
            }

            // Wrap at a colon or semicolon followed by a whitespace if
            // doing line folding.
            if ($line_folding &&
                preg_match('/^(.*?)(;|:)(\s+.*)$/us', $line, $match)) {
                $wrapped .= $match[1] . $match[2];
                $string = $break . $match[3] . $string;
                $hasWrapped = true;
                continue;
            }

            // Wrap at the last whitespace of $line.
            $sub = $line_folding
                ? '(' . ($hasWrapped ? $breakRegex : '') . '.+[^\s])'
                : '(' . ($hasWrapped ? $breakRegex : '') . '.*)';

            if (preg_match('/^' . $sub . '(\s+)(.*)$/u', $line, $match)) {
                $wrapped .= $match[1];
                $string = $break . ($line_folding ? $match[2] : '')
                    . $match[3] . $string;
                $hasWrapped = true;
                continue;
            }

            // Hard wrap if necessary.
            if ($cut) {
                $wrapped .= $line;
                $string = $break . $string;
                $hasWrapped = true;
                continue;
            }

            $wrapped .= $line;
            $hasWrapped = false;
        }

        return $wrapped . $string;
    }

    /**
     * Wraps the text of a message.
     *
     * @param string $text        String containing the text to wrap.
     * @param integer $length     Wrap $text at this number of characters.
     * @param string $break_char  Character(s) to use when breaking lines.
     * @param boolean $quote      Ignore lines that are wrapped with the '>'
     *                            character (RFC 2646)? If true, we don't
     *                            remove any padding whitespace at the end of
     *                            the string.
     *
     * @return string  String containing the wrapped text.
     */
    public static function wrap($text, $length = 80, $break_char = "\n",
                                $quote = false)
    {
        $paragraphs = array();

        foreach (preg_split('/\r?\n/', $text) as $input) {
            if ($quote && (strpos($input, '>') === 0)) {
                $line = $input;
            } else {
                /* We need to handle the Usenet-style signature line
                 * separately; since the space after the two dashes is
                 * REQUIRED, we don't want to trim the line. */
                if ($input != '-- ') {
                    $input = rtrim($input);
                }
                $line = self::wordwrap($input, $length, $break_char);
            }

            $paragraphs[] = $line;
        }

        return implode($break_char, $paragraphs);
    }

    /**
     * Return a truncated string, suitable for notifications.
     *
     * @param string $text     The original string.
     * @param integer $length  The maximum length.
     *
     * @return string  The truncated string, if longer than $length.
     */
    public static function truncate($text, $length = 100)
    {
        return (self::length($text) > $length)
            ? rtrim(self::substr($text, 0, $length - 3)) . '...'
            : $text;
    }

    /**
     * Return an abbreviated string, with characters in the middle of the
     * excessively long string replaced by '...'.
     *
     * @param string $text     The original string.
     * @param integer $length  The length at which to abbreviate.
     *
     * @return string  The abbreviated string, if longer than $length.
     */
    public static function abbreviate($text, $length = 20)
    {
        if (!(self::length($text) > $length)) {
            return $text;
        }

        /* Characters available on each side of the '...'. Convert to
         * integers explicitly instead of handing fractional floats to the
         * substring functions: the head is rounded, the tail truncated. */
        $half = ($length - 3) / 2;
        $head = (int) round($half);
        $tail = (int) $half;

        return rtrim(self::substr($text, 0, $head)) . '...' . ltrim(self::substr($text, -$tail));
    }

    /**
     * Returns the common leading part of two strings.
     *
     * The comparison is done byte by byte.
     *
     * @param string $str1  A string.
     * @param string $str2  Another string.
     *
     * @return string  The start of $str1 and $str2 that is identical in both.
     */
    public static function common($str1, $str2)
    {
        for ($result = '', $i = 0;
             isset($str1[$i]) && isset($str2[$i]) && $str1[$i] == $str2[$i];
             $i++) {
            $result .= $str1[$i];
        }
        return $result;
    }

    /**
     * Returns true if every character in the parameter is an alphabetic
     * character.
     *
     * @param string $string   The string to test.
     * @param string $charset  The charset to use when testing the string.
     *
     * @return boolean  True if the parameter was alphabetic only.
     */
    public static function isAlpha($string, $charset)
    {
        if (!Horde_Util::extensionExists('mbstring')) {
            return ctype_alpha($string);
        }

        $charset = self::_mbstringCharset($charset);
        $old_charset = mb_regex_encoding();

        if ($charset != $old_charset) {
            @mb_regex_encoding($charset);
        }
        /* mb_ereg_match() only matches at the beginning of the string, so
         * allow an arbitrary prefix in order to find a non-alphabetic
         * character at any position, not just the first one. */
        $alpha = !@mb_ereg_match('.*[^[:alpha:]]', $string);
        if ($charset != $old_charset) {
            @mb_regex_encoding($old_charset);
        }

        return $alpha;
    }

    /**
     * Returns true if every character in the parameter is a lowercase letter
     * in the current locale.
     *
     * @param string $string   The string to test.
     * @param string $charset  The charset to use when testing the string.
     *
     * @return boolean  True if the parameter was lowercase.
     */
    public static function isLower($string, $charset)
    {
        return ((self::lower($string, true, $charset) === $string) &&
                self::isAlpha($string, $charset));
    }

    /**
     * Returns true if every character in the parameter is an uppercase letter
     * in the current locale.
     *
     * @param string $string   The string to test.
     * @param string $charset  The charset to use when testing the string.
     *
     * @return boolean  True if the parameter was uppercase.
     */
    public static function isUpper($string, $charset)
    {
        return ((self::upper($string, true, $charset) === $string) &&
                self::isAlpha($string, $charset));
    }

    /**
     * Performs a multibyte safe regex match search on the text provided.
     *
     * @param string $text     The text to search.
     * @param array $regex     The regular expressions to use, without perl
     *                         regex delimiters (e.g. '/' or '|').
     * @param string $charset  The character set of the text.
     *
     * @return array  The matches array from the first regex that matches.
     */
    public static function regexMatch($text, $regex, $charset = null)
    {
        /* Match in UTF-8 so the 'u' modifier can be used. */
        if (!empty($charset)) {
            $regex = self::convertCharset($regex, $charset, 'utf-8');
            $text = self::convertCharset($text, $charset, 'utf-8');
        }

        $matches = array();
        foreach ($regex as $val) {
            if (preg_match('/' . $val . '/u', $text, $matches)) {
                break;
            }
        }

        /* Hand the matches back in the caller's charset. */
        if (!empty($charset)) {
            $matches = self::convertCharset($matches, 'utf-8', $charset);
        }

        return $matches;
    }

    /**
     * Check to see if a string is valid UTF-8.
     *
     * @param string $text  The text to check.
     *
     * @return boolean  True if valid UTF-8.
     */
    public static function validUtf8($text)
    {
        $text = strval($text);

        // First check for illegal surrogate pair sequences. See RFC 3629.
        if (preg_match('/\xE0[\x80-\x9F][\x80-\xBF]|\xED[\xA0-\xBF][\x80-\xBF]/S', $text)) {
            return false;
        }

        for ($i = 0, $len = strlen($text); $i < $len; ++$i) {
            $c = ord($text[$i]);
            /* Every byte with the high bit set, including 0x80 itself,
             * must be part of a multi-byte sequence. */
            if ($c >= 128) {
                if ($c > 247) {
                    // STD 63 (RFC 3629) eliminates 5 & 6-byte characters.
                    return false;
                } elseif ($c > 239) {
                    $j = 3;
                } elseif ($c > 223) {
                    $j = 2;
                } elseif ($c > 191) {
                    $j = 1;
                } else {
                    /* Continuation byte without a lead byte. */
                    return false;
                }

                /* The $j continuation bytes must all lie inside the
                 * string; the last one is at index $i + $j. */
                if (($i + $j) >= $len) {
                    return false;
                }

                do {
                    $c = ord($text[++$i]);
                    if (($c < 128) || ($c > 191)) {
                        return false;
                    }
                } while (--$j);
            }
        }

        return true;
    }

    /**
     * Workaround charsets that don't work with mbstring functions.
     *
     * @param string $charset  The original charset.
     *
     * @return string  The charset to use with mbstring functions.
     */
    protected static function _mbstringCharset($charset)
    {
        /* mbstring functions do not handle the 'ks_c_5601-1987' &
         * 'ks_c_5601-1989' charsets. However, these charsets are used, for
         * example, by various versions of Outlook to send Korean characters.
         * Use UHC (CP949) encoding instead. See, e.g.,
         * http://lists.w3.org/Archives/Public/ietf-charsets/2001AprJun/0030.html */
        return in_array(self::lower($charset), array('ks_c_5601-1987', 'ks_c_5601-1989'))
            ? 'UHC'
            : $charset;
    }

    /**
     * Strip UTF-8 byte order mark (BOM) from string data.
     *
     * @param string $str  Input string (UTF-8).
     *
     * @return string  Stripped string (UTF-8).
     */
    public static function trimUtf8Bom($str)
    {
        return (substr($str, 0, 3) == pack('CCC', 239, 187, 191))
            ? substr($str, 3)
            : $str;
    }

}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body