Differences Between: [Versions 401 and 402]
<?php

namespace PhpXmlRpc\Helper;

use PhpXmlRpc\PhpXmlRpc;
use PhpXmlRpc\Traits\DeprecationLogger;
use PhpXmlRpc\Value;

/**
 * Deals with parsing the XML.
 * @see http://xmlrpc.com/spec.md
 *
 * @todo implement an interface to allow for alternative implementations
 *       - make access to $_xh protected, return more high-level data structures
 *       - move the private parts of $_xh to the internal-use parsing-options config
 *       - add parseRequest, parseResponse, parseValue methods
 * @todo if iconv() or mb_string() are available, we could allow to convert the received xml to a custom charset encoding
 *       while parsing, which is faster than doing it later by going over the rebuilt data structure
 * @todo rename? This is an xml-rpc parser, not a generic xml parser...
 *
 * @property array $xmlrpc_valid_parents deprecated - public access left in purely for BC
 * @property int $accept deprecated - (protected) access left in purely for BC
 */
class XMLParser
{
    use DeprecationLogger;

    // Accepted values for the $returnType argument of parse(): they select which end-element handler gets registered
    const RETURN_XMLRPCVALS = 'xmlrpcvals';
    const RETURN_EPIVALS = 'epivals';
    const RETURN_PHP = 'phpvals';

    // Bit flags for the $accept argument of parse(): each one allows a different top-level xml element
    const ACCEPT_REQUEST = 1;
    const ACCEPT_RESPONSE = 2;
    const ACCEPT_VALUE = 4;
    const ACCEPT_FAULT = 8;

    /**
     * @var int
     * The max length beyond which data will get truncated in error messages
     */
    protected $maxLogValueLength = 100;

    /**
     * @var array
     * Used to store state during parsing and to pass parsing results to callers.
     * Quick explanation of components:
     *  private:
     *    ac - used to accumulate values
     *    stack - array with genealogy of xml elements names, used to validate nesting of xml-rpc elements
     *    valuestack - array used for parsing arrays and structs
     *    lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings
     *         (values: 0=not looking, 1=looking, 3=found)
     *    vt - not part of the initial array, it is added by the element handlers: the type of the value being parsed
     *         ('value' as long as no typed child element was found, 'data' after an ARRAY or STRUCT was opened,
     *         null after a PARAM or MEMBER was opened)
     *    php_class - not part of the initial array, it is added by the element handlers: the content of the PHP_CLASS
     *         attribute found on an ARRAY or STRUCT element, if any
     *  public:
     *    isf - used to indicate an xml-rpc response fault (1), invalid xml-rpc fault (2), xml parsing fault (3)
     *    isf_reason - used for storing xml-rpc response fault string
     *    value - used to store the value in responses
     *    method - used to store method name in requests
     *    params - used to store parameters in requests
     *    pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values
     *    rt - 'methodcall', 'methodresponse', 'value' or 'fault' (the last one used only in EPI emulation mode)
     */
    protected $_xh = array(
        'ac' => '',
        'stack' => array(),
        'valuestack' => array(),
        'lv' => 0,
        'isf' => 0,
        'isf_reason' => '',
        'value' => null,
        'method' => false,
        'params' => array(),
        'pt' => array(),
        'rt' => '',
    );

    /**
     * @var array[]
     * Maps each known (upper-cased) element name to the list of element names which are allowed as its direct parent.
     * It is checked when an opening tag is found for any element except the top-level one.
     */
    protected $xmlrpc_valid_parents = array(
        'VALUE' => array('MEMBER', 'DATA', 'PARAM', 'FAULT'),
        'BOOLEAN' => array('VALUE'),
        'I4' => array('VALUE'),
        'I8' => array('VALUE'),
        'EX:I8' => array('VALUE'),
        'INT' => array('VALUE'),
        'STRING' => array('VALUE'),
        'DOUBLE' => array('VALUE'),
        'DATETIME.ISO8601' => array('VALUE'),
        'BASE64' => array('VALUE'),
        'MEMBER' => array('STRUCT'),
        'NAME' => array('MEMBER'),
        'DATA' => array('ARRAY'),
        'ARRAY' => array('VALUE'),
        'STRUCT' => array('VALUE'),
        'PARAM' => array('PARAMS'),
        'METHODNAME' => array('METHODCALL'),
        'PARAMS' => array('METHODCALL', 'METHODRESPONSE'),
        'FAULT' => array('METHODRESPONSE'),
        'NIL' => array('VALUE'), // only used when extension activated
        'EX:NIL' => array('VALUE'), // only used when extension activated
    );

    /** @var array $parsing_options the options received in the constructor; parse() merges its own $options on top of them */
    protected $parsing_options = array();

    /** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */
    //protected $accept = 3;

    /** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */
    protected $maxChunkLength = 4194304;
    /** @var array
     * Used keys: accept, target_charset, methodname_callback, plus the ones set here.
     * We initialize it partially to help keep BC with subclasses which might have reimplemented `parse()` but not
     * the element handler methods
     */
    protected $current_parsing_options = array(
        'xmlrpc_null_extension' => false,
        'xmlrpc_return_datetimes' => false,
        'xmlrpc_reject_invalid_values' => false
    );

    /**
     * Stores the given options. They are not validated here: validation happens on every call to parse().
     *
     * @param array $options integer keys: options passed to the inner xml parser
     *                       string keys:
     *                       - target_charset (string)
     *                       - methodname_callback (callable)
     *                       - xmlrpc_null_extension (bool)
     *                       - xmlrpc_return_datetimes (bool)
     *                       - xmlrpc_reject_invalid_values (bool)
     */
    public function __construct(array $options = array())
    {
        $this->parsing_options = $options;
    }

    /**
     * Parses an xml-rpc xml string. Results of the parsing are found in $this->_xh.
     * Logs to the error log any issues which do not cause the parsing to fail.
     *
     * @param string $data
     * @param string $returnType self::RETURN_XMLRPCVALS, self::RETURN_PHP, self::RETURN_EPIVALS
     * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE
     * @param array $options integer-key options are passed to the xml parser, string-key options are used independently.
     *                       These options are added to options received in the constructor.
* Note that if options xmlrpc_null_extension, xmlrpc_return_datetimes and xmlrpc_reject_invalid_values
     *                       are not set, the default settings from PhpXmlRpc\PhpXmlRpc are used
     * @return array see the definition of $this->_xh for the meaning of the results
     * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions)
     *
     * @todo refactor? we could 1. return the parsed data structure, and 2. move $returnType and $accept into options
     * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc::$xmlrpc_XXX_format, so
     *       that parsing will be completely independent of global state. Note that it might incur a small perf hit...
     */
    public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3, $options = array())
    {
        // reset the parsing state, so that the same parser object can be reused for many documents
        $this->_xh = array(
            'ac' => '',
            'stack' => array(),
            'valuestack' => array(),
            'lv' => 0,
            'isf' => 0,
            'isf_reason' => '',
            'value' => null,
            'method' => false, // so we can check later if we got a methodname or not
            'params' => array(),
            'pt' => array(),
            'rt' => '',
        );

        $len = strlen($data);

        // we test for empty documents here to save on resource allocation and simplify the chunked-parsing loop below
        if ($len == 0) {
            $this->_xh['isf'] = 3;
            $this->_xh['isf_reason'] = 'XML error 5: empty document';
            return $this->_xh;
        }

        $this->current_parsing_options = array('accept' => $accept);

        // per-call options win over the ones received in the constructor
        $mergedOptions = $this->parsing_options;
        foreach ($options as $key => $val) {
            $mergedOptions[$key] = $val;
        }

        // string-key options are consumed here; what is left in $mergedOptions afterwards goes to the xml parser
        foreach ($mergedOptions as $key => $val) {
            // q: can php be built without ctype? should we use a regexp?
            if (is_string($key) && !ctype_digit($key)) {
                /// @todo on invalid options, throw/error-out instead of logging an error message?
                switch($key) {
                    case 'target_charset':
                        if (function_exists('mb_convert_encoding')) {
                            $this->current_parsing_options['target_charset'] = $val;
                        } else {
                            $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": 'target_charset' option is unsupported without mbstring");
                        }
                        break;

                    case 'methodname_callback':
                        if (is_callable($val)) {
                            $this->current_parsing_options['methodname_callback'] = $val;
                        } else {
                            $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": Callback passed as 'methodname_callback' is not callable");
                        }
                        break;

                    case 'xmlrpc_null_extension':
                    case 'xmlrpc_return_datetimes':
                    case 'xmlrpc_reject_invalid_values':
                        $this->current_parsing_options[$key] = $val;
                        break;

                    default:
                        $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": unsupported option: $key");
                }
                unset($mergedOptions[$key]);
            }
        }

        // options which were not passed in fall back to the library-wide defaults
        if (!isset($this->current_parsing_options['xmlrpc_null_extension'])) {
            $this->current_parsing_options['xmlrpc_null_extension'] = PhpXmlRpc::$xmlrpc_null_extension;
        }
        if (!isset($this->current_parsing_options['xmlrpc_return_datetimes'])) {
            $this->current_parsing_options['xmlrpc_return_datetimes'] = PhpXmlRpc::$xmlrpc_return_datetimes;
        }
        if (!isset($this->current_parsing_options['xmlrpc_reject_invalid_values'])) {
            $this->current_parsing_options['xmlrpc_reject_invalid_values'] = PhpXmlRpc::$xmlrpc_reject_invalid_values;
        }

        // NB: we use '' instead of null to force charset detection from the xml declaration
        $parser = xml_parser_create('');

        foreach ($mergedOptions as $key => $val) {
            xml_parser_set_option($parser, $key, $val);
        }

        // always set this, in case someone tries to disable it via options...
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 1);

        xml_set_object($parser, $this);

        // the start-element handler is always the same; the end-element handler decides the kind of values built
        switch ($returnType) {
            case self::RETURN_PHP:
                xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_fast');
                break;
            case self::RETURN_EPIVALS:
                xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_epi');
                break;
            /// @todo log an error / throw / error-out on unsupported return type
            case XMLParser::RETURN_XMLRPCVALS:
            default:
                xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee');
        }

        xml_set_character_data_handler($parser, 'xmlrpc_cd');
        xml_set_default_handler($parser, 'xmlrpc_dh');

        try {
            // @see ticket #70 - we have to parse big xml docs in chunks to avoid errors
            for ($offset = 0; $offset < $len; $offset += $this->maxChunkLength) {
                $chunk = substr($data, $offset, $this->maxChunkLength);
                // error handling: xml not well formed
                // (the 3rd argument tells the xml parser whether this is the last chunk)
                if (!xml_parse($parser, $chunk, $offset + $this->maxChunkLength >= $len)) {
                    $errCode = xml_get_error_code($parser);
                    $errStr = sprintf('XML error %s: %s at line %d, column %d', $errCode, xml_error_string($errCode),
                        xml_get_current_line_number($parser), xml_get_current_column_number($parser));

                    $this->_xh['isf'] = 3;
                    $this->_xh['isf_reason'] = $errStr;
                }
                // no need to parse further if we already have a fatal error
                if ($this->_xh['isf'] >= 2) {
                    break;
                }
            }
        /// @todo bump minimum php version to 5.5 and use a finally clause instead of doing cleanup 3 times
        } catch (\Exception $e) {
            xml_parser_free($parser);
            $this->current_parsing_options = array();
            /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ?
            throw $e;
        } catch (\Error $e) {
            xml_parser_free($parser);
            $this->current_parsing_options = array();
            //$this->accept = $prevAccept;
            /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ?
            throw $e;
        }

        xml_parser_free($parser);
        $this->current_parsing_options = array();

        return $this->_xh;
    }

    /**
     * xml parser handler function for opening element tags.
     * Validates the nesting of the element, then prepares the parsing state for its content. On any violation it sets
     * $this->_xh['isf'] to 2 and $this->_xh['isf_reason'] to a description of the problem, and returns.
     * @internal
     *
     * @param resource $parser
     * @param string $name the element name, upper-cased because parse() forces XML_OPTION_CASE_FOLDING on
     * @param array $attrs the attributes of the element. Only PHP_CLASS is looked at, on ARRAY and STRUCT elements
     * @param bool $acceptSingleVals DEPRECATED use the $accept parameter instead
     * @return void
     *
     * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing
     *       and remove the checking for $this->_xh['isf'] >= 2 everywhere
     */
    public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false)
    {
        // if invalid xml-rpc already detected, skip all processing
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // check for correct element nesting
        if (count($this->_xh['stack']) == 0) {
            // top level element: which ones are allowed depends on the 'accept' bitmask
            /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
            ///       there is only a single top level element in xml anyway

            // BC
            if ($acceptSingleVals === false) {
                $accept = $this->current_parsing_options['accept'];
            } else {
                $this->logDeprecation('Using argument $acceptSingleVals for method ' . __METHOD__ . ' is deprecated');
                $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE | self::ACCEPT_VALUE;
            }
            if (($name == 'METHODCALL' && ($accept & self::ACCEPT_REQUEST)) ||
                ($name == 'METHODRESPONSE' && ($accept & self::ACCEPT_RESPONSE)) ||
                ($name == 'VALUE' && ($accept & self::ACCEPT_VALUE)) ||
                ($name == 'FAULT' && ($accept & self::ACCEPT_FAULT))) {
                $this->_xh['rt'] = strtolower($name);
            } else {
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = 'missing top level xmlrpc element. Found: ' . $name;

                return;
            }
        } else {
            // not top level element: see if parent is OK
            $parent = end($this->_xh['stack']);
            if (!array_key_exists($name, $this->xmlrpc_valid_parents) || !in_array($parent, $this->xmlrpc_valid_parents[$name])) {
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "xmlrpc element $name cannot be child of $parent";

                return;
            }
        }

        switch ($name) {
            // optimize for speed switch cases: most common cases first
            case 'VALUE':
                /// @todo we could check for 2 VALUE elements inside a MEMBER or PARAM element
                $this->_xh['vt'] = 'value'; // indicator: no value found yet
                $this->_xh['ac'] = '';
                $this->_xh['lv'] = 1;
                $this->_xh['php_class'] = null;
                break;

            case 'I8':
            case 'EX:I8':
                if (PHP_INT_SIZE === 4) {
                    // INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode";

                    return;
                }
                // fall through voluntarily

            case 'I4':
            case 'INT':
            case 'STRING':
            case 'BOOLEAN':
            case 'DOUBLE':
            case 'DATETIME.ISO8601':
            case 'BASE64':
                if ($this->_xh['vt'] != 'value') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                    return;
                }
                $this->_xh['ac'] = ''; // reset the accumulator
                break;

            case 'STRUCT':
            case 'ARRAY':
                if ($this->_xh['vt'] != 'value') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                    return;
                }
                // create an empty array to hold child values, and push it onto appropriate stack
                $curVal = array(
                    'values' => array(),
                    'type' => $name,
                );
                // check for out-of-band information to rebuild php objs and, in case it is found, save it
                if (isset($attrs['PHP_CLASS'])) {
                    $curVal['php_class'] = $attrs['PHP_CLASS'];
                }
                $this->_xh['valuestack'][] = $curVal;
                $this->_xh['vt'] = 'data'; // be prepared for a data element next
                break;

            case 'DATA':
                if ($this->_xh['vt'] != 'data') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "found two data elements inside an array element";

                    return;
                }
                // fall through voluntarily

            case 'METHODCALL':
            case 'METHODRESPONSE':
            case 'PARAMS':
                // valid elements that add little to processing
                break;

            case 'METHODNAME':
            case 'NAME':
                /// @todo we could check for 2 NAME elements inside a MEMBER element
                $this->_xh['ac'] = '';
                break;

            case 'FAULT':
                $this->_xh['isf'] = 1;
                break;

            case 'MEMBER':
                // set member name to null, in case we do not find in the xml later on
                $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = null;
                //$this->_xh['ac']='';
                // Drop through intentionally

            case 'PARAM':
                // clear value type, so we can check later if no value has been passed for this param/member
                $this->_xh['vt'] = null;
                break;

            case 'NIL':
            case 'EX:NIL':
                if ($this->current_parsing_options['xmlrpc_null_extension']) {
                    if ($this->_xh['vt'] != 'value') {
                        // two data elements inside a value: an error occurred!
                        $this->_xh['isf'] = 2;
                        $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                        return;
                    }
                    // reset the accumulator - q: is this necessary at all here? we don't use it on _ee anyway for NILs
                    $this->_xh['ac'] = '';

                } else {
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = 'Invalid NIL value received. Support for NIL can be enabled via \\PhpXmlRpc\\PhpXmlRpc::$xmlrpc_null_extension';

                    return;
                }
                break;

            default:
                // INVALID ELEMENT: RAISE ISF so that it is later recognized
                /// @todo feature creep = allow a callback instead
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "found not-xmlrpc xml element $name";

                return;
        }

        // Save current element name to stack, to validate nesting
        $this->_xh['stack'][] = $name;

        /// @todo optimization creep: move this inside the big switch() above
        if ($name != 'VALUE') {
            $this->_xh['lv'] = 0;
        }
    }

    /**
     * xml parser handler function for close element tags.
     * Converts the character data accumulated for the element into a value, and stores it in the parsing state.
     * @internal
     *
     * @param resource $parser
     * @param string $name
     * @param int $rebuildXmlrpcvals >0 for rebuilding xmlrpcvals, 0 for rebuilding php values, -1 for xmlrpc-extension compatibility
     * @return void
     * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions)
     *
     * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing
     *       and remove the checking for $this->_xh['isf'] >= 2 everywhere
     */
    public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = 1)
    {
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // pop this element name from stack
        // NB: if XML validates, correct opening/closing is guaranteed and we do not have to check for $name == $currElem.
        // we also checked for proper nesting at start of elements...
        array_pop($this->_xh['stack']);

        switch ($name) {
            case 'VALUE':
                // If no scalar was inside <VALUE></VALUE>, it was a string value
                if ($this->_xh['vt'] == 'value') {
                    $this->_xh['value'] = $this->_xh['ac'];
                    $this->_xh['vt'] = Value::$xmlrpcString;
                }

                // in case there is charset conversion required, do it here, to catch both cases of string values
                if (isset($this->current_parsing_options['target_charset']) && $this->_xh['vt'] === Value::$xmlrpcString) {
                    $this->_xh['value'] = mb_convert_encoding($this->_xh['value'], $this->current_parsing_options['target_charset'], 'UTF-8');
                }

                if ($rebuildXmlrpcvals > 0) {
                    // build the xml-rpc val out of the data received, and substitute it
                    $temp = new Value($this->_xh['value'], $this->_xh['vt']);
                    // in case we got info about underlying php class, save it in the object we're rebuilding
                    if (isset($this->_xh['php_class'])) {
                        $temp->_php_class = $this->_xh['php_class'];
                    }
                    $this->_xh['value'] = $temp;
                } elseif ($rebuildXmlrpcvals < 0) {
                    if ($this->_xh['vt'] == Value::$xmlrpcDateTime) {
                        $this->_xh['value'] = (object)array(
                            'xmlrpc_type' => 'datetime',
                            'scalar' => $this->_xh['value'],
                            'timestamp' => \PhpXmlRpc\Helper\Date::iso8601Decode($this->_xh['value'])
                        );
                    } elseif ($this->_xh['vt'] == Value::$xmlrpcBase64) {
                        $this->_xh['value'] = (object)array(
                            'xmlrpc_type' => 'base64',
                            'scalar' => $this->_xh['value']
                        );
                    }
                } else {
                    /// @todo this should handle php-serialized objects, since std deserializing is done
                    ///       by php_xmlrpc_decode, which we will not be calling...
                    //if (isset($this->_xh['php_class'])) {
                    //}
                }

                // check if we are inside an array or struct:
                // if value just built is inside an array, let's move it into array on the stack
                $vscount = count($this->_xh['valuestack']);
                if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
                    $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value'];
                }
                break;

            case 'STRING':
                $this->_xh['vt'] = Value::$xmlrpcString;
                $this->_xh['lv'] = 3; // indicate we've found a value
                $this->_xh['value'] = $this->_xh['ac'];
                break;

            case 'BOOLEAN':
                $this->_xh['vt'] = Value::$xmlrpcBoolean;
                $this->_xh['lv'] = 3; // indicate we've found a value
                // We translate boolean 1 or 0 into PHP constants true or false. Strings 'true' and 'false' are accepted,
                // even though the spec never mentions them (see e.g. Blogger api docs)
                // NB: this simple checks helps a lot sanitizing input, i.e. no security problems around here
                // Note the non-strict type check: it will allow ' 1 '
                /// @todo feature-creep: use a flexible regexp, the same as we do with int, double and datetime.
                ///       Note that using a regexp would also make this test less sensitive to phpunit shenanigans, and
                ///       to changes in the way php compares strings (since 8.0, leading and trailing newlines are
                ///       accepted when deciding if a string numeric...)
                if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') === 0) {
                    $this->_xh['value'] = true;
                } else {
                    // log if receiving something strange, even though we set the value to false anyway
                    /// @todo to be consistent with the other types, we should return a value outside the good-value domain, e.g. NULL
                    if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') !== 0) {
                        if (!$this->handleParsingError('invalid data received in BOOLEAN value: ' .
                            $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                            return;
                        }
                    }
                    $this->_xh['value'] = false;
                }
                break;

            case 'EX:I8':
                $name = 'i8';
                // fall through voluntarily
            case 'I4':
            case 'I8':
            case 'INT':
                // NB: we build the Value object with the original xml element name found, except for ex:i8. The
                // `Value::scalarTyp()` function will do some normalization of the data
                $this->_xh['vt'] = strtolower($name);
                $this->_xh['lv'] = 3; // indicate we've found a value
                if (!preg_match(PhpXmlRpc::$xmlrpc_int_format, $this->_xh['ac'])) {
                    if (!$this->handleParsingError('non numeric data received in INT value: ' .
                        $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                        return;
                    }
                    /// @todo: find a better way of reporting an error value than this! Use NaN?
                    $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
                } else {
                    // it's ok, add it on
                    $this->_xh['value'] = (int)$this->_xh['ac'];
                }
                break;

            case 'DOUBLE':
                $this->_xh['vt'] = Value::$xmlrpcDouble;
                $this->_xh['lv'] = 3; // indicate we've found a value
                if (!preg_match(PhpXmlRpc::$xmlrpc_double_format, $this->_xh['ac'])) {
                    if (!$this->handleParsingError('non numeric data received in DOUBLE value: ' .
                        $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                        return;
                    }

                    $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
                } else {
                    // it's ok, add it on
                    $this->_xh['value'] = (float)$this->_xh['ac'];
                }
                break;

            case 'DATETIME.ISO8601':
                $this->_xh['vt'] = Value::$xmlrpcDateTime;
                $this->_xh['lv'] = 3; // indicate we've found a value
                if (!preg_match(PhpXmlRpc::$xmlrpc_datetime_format, $this->_xh['ac'])) {
                    if (!$this->handleParsingError('invalid data received in DATETIME value: ' .
                        $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                        return;
                    }
                }
                if ($this->current_parsing_options['xmlrpc_return_datetimes']) {
                    try {
                        $this->_xh['value'] = new \DateTime($this->_xh['ac']);

                    // the default regex used to validate the date string a few lines above should make this case impossible,
                    // but one never knows...
                    } catch(\Exception $e) {
                        // what to do? We can not guarantee that a valid date can be created. We return null...
                        if (!$this->handleParsingError('invalid data received in DATETIME value. Error ' .
                            $e->getMessage(), __METHOD__)) {
                            return;
                        }
                        // ...and we have to set it explicitly: otherwise the value of the element parsed before
                        // this one would be still in place, and it would be used as the value of this datetime
                        $this->_xh['value'] = null;
                    }
                } else {
                    $this->_xh['value'] = $this->_xh['ac'];
                }
                break;

            case 'BASE64':
                $this->_xh['vt'] = Value::$xmlrpcBase64;
                $this->_xh['lv'] = 3; // indicate we've found a value
                if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) {
                    $v = base64_decode($this->_xh['ac'], true);
                    if ($v === false) {
                        $this->_xh['isf'] = 2;
                        $this->_xh['isf_reason'] = 'Invalid data received in BASE64 value: '. $this->truncateValueForLog($this->_xh['ac']);
                        return;
                    }
                } else {
                    $v = base64_decode($this->_xh['ac']);
                    if ($v === '' && $this->_xh['ac'] !== '') {
                        // only the empty string should decode to the empty string
                        $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ': invalid data received in BASE64 value: ' .
                            $this->truncateValueForLog($this->_xh['ac']));
                    }
                }
                $this->_xh['value'] = $v;
                break;

            case 'NAME':
                $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = $this->_xh['ac'];
                break;

            case 'MEMBER':
                // add to array in the stack the last element built, unless no VALUE or no NAME were found
                if ($this->_xh['vt']) {
                    $vscount = count($this->_xh['valuestack']);
                    if ($this->_xh['valuestack'][$vscount - 1]['name'] === null) {
                        if (!$this->handleParsingError('missing NAME inside STRUCT in received xml', __METHOD__)) {
                            return;
                        }
                        $this->_xh['valuestack'][$vscount - 1]['name'] = '';
                    }
                    $this->_xh['valuestack'][$vscount - 1]['values'][$this->_xh['valuestack'][$vscount - 1]['name']] = $this->_xh['value'];
                } else {
                    if (!$this->handleParsingError('missing VALUE inside STRUCT in received xml', __METHOD__)) {
                        return;
                    }
                }
                break;

            case 'DATA':
                $this->_xh['vt'] = null; // reset this to check for 2 data elements in a row - even if they're empty
                break;

            case 'STRUCT':
            case 'ARRAY':
                // fetch out of stack array of values, and promote it to current value
                $currVal = array_pop($this->_xh['valuestack']);
                $this->_xh['value'] = $currVal['values'];
                $this->_xh['vt'] = strtolower($name);
                if (isset($currVal['php_class'])) {
                    $this->_xh['php_class'] = $currVal['php_class'];
                }
                break;

            case 'PARAM':
                // add to array of params the current value, unless no VALUE was found
                /// @todo should we also check if there were two VALUE inside the PARAM?
                if ($this->_xh['vt']) {
                    $this->_xh['params'][] = $this->_xh['value'];
                    $this->_xh['pt'][] = $this->_xh['vt'];
                } else {
                    if (!$this->handleParsingError('missing VALUE inside PARAM in received xml', __METHOD__)) {
                        return;
                    }
                }
                break;

            case 'METHODNAME':
                if (!preg_match(PhpXmlRpc::$xmlrpc_methodname_format, $this->_xh['ac'])) {
                    if (!$this->handleParsingError('invalid data received in METHODNAME: '.
                        $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                        return;
                    }
                }
                $methodName = trim($this->_xh['ac']);
                $this->_xh['method'] = $methodName;
                // we allow the callback to f.e. give us back a mangled method name by manipulating $this
                if (isset($this->current_parsing_options['methodname_callback'])) {
                    call_user_func($this->current_parsing_options['methodname_callback'], $methodName, $this, $parser);
                }
                break;

            case 'NIL':
            case 'EX:NIL':
                // NB: if NIL support is not enabled, parsing stops at element start. So this If is redundant
                //if ($this->current_parsing_options['xmlrpc_null_extension']) {
                    $this->_xh['vt'] = 'null';
                    $this->_xh['value'] = null;
                    $this->_xh['lv'] = 3;
                //}
                break;

            /// @todo add extra checking:
            ///       - METHODRESPONSE should contain either a PARAMS with a single PARAM, or a FAULT
            ///       - FAULT should contain a single struct with the 2 expected members (check their name and type)
            ///       - METHODCALL should contain a methodname
            case 'PARAMS':
            case 'FAULT':
            case 'METHODCALL':
            case 'METHODRESPONSE':
                break;

            default:
                // End of INVALID ELEMENT
                // Should we add an assert here for unreachable code? When an invalid element is found in xmlrpc_se,
                // $this->_xh['isf'] is set to 2...
                break;
        }
    }

    /**
     * Used in decoding xml-rpc requests/responses without rebuilding xml-rpc Values.
* @internal
     *
     * @param resource $parser
     * @param string $name
     * @return void
     */
    public function xmlrpc_ee_fast($parser, $name)
    {
        // 0 = hand over to the generic end-element handler, asking for plain php values
        $rebuildMode = 0;
        $this->xmlrpc_ee($parser, $name, $rebuildMode);
    }

    /**
     * Used in decoding xml-rpc requests/responses while building xmlrpc-extension Values (plain php for all but base64 and datetime).
     * @internal
     *
     * @param resource $parser
     * @param string $name
     * @return void
     */
    public function xmlrpc_ee_epi($parser, $name)
    {
        // -1 = hand over to the generic end-element handler, asking for xmlrpc-extension compatible values
        $rebuildMode = -1;
        $this->xmlrpc_ee($parser, $name, $rebuildMode);
    }

    /**
     * xml parser handler function for character data.
     * Appends the received text to the accumulator, unless parsing has failed already or a complete value was found.
     * @internal
     *
     * @param resource $parser
     * @param string $data
     * @return void
     */
    public function xmlrpc_cd($parser, $data)
    {
        // an xml fault was detected already: nothing is worth collecting any more
        $alreadyFailed = $this->_xh['isf'] >= 2;
        // "lookforvalue == 3" means that we've found an entire value and should discard any further character data
        $valueComplete = $this->_xh['lv'] == 3;

        if ($alreadyFailed || $valueComplete) {
            return;
        }

        $this->_xh['ac'] .= $data;
    }

    /**
     * xml parser handler function for 'other stuff', i.e. not char data or element start/end tag.
     * In fact, it only gets called on unknown entities...
     * @internal
     *
     * @param $parser
     * @param string $data
     * @return void
     */
    public function xmlrpc_dh($parser, $data)
    {
        // skip processing if xml fault already detected
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // only data which looks like an entity reference, i.e. `&...;`, is kept, and it is kept verbatim
        $firstChar = substr($data, 0, 1);
        $lastChar = substr($data, -1, 1);
        if ($firstChar == '&' && $lastChar == ';') {
            $this->_xh['ac'] .= $data;
        }
    }

    /**
     * xml charset encoding guessing helper function.
     * Tries to determine the charset encoding of an XML chunk received over HTTP.
     * NB: according to the spec (RFC 3023), if text/xml content-type is received over HTTP without a content-type,
     * we SHOULD assume it is strictly US-ASCII. But we try to be more tolerant of non-conforming (legacy?)
clients/servers,
     * which will be most probably using UTF-8 anyway...
     * The checks are carried out in this order, and the first one which gives a result wins:
     *  1. http headers
     *  2. BOM
     *  3. XML declaration
     *  4. guesses using mb_detect_encoding()
     *
     * @param string $httpHeader the http Content-type header
     * @param string $xmlChunk xml content buffer
     * @param string|null $encodingPrefs comma separated list of character encodings to be used as default (when mb extension is enabled).
     *                                   This can also be set globally using PhpXmlRpc::$xmlrpc_detectencodings
     * @return string|false the encoding determined, upper-cased when it comes from the http header or the xml declaration.
     *                      When nothing is found by checks 1 to 3: whatever mb_detect_encoding() returns if mbstring
     *                      is enabled (false when it can not detect the encoding, with 'ASCII' turned into 'US-ASCII'),
     *                      PhpXmlRpc::$xmlrpc_defencoding if mbstring is not enabled
     *
     * @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of hand-detection!!!
     * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc static variables, to make
     *       the method independent of global state
     */
    public static function guessEncoding($httpHeader = '', $xmlChunk = '', $encodingPrefs = null)
    {
        // discussion: see http://www.yale.edu/pclt/encoding/

        // 1 - test if encoding is specified in HTTP HEADERS
        // Details:
        // LWS:           (\13\10)?( |\t)+
        // token:         (any char but excluded stuff)+
        // quoted string: " (any char but double quotes and control chars)* "
        // header:        Content-type = ...; charset=value(; ...)*
        //   where value is of type token, no LWS allowed between 'charset' and value
        // Note: we do not check for invalid chars in VALUE:
        //   this had better be done using pure ereg as below
        // Note 2: we might be removing whitespace/tabs that ought to be left in if
        //   the received charset is a quoted string. But nobody uses such charset names...
        /// @todo this test will pass if ANY header has charset specification, not only Content-Type. Fix it?
        $headerMatch = array();
        if (preg_match('/;\s*charset\s*=([^;]+)/i', $httpHeader, $headerMatch)) {
            $charset = trim($headerMatch[1], " \t\"");

            return strtoupper($charset);
        }

        // 2 - scan the first bytes of the data for a UTF-16 (or other) BOM pattern
        //     (source: http://www.w3.org/TR/2000/REC-xml-20001006)
        //     NOTE: actually, according to the spec, even if we find the BOM and determine
        //     an encoding, we should check if there is an encoding specified
        //     in the xml declaration, and verify if they match.
        /// @todo implement check as described above?
        /// @todo implement check for first bytes of string even without a BOM? (It sure looks harder than for cases WITH a BOM)
        // NB: the order matters, as the 2-byte patterns are prefixes of some of the 4-byte ones
        $bomPatterns = array(
            'UCS-4' => '/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/',
            'UTF-16' => '/^(?:\xFE\xFF|\xFF\xFE)/',
            'UTF-8' => '/^(?:\xEF\xBB\xBF)/',
        );
        foreach ($bomPatterns as $bomCharset => $bomPattern) {
            if (preg_match($bomPattern, $xmlChunk)) {
                return $bomCharset;
            }
        }

        // 3 - test if encoding is specified in the xml declaration
        /// @todo this regexp will fail if $xmlChunk uses UTF-32/UCS-4, and most likely UTF-16/UCS-2 as well. In that
        ///       case we leave the guesswork up to mbstring - which seems to be able to detect it, starting with php 5.6.
        ///       For lower versions, we could attempt usage of mb_ereg...
        // Details:
        // SPACE:         (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
        // EQ:            SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
        $versionValue = "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))";
        $encodingValue = "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))";
        $declarationPattern = '/^<\?xml\s+version\s*=\s*' . $versionValue . '\s+encoding\s*=\s*' . $encodingValue . '/';
        $declarationMatch = array();
        if (preg_match($declarationPattern, $xmlChunk, $declarationMatch)) {
            // the 2nd capture group holds the encoding name, with the quotes around it
            $quotedEncoding = $declarationMatch[2];

            return strtoupper(substr($quotedEncoding, 1, -1));
        }

        // 4 - if mbstring is available, let it do the guesswork
        if (!function_exists('mb_detect_encoding')) {
            // no encoding specified: as per HTTP1.1 assume it is iso-8859-1?
            // Both RFC 2616 (HTTP 1.1) and 1945 (HTTP 1.0) clearly state that for text/xxx content types
            // this should be the standard. And we should be getting text/xml as request and response.
            // BUT we have to be backward compatible with the lib, which always used UTF-8 as default...
            return PhpXmlRpc::$xmlrpc_defencoding;
        }

        if ($encodingPrefs == null && PhpXmlRpc::$xmlrpc_detectencodings != null) {
            $encodingPrefs = PhpXmlRpc::$xmlrpc_detectencodings;
        }
        $enc = $encodingPrefs ? mb_detect_encoding($xmlChunk, $encodingPrefs) : mb_detect_encoding($xmlChunk);

        // NB: mb_detect likes to call it ascii, xml parser likes to call it US_ASCII...
        // IANA also likes better US-ASCII, so go with it
        if ($enc == 'ASCII') {
            $enc = 'US-' . $enc;
        }

        return $enc;
    }

    /**
     * Helper function: checks if an xml chunk has a charset declaration (BOM or in the xml declaration).
940 * 941 * @param string $xmlChunk 942 * @return bool 943 * 944 * @todo rename to hasEncodingDeclaration 945 */ 946 public static function hasEncoding($xmlChunk) 947 { 948 // scan the first bytes of the data for a UTF-16 (or other) BOM pattern 949 // (source: http://www.w3.org/TR/2000/REC-xml-20001006) 950 if (preg_match('/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) { 951 return true; 952 } elseif (preg_match('/^(?:\xFE\xFF|\xFF\xFE)/', $xmlChunk)) { 953 return true; 954 } elseif (preg_match('/^(?:\xEF\xBB\xBF)/', $xmlChunk)) { 955 return true; 956 } 957 958 // test if encoding is specified in the xml declaration 959 // Details: 960 // SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+ 961 // EQ: SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]* 962 if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" . 963 '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/", 964 $xmlChunk)) { 965 return true; 966 } 967 968 return false; 969 } 970 971 /** 972 * @param string $message 973 * @param string $method method/file/line info 974 * @return bool false if the caller has to stop parsing 975 */ 976 protected function handleParsingError($message, $method = '') 977 { 978 if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) { 979 $this->_xh['isf'] = 2; 980 $this->_xh['isf_reason'] = ucfirst($message); 981 return false; 982 } else { 983 $this->getLogger()->error('XML-RPC: ' . ($method != '' ? $method . ': ' : '') . $message); 984 return true; 985 } 986 } 987 988 /** 989 * Truncates unsafe data 990 * @param string $data 991 * @return string 992 */ 993 protected function truncateValueForLog($data) 994 { 995 if (strlen($data) > $this->maxLogValueLength) { 996 return substr($data, 0, $this->maxLogValueLength - 3) . 
'...'; 997 } 998 999 return $data; 1000 } 1001 1002 // *** BC layer *** 1003 1004 /** 1005 * xml parser handler function for opening element tags. 1006 * Used in decoding xml chunks that might represent single xml-rpc values as well as requests, responses. 1007 * @deprecated 1008 * 1009 * @param resource $parser 1010 * @param $name 1011 * @param $attrs 1012 * @return void 1013 */ 1014 public function xmlrpc_se_any($parser, $name, $attrs) 1015 { 1016 // this will be spamming the log if this method is in use... 1017 $this->logDeprecation('Method ' . __METHOD__ . ' is deprecated'); 1018 1019 $this->xmlrpc_se($parser, $name, $attrs, true); 1020 } 1021 1022 public function &__get($name) 1023 { 1024 switch ($name) { 1025 case '_xh': 1026 case 'xmlrpc_valid_parents': 1027 $this->logDeprecation('Getting property XMLParser::' . $name . ' is deprecated'); 1028 return $this->$name; 1029 default: 1030 /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout... 1031 $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1); 1032 trigger_error('Undefined property via __get(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING); 1033 $result = null; 1034 return $result; 1035 } 1036 } 1037 1038 public function __set($name, $value) 1039 { 1040 switch ($name) { 1041 // this should only ever be called by subclasses which overtook `parse()` 1042 case 'accept': 1043 $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated'); 1044 $this->current_parsing_options['accept'] = $value; 1045 break; 1046 case '_xh': 1047 case 'xmlrpc_valid_parents': 1048 $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated'); 1049 $this->$name = $value; 1050 break; 1051 default: 1052 /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout... 
1053 $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1); 1054 trigger_error('Undefined property via __set(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING); 1055 } 1056 } 1057 1058 public function __isset($name) 1059 { 1060 switch ($name) { 1061 case 'accept': 1062 $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated'); 1063 return isset($this->current_parsing_options['accept']); 1064 case '_xh': 1065 case 'xmlrpc_valid_parents': 1066 $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated'); 1067 return isset($this->$name); 1068 default: 1069 return false; 1070 } 1071 } 1072 1073 public function __unset($name) 1074 { 1075 switch ($name) { 1076 // q: does this make sense at all? 1077 case 'accept': 1078 $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated'); 1079 unset($this->current_parsing_options['accept']); 1080 break; 1081 case '_xh': 1082 case 'xmlrpc_valid_parents': 1083 $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated'); 1084 unset($this->$name); 1085 break; 1086 default: 1087 /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout... 1088 $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1); 1089 trigger_error('Undefined property via __unset(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING); 1090 } 1091 } 1092 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body