See Release Notes
Long Term Support Release
<?php namespace PhpXmlRpc\Helper; use PhpXmlRpc\PhpXmlRpc;> use PhpXmlRpc\Traits\DeprecationLogger;use PhpXmlRpc\Value; /** * Deals with parsing the XML. * @see http://xmlrpc.com/spec.md * * @todo implement an interface to allow for alternative implementations * - make access to $_xh protected, return more high-level data structures> * - move the private parts of $_xh to the internal-use parsing-options config* - add parseRequest, parseResponse, parseValue methods * @todo if iconv() or mb_string() are available, we could allow to convert the received xml to a custom charset encoding * while parsing, which is faster than doing it later by going over the rebuilt data structure> * @todo rename? This is an xml-rpc parser, not a generic xml parser... */ > * class XMLParser > * @property array $xmlrpc_valid_parents deprecated - public access left in purely for BC { > * @property int $accept deprecated - (protected) access left in purely for BCconst RETURN_XMLRPCVALS = 'xmlrpcvals';> use DeprecationLogger; const RETURN_EPIVALS = 'epivals'; >const RETURN_PHP = 'phpvals'; const ACCEPT_REQUEST = 1; const ACCEPT_RESPONSE = 2; const ACCEPT_VALUE = 4; const ACCEPT_FAULT = 8;< // Used to store state during parsing and to pass parsing results to callers. < // Quick explanation of components: < // private: < // ac - used to accumulate values < // stack - array with genealogy of xml elements names used to validate nesting of xmlrpc elements < // valuestack - array used for parsing arrays and structs < // lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings < // public: < // isf - used to indicate an xml parsing fault (3), invalid xmlrpc fault (2) or xmlrpc response fault (1) < // isf_reason - used for storing xmlrpc response fault string < // value - used to store the value in responses < // method - used to store method name in requests < // params - used to store parameters in requests < // pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values < // rt - 'methodcall', 'methodresponse', 'value' or 'fault' (the last one used only in EPI emulation mode) < public $_xh = array(> /** > * @var int > * The max length beyond which data will get truncated in error messages > */ > protected $maxLogValueLength = 100; > > /** > * @var array > * Used to store state during parsing and to pass parsing results to callers. > * Quick explanation of components: > * private: > * ac - used to accumulate values > * stack - array with genealogy of xml elements names, used to validate nesting of xml-rpc elements > * valuestack - array used for parsing arrays and structs > * lv - used to indicate "looking for a value": implements the logic to allow values with no types to be strings > * (values: 0=not looking, 1=looking, 3=found) > * public: > * isf - used to indicate an xml-rpc response fault (1), invalid xml-rpc fault (2), xml parsing fault (3) > * isf_reason - used for storing xml-rpc response fault string > * value - used to store the value in responses > * method - used to store method name in requests > * params - used to store parameters in requests > * pt - used to store the type of each received parameter. Useful if parameters are automatically decoded to php values > * rt - 'methodcall', 'methodresponse', 'value' or 'fault' (the last one used only in EPI emulation mode) > */ > protected $_xh = array('ac' => '', 'stack' => array(), 'valuestack' => array(),> 'lv' => 0,'isf' => 0, 'isf_reason' => '', 'value' => null, 'method' => false, 'params' => array(), 'pt' => array(), 'rt' => '', );< public $xmlrpc_valid_parents = array(> /** > * @var array[] > */ > protected $xmlrpc_valid_parents = array('VALUE' => array('MEMBER', 'DATA', 'PARAM', 'FAULT'), 'BOOLEAN' => array('VALUE'), 'I4' => array('VALUE'), 'I8' => array('VALUE'), 'EX:I8' => array('VALUE'), 'INT' => array('VALUE'), 'STRING' => array('VALUE'), 'DOUBLE' => array('VALUE'), 'DATETIME.ISO8601' => array('VALUE'), 'BASE64' => array('VALUE'), 'MEMBER' => array('STRUCT'), 'NAME' => array('MEMBER'), 'DATA' => array('ARRAY'), 'ARRAY' => array('VALUE'), 'STRUCT' => array('VALUE'), 'PARAM' => array('PARAMS'), 'METHODNAME' => array('METHODCALL'), 'PARAMS' => array('METHODCALL', 'METHODRESPONSE'), 'FAULT' => array('METHODRESPONSE'), 'NIL' => array('VALUE'), // only used when extension activated 'EX:NIL' => array('VALUE'), // only used when extension activated ); /** @var array $parsing_options */ protected $parsing_options = array();>/** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */< protected $accept = 3;> //protected $accept = 3; >/** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */ protected $maxChunkLength = 4194304;> /** @var array > * Used keys: accept, target_charset, methodname_callback, plus the ones set here. /** > * We initialize it partially to help keep BC with subclasses which might have reimplemented `parse()` but not * @param array $options passed to the xml parser > * the element handler methods */ > */ public function __construct(array $options = array()) > protected $current_parsing_options = array( { > 'xmlrpc_null_extension' => false, $this->parsing_options = $options; > 'xmlrpc_return_datetimes' => false, } > 'xmlrpc_reject_invalid_values' => false > );< * @param array $options passed to the xml parser> * @param array $options integer keys: options passed to the inner xml parser > * string keys: > * - target_charset (string) > * - methodname_callback (callable) > * - xmlrpc_null_extension (bool) > * - xmlrpc_return_datetimes (bool) > * - xmlrpc_reject_invalid_values (bool)* @param string $data> * Parses an xml-rpc xml string. Results of the parsing are found in $this->['_xh']. * @param string $returnType > * Logs to the error log any issues which do not cause the parsing to fail. * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE > *< * @param string $returnType> * @param string $returnType self::RETURN_XMLRPCVALS, self::RETURN_PHP, self::RETURN_EPIVALS< * @param array $options> * @param array $options integer-key options are passed to the xml parser, string-key options are used independently. > * These options are added to options received in the constructor. > * Note that if options xmlrpc_null_extension, xmlrpc_return_datetimes and xmlrpc_reject_invalid_values > * are not set, the default settings from PhpXmlRpc\PhpXmlRpc are used > * @return array see the definition of $this->_xh for the meaning of the results > * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions) > * > * @todo refactor? we could 1. return the parsed data structure, and 2. move $returnType and $accept into options > * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc::$xmlrpc_XXX_format, so > * that parsing will be completely independent of global state. Note that it might incur a small perf hit...public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3, $options = array()) { $this->_xh = array( 'ac' => '', 'stack' => array(), 'valuestack' => array(),> 'lv' => 0,'isf' => 0, 'isf_reason' => '', 'value' => null, 'method' => false, // so we can check later if we got a methodname or not 'params' => array(), 'pt' => array(), 'rt' => '', ); $len = strlen($data);< // we test for empty documents here to save on resource allocation and simply the chunked-parsing loop below> // we test for empty documents here to save on resource allocation and simplify the chunked-parsing loop belowif ($len == 0) { $this->_xh['isf'] = 3; $this->_xh['isf_reason'] = 'XML error 5: empty document';< return;> return $this->_xh;}< $parser = xml_parser_create();> $this->current_parsing_options = array('accept' => $accept);< foreach ($this->parsing_options as $key => $val) { < xml_parser_set_option($parser, $key, $val); < }> $mergedOptions = $this->parsing_options;foreach ($options as $key => $val) {> $mergedOptions[$key] = $val; xml_parser_set_option($parser, $key, $val); > } } > // always set this, in case someone tries to disable it via options... > foreach ($mergedOptions as $key => $val) { xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 1); > // q: can php be built without ctype? should we use a regexp? > if (is_string($key) && !ctype_digit($key)) { xml_set_object($parser, $this); > /// @todo on invalid options, throw/error-out instead of logging an error message? > switch($key) { switch($returnType) { > case 'target_charset': case self::RETURN_PHP: > if (function_exists('mb_convert_encoding')) { xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_fast'); > $this->current_parsing_options['target_charset'] = $val; break; > } else { case self::RETURN_EPIVALS: > $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": 'target_charset' option is unsupported without mbstring"); xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee_epi'); > } break; > break; default: > xml_set_element_handler($parser, 'xmlrpc_se', 'xmlrpc_ee'); > case 'methodname_callback': } > if (is_callable($val)) { > $this->current_parsing_options['methodname_callback'] = $val; xml_set_character_data_handler($parser, 'xmlrpc_cd'); > } else { xml_set_default_handler($parser, 'xmlrpc_dh'); > $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": Callback passed as 'methodname_callback' is not callable"); > } $this->accept = $accept; > break; > // @see ticket #70 - we have to parse big xml docks in chunks to avoid errors > case 'xmlrpc_null_extension': for ($offset = 0; $offset < $len; $offset += $this->maxChunkLength) { > case 'xmlrpc_return_datetimes': $chunk = substr($data, $offset, $this->maxChunkLength); > case 'xmlrpc_reject_invalid_values': // error handling: xml not well formed > $this->current_parsing_options[$key] = $val; if (!xml_parse($parser, $chunk, $offset + $this->maxChunkLength >= $len)) { > break; $errCode = xml_get_error_code($parser); > $errStr = sprintf('XML error %s: %s at line %d, column %d', $errCode, xml_error_string($errCode), > default: xml_get_current_line_number($parser), xml_get_current_column_number($parser)); > $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": unsupported option: $key"); > } $this->_xh['isf'] = 3; > unset($mergedOptions[$key]); $this->_xh['isf_reason'] = $errStr; > } break; > } } > } > if (!isset($this->current_parsing_options['xmlrpc_null_extension'])) { > $this->current_parsing_options['xmlrpc_null_extension'] = PhpXmlRpc::$xmlrpc_null_extension; xml_parser_free($parser); > } } > if (!isset($this->current_parsing_options['xmlrpc_return_datetimes'])) { > $this->current_parsing_options['xmlrpc_return_datetimes'] = PhpXmlRpc::$xmlrpc_return_datetimes; /** > } * xml parser handler function for opening element tags. > if (!isset($this->current_parsing_options['xmlrpc_reject_invalid_values'])) { * @internal > $this->current_parsing_options['xmlrpc_reject_invalid_values'] = PhpXmlRpc::$xmlrpc_reject_invalid_values; * @param resource $parser > } * @param string $name > * @param $attrs > // NB: we use '' instead of null to force charset detection from the xml declaration * @param bool $acceptSingleVals DEPRECATED use the $accept parameter instead > $parser = xml_parser_create(''); */ > public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false) > foreach ($mergedOptions as $key => $val) {{>// if invalid xmlrpc already detected, skip all processing> /// @todo log an error / throw / error-out on unsupported return type if ($this->_xh['isf'] < 2) { > case XMLParser::RETURN_XMLRPCVALS:< $this->accept = $accept; < < // @see ticket #70 - we have to parse big xml docks in chunks to avoid errors> try { > // @see ticket #70 - we have to parse big xml docs in chunks to avoid errors// top level element can only be of 2 types> } /// @todo optimization creep: save this check into a bool variable, instead of using count() every time: > // no need to parse further if we already have a fatal error /// there is only a single top level element in xml anyway > if ($this->_xh['isf'] >= 2) {// BC> /// @todo bump minimum php version to 5.5 and use a finally clause instead of doing cleanup 3 times if ($acceptSingleVals === false) { > } catch (\Exception $e) { $accept = $this->accept; > xml_parser_free($parser); } else { > $this->current_parsing_options = array(); $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE | self::ACCEPT_VALUE; > /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ? } > throw $e; if (($name == 'METHODCALL' && ($accept & self::ACCEPT_REQUEST)) || > } catch (\Error $e) { ($name == 'METHODRESPONSE' && ($accept & self::ACCEPT_RESPONSE)) || > xml_parser_free($parser); ($name == 'VALUE' && ($accept & self::ACCEPT_VALUE)) || > $this->current_parsing_options = array(); ($name == 'FAULT' && ($accept & self::ACCEPT_FAULT))) { > //$this->accept = $prevAccept; $this->_xh['rt'] = strtolower($name); > /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ? } else { > throw $e; $this->_xh['isf'] = 2; > }$this->_xh['isf_reason'] = 'missing top level xmlrpc element. Found: ' . $name;> $this->current_parsing_options = array(); > return; > return $this->_xh;}> *} else {> * @return void // not top level element: see if parent is OK > * $parent = end($this->_xh['stack']); > * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing if (!array_key_exists($name, $this->xmlrpc_valid_parents) || !in_array($parent, $this->xmlrpc_valid_parents[$name])) { > * and remove the checking for $this->_xh['isf'] >= 2 everywhere< // if invalid xmlrpc already detected, skip all processing < if ($this->_xh['isf'] < 2) {> // if invalid xml-rpc already detected, skip all processing > if ($this->_xh['isf'] >= 2) { > return; > }>< $accept = $this->accept;> $accept = $this->current_parsing_options['accept'];}> $this->logDeprecation('Using argument $acceptSingleVals for method ' . __METHOD__ . ' is deprecated');} switch ($name) { // optimize for speed switch cases: most common cases first case 'VALUE': /// @todo we could check for 2 VALUE elements inside a MEMBER or PARAM element $this->_xh['vt'] = 'value'; // indicator: no value found yet $this->_xh['ac'] = ''; $this->_xh['lv'] = 1; $this->_xh['php_class'] = null; break;>case 'I8': case 'EX:I8': if (PHP_INT_SIZE === 4) { // INVALID ELEMENT: RAISE ISF so that it is later recognized!!! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode"; return; } // fall through voluntarily>case 'I4': case 'INT': case 'STRING': case 'BOOLEAN': case 'DOUBLE': case 'DATETIME.ISO8601': case 'BASE64': if ($this->_xh['vt'] != 'value') { // two data elements inside a value: an error occurred! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value"; return; } $this->_xh['ac'] = ''; // reset the accumulator break;>case 'STRUCT': case 'ARRAY': if ($this->_xh['vt'] != 'value') { // two data elements inside a value: an error occurred! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value"; return; } // create an empty array to hold child values, and push it onto appropriate stack< $curVal = array(); < $curVal['values'] = array(); < $curVal['type'] = $name; < // check for out-of-band information to rebuild php objs < // and in case it is found, save it> $curVal = array( > 'values' => array(), > 'type' => $name, > ); > // check for out-of-band information to rebuild php objs and, in case it is found, save itif (@isset($attrs['PHP_CLASS'])) { $curVal['php_class'] = $attrs['PHP_CLASS']; } $this->_xh['valuestack'][] = $curVal; $this->_xh['vt'] = 'data'; // be prepared for a data element next break;>case 'DATA': if ($this->_xh['vt'] != 'data') { // two data elements inside a value: an error occurred! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "found two data elements inside an array element"; return; }>case 'METHODCALL': case 'METHODRESPONSE': case 'PARAMS': // valid elements that add little to processing break;>case 'METHODNAME': case 'NAME': /// @todo we could check for 2 NAME elements inside a MEMBER element $this->_xh['ac'] = ''; break;>case 'FAULT': $this->_xh['isf'] = 1; break;>case 'MEMBER': // set member name to null, in case we do not find in the xml later on< $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = '';> $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = null;//$this->_xh['ac']=''; // Drop trough intentionally>case 'PARAM': // clear value type, so we can check later if no value has been passed for this param/member $this->_xh['vt'] = null; break;>case 'NIL': case 'EX:NIL':< if (PhpXmlRpc::$xmlrpc_null_extension) {> if ($this->current_parsing_options['xmlrpc_null_extension']) {if ($this->_xh['vt'] != 'value') { // two data elements inside a value: an error occurred! $this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value"; return; }< $this->_xh['ac'] = ''; // reset the accumulator < break;> // reset the accumulator - q: is this necessary at all here? we don't use it on _ee anyway for NILs > $this->_xh['ac'] = ''; > > } else { > $this->_xh['isf'] = 2; > $this->_xh['isf_reason'] = 'Invalid NIL value received. Support for NIL can be enabled via \\PhpXmlRpc\\PhpXmlRpc::$xmlrpc_null_extension'; > > return;}< // if here, we do not support the <NIL/> extension, so < // drop through intentionally> break; >default:< // INVALID ELEMENT: RAISE ISF so that it is later recognized!!!> // INVALID ELEMENT: RAISE ISF so that it is later recognized > /// @todo feature creep = allow a callback instead$this->_xh['isf'] = 2; $this->_xh['isf_reason'] = "found not-xmlrpc xml element $name";< break;> > return;} // Save current element name to stack, to validate nesting $this->_xh['stack'][] = $name; /// @todo optimization creep: move this inside the big switch() above if ($name != 'VALUE') { $this->_xh['lv'] = 0; } }< } < < /** < * xml parser handler function for opening element tags. < * Used in decoding xml chunks that might represent single xmlrpc values as well as requests, responses. < * @deprecated < * @param resource $parser < * @param $name < * @param $attrs < */ < public function xmlrpc_se_any($parser, $name, $attrs) < { < $this->xmlrpc_se($parser, $name, $attrs, true); < }/** * xml parser handler function for close element tags. * @internal> ** @param resource $parser * @param string $name * @param int $rebuildXmlrpcvals >1 for rebuilding xmlrpcvals, 0 for rebuilding php values, -1 for xmlrpc-extension compatibility> * @return void */ > * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions) public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = 1) > * { > * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing if ($this->_xh['isf'] < 2) { > * and remove the checking for $this->_xh['isf'] >= 2 everywhere< if ($this->_xh['isf'] < 2) {> if ($this->_xh['isf'] >= 2) { > return; > } >< // NB: if XML validates, correct opening/closing is guaranteed and < // we do not have to check for $name == $currElem.> // NB: if XML validates, correct opening/closing is guaranteed and we do not have to check for $name == $currElem.// we also checked for proper nesting at start of elements... $currElem = array_pop($this->_xh['stack']); switch ($name) { case 'VALUE':< // This if() detects if no scalar was inside <VALUE></VALUE>> // If no scalar was inside <VALUE></VALUE>, it was a string valueif ($this->_xh['vt'] == 'value') { $this->_xh['value'] = $this->_xh['ac']; $this->_xh['vt'] = Value::$xmlrpcString; }> // in case there is charset conversion required, do it here, to catch both cases of string values if ($rebuildXmlrpcvals > 0) { > if (isset($this->current_parsing_options['target_charset']) && $this->_xh['vt'] === Value::$xmlrpcString) { // build the xmlrpc val out of the data received, and substitute it > $this->_xh['value'] = mb_convert_encoding($this->_xh['value'], $this->current_parsing_options['target_charset'], 'UTF-8'); $temp = new Value($this->_xh['value'], $this->_xh['vt']); > } // in case we got info about underlying php class, save it >< // build the xmlrpc val out of the data received, and substitute it> // build the xml-rpc val out of the data received, and substitute it< // in case we got info about underlying php class, save it < // in the object we're rebuilding> // in case we got info about underlying php class, save it in the object we're rebuilding} $this->_xh['value'] = $temp; } elseif ($rebuildXmlrpcvals < 0) { if ($this->_xh['vt'] == Value::$xmlrpcDateTime) { $this->_xh['value'] = (object)array( 'xmlrpc_type' => 'datetime', 'scalar' => $this->_xh['value'], 'timestamp' => \PhpXmlRpc\Helper\Date::iso8601Decode($this->_xh['value']) ); } elseif ($this->_xh['vt'] == Value::$xmlrpcBase64) { $this->_xh['value'] = (object)array( 'xmlrpc_type' => 'base64', 'scalar' => $this->_xh['value'] ); } } else {< /// @todo this should handle php-serialized objects, < /// since std deserializing is done by php_xmlrpc_decode, < /// which we will not be calling...> /// @todo this should handle php-serialized objects, since std deserializing is done > /// by php_xmlrpc_decode, which we will not be calling...//if (isset($this->_xh['php_class'])) { //} } // check if we are inside an array or struct: // if value just built is inside an array, let's move it into array on the stack $vscount = count($this->_xh['valuestack']); if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') { $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value']; } break;< case 'BOOLEAN': < case 'I4': < case 'I8': < case 'EX:I8': < case 'INT':>case 'STRING':< case 'DOUBLE': < case 'DATETIME.ISO8601': < case 'BASE64': < $this->_xh['vt'] = strtolower($name); < /// @todo: optimization creep - remove the if/elseif cycle below < /// since the case() in which we are already did that < if ($name == 'STRING') { < $this->_xh['value'] = $this->_xh['ac']; < } elseif ($name == 'DATETIME.ISO8601') { < if (!preg_match('/^[0-9]{8}T[0-9]{2}:[0-9]{2}:[0-9]{2}$/', $this->_xh['ac'])) { < Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid value received in DATETIME: ' . $this->_xh['ac']); < } < $this->_xh['vt'] = Value::$xmlrpcDateTime;> $this->_xh['vt'] = Value::$xmlrpcString; > $this->_xh['lv'] = 3; // indicate we've found a value$this->_xh['value'] = $this->_xh['ac'];< } elseif ($name == 'BASE64') { < /// @todo check for failure of base64 decoding / catch warnings < $this->_xh['value'] = base64_decode($this->_xh['ac']); < } elseif ($name == 'BOOLEAN') { < // special case here: we translate boolean 1 or 0 into PHP < // constants true or false. < // Strings 'true' and 'false' are accepted, even though the < // spec never mentions them (see eg. Blogger api docs) < // NB: this simple checks helps a lot sanitizing input, ie no < // security problems around here < if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') == 0) {> break; > > case 'BOOLEAN': > $this->_xh['vt'] = Value::$xmlrpcBoolean; > $this->_xh['lv'] = 3; // indicate we've found a value > // We translate boolean 1 or 0 into PHP constants true or false. Strings 'true' and 'false' are accepted, > // even though the spec never mentions them (see e.g. Blogger api docs) > // NB: this simple checks helps a lot sanitizing input, i.e. no security problems around here > // Note the non-strict type check: it will allow ' 1 ' > /// @todo feature-creep: use a flexible regexp, the same as we do with int, double and datetime. > /// Note that using a regexp would also make this test less sensitive to phpunit shenanigans, and > /// to changes in the way php compares strings (since 8.0, leading and trailing newlines are > /// accepted when deciding if a string numeric...) > if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') === 0) {$this->_xh['value'] = true; } else { // log if receiving something strange, even though we set the value to false anyway< if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') != 0) { < Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': invalid value received in BOOLEAN: ' . $this->_xh['ac']);> /// @todo to be consistent with the other types, we should return a value outside the good-value domain, e.g. NULL > if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') !== 0) { > if (!$this->handleParsingError('invalid data received in BOOLEAN value: ' . > $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) { > return; > }} $this->_xh['value'] = false; }< } elseif ($name == 'DOUBLE') { < // we have a DOUBLE < // we must check that only 0123456789-.<space> are characters here < // NOTE: regexp could be much stricter than this... < if (!preg_match('/^[+-eE0123456789 \t.]+$/', $this->_xh['ac'])) { < /// @todo: find a better way of throwing an error than this! < Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': non numeric value received in DOUBLE: ' . $this->_xh['ac']);> break; > > case 'EX:I8': > $name = 'i8'; > // fall through voluntarily > case 'I4': > case 'I8': > case 'INT': > // NB: we build the Value object with the original xml element name found, except for ex:i8. The > // `Value::scalarTyp()` function will do some normalization of the data > $this->_xh['vt'] = strtolower($name); > $this->_xh['lv'] = 3; // indicate we've found a value > if (!preg_match(PhpXmlRpc::$xmlrpc_int_format, $this->_xh['ac'])) { > if (!$this->handleParsingError('non numeric data received in INT value: ' . > $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) { > return; > } > /// @todo: find a better way of reporting an error value than this! Use NaN?$this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND'; } else { // it's ok, add it on< $this->_xh['value'] = (double)$this->_xh['ac'];> $this->_xh['value'] = (int)$this->_xh['ac'];}< } else { < // we have an I4/I8/INT < // we must check that only 0123456789-<space> are characters here < if (!preg_match('/^[+-]?[0123456789 \t]+$/', $this->_xh['ac'])) { < /// @todo find a better way of throwing an error than this! < Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': non numeric value received in INT: ' . $this->_xh['ac']);> break; > > case 'DOUBLE': > $this->_xh['vt'] = Value::$xmlrpcDouble; > $this->_xh['lv'] = 3; // indicate we've found a value > if (!preg_match(PhpXmlRpc::$xmlrpc_double_format, $this->_xh['ac'])) { > if (!$this->handleParsingError('non numeric data received in DOUBLE value: ' . > $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) { > return; > } >$this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND'; } else { // it's ok, add it on< $this->_xh['value'] = (int)$this->_xh['ac'];> $this->_xh['value'] = (double)$this->_xh['ac']; > } > break; > > case 'DATETIME.ISO8601': > $this->_xh['vt'] = Value::$xmlrpcDateTime; > $this->_xh['lv'] = 3; // indicate we've found a value > if (!preg_match(PhpXmlRpc::$xmlrpc_datetime_format, $this->_xh['ac'])) { > if (!$this->handleParsingError('invalid data received in DATETIME value: ' . > $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) { > return;} }> if ($this->current_parsing_options['xmlrpc_return_datetimes']) { $this->_xh['lv'] = 3; // indicate we've found a value > try { break; > $this->_xh['value'] = new \DateTime($this->_xh['ac']); case 'NAME': > $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = $this->_xh['ac']; > // the default regex used to validate the date string a few lines above should make this case impossible, break; > // but one never knows... case 'MEMBER': > } catch(\Exception $e) { // add to array in the stack the last element built, > // what to do? We can not guarantee that a valid date can be created. We return null... // unless no VALUE was found > if (!$this->handleParsingError('invalid data received in DATETIME value. Error ' . if ($this->_xh['vt']) { > $e->getMessage(), __METHOD__)) { $vscount = count($this->_xh['valuestack']); > return; $this->_xh['valuestack'][$vscount - 1]['values'][$this->_xh['valuestack'][$vscount - 1]['name']] = $this->_xh['value']; > } } else { > } Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': missing VALUE inside STRUCT in received xml'); > } else { } > $this->_xh['value'] = $this->_xh['ac']; break; > } case 'DATA': > break; $this->_xh['vt'] = null; // reset this to check for 2 data elements in a row - even if they're empty > break; > case 'BASE64': case 'STRUCT': > $this->_xh['vt'] = Value::$xmlrpcBase64;case 'ARRAY':> if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) { // fetch out of stack array of values, and promote it to current value > $v = base64_decode($this->_xh['ac'], true); $currVal = array_pop($this->_xh['valuestack']); > if ($v === false) { $this->_xh['value'] = $currVal['values']; > $this->_xh['isf'] = 2; $this->_xh['vt'] = strtolower($name); > $this->_xh['isf_reason'] = 'Invalid data received in BASE64 value: '. $this->truncateValueForLog($this->_xh['ac']); if (isset($currVal['php_class'])) { > return; $this->_xh['php_class'] = $currVal['php_class']; > } } > } else { break; > $v = base64_decode($this->_xh['ac']); case 'PARAM': > if ($v === '' && $this->_xh['ac'] !== '') { // add to array of params the current value, > // only the empty string should decode to the empty string // unless no VALUE was found > $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ': invalid data received in BASE64 value: ' . if ($this->_xh['vt']) { > $this->truncateValueForLog($this->_xh['ac'])); $this->_xh['params'][] = $this->_xh['value']; > } $this->_xh['pt'][] = $this->_xh['vt']; > } } else { > $this->_xh['value'] = $v;Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': missing VALUE inside PARAM in received xml');>}>< // add to array in the stack the last element built, < // unless no VALUE was found> // add to array in the stack the last element built, unless no VALUE or no NAME were found$this->_xh['method'] = preg_replace('/^[\n\r\t ]+/', '', $this->_xh['ac']);> if ($this->_xh['valuestack'][$vscount - 1]['name'] === null) { break; > if (!$this->handleParsingError('missing NAME inside STRUCT in received xml', __METHOD__)) { case 'NIL': > return; case 'EX:NIL': > } if (PhpXmlRpc::$xmlrpc_null_extension) { > $this->_xh['valuestack'][$vscount - 1]['name'] = ''; $this->_xh['vt'] = 'null'; > }< Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': missing VALUE inside STRUCT in received xml');> if (!$this->handleParsingError('missing VALUE inside STRUCT in received xml', __METHOD__)) { > return; > }$this->_xh['lv'] = 3;>break;>}>< // add to array of params the current value, < // unless no VALUE was found> // add to array of params the current value, unless no VALUE was found > /// @todo should we also check if there were two VALUE inside the PARAM?< Logger::instance()->errorLog('XML-RPC: ' . __METHOD__ . ': missing VALUE inside PARAM in received xml');> if (!$this->handleParsingError('missing VALUE inside PARAM in received xml', __METHOD__)) { > return; > }case 'METHODCALL':>< $this->_xh['method'] = preg_replace('/^[\n\r\t ]+/', '', $this->_xh['ac']);> if (!preg_match(PhpXmlRpc::$xmlrpc_methodname_format, $this->_xh['ac'])) { > if (!$this->handleParsingError('invalid data received in METHODNAME: '. > $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) { > return; > } > } > $methodName = trim($this->_xh['ac']); > $this->_xh['method'] = $methodName; > // we allow the callback to f.e. give us back a mangled method name by manipulating $this > if (isset($this->current_parsing_options['methodname_callback'])) { > call_user_func($this->current_parsing_options['methodname_callback'], $methodName, $this, $parser); > }break;>< if (PhpXmlRpc::$xmlrpc_null_extension) {> // NB: if NIL support is not enabled, parsing stops at element start. So this If is redundant > //if ($this->current_parsing_options['xmlrpc_null_extension']) {// End of INVALID ELEMENT!> //}< } < // drop through intentionally if nil extension not enabled> > /// @todo add extra checking: > /// - METHODRESPONSE should contain either a PARAMS with a single PARAM, or a FAULT > /// - FAULT should contain a single struct with the 2 expected members (check their name and type) > /// - METHODCALL should contain a methodname< case 'METHORESPONSE':> case 'METHODRESPONSE':}>< // End of INVALID ELEMENT! < // shall we add an assert here for unreachable code???> // End of INVALID ELEMENT > // Should we add an assert here for unreachable code? When an invalid element is found in xmlrpc_se, > // $this->_xh['isf'] is set to 2...< }< * Used in decoding xmlrpc requests/responses without rebuilding xmlrpc Values.> * Used in decoding xml-rpc requests/responses without rebuilding xml-rpc Values.* @internal> ** @param resource $parser * @param string $name> * @return void*/ public function xmlrpc_ee_fast($parser, $name) { $this->xmlrpc_ee($parser, $name, 0); } /**< * Used in decoding xmlrpc requests/responses while building xmlrpc-extension Values (plain php for all but base64 and datetime).> * Used in decoding xml-rpc requests/responses while building xmlrpc-extension Values (plain php for all but base64 and datetime).* @internal> ** @param resource $parser * @param string $name> * @return void*/ public function xmlrpc_ee_epi($parser, $name) { $this->xmlrpc_ee($parser, $name, -1); } /** * xml parser handler function for character data. * @internal> ** @param resource $parser * @param string $data> * @return void*/ public function xmlrpc_cd($parser, $data) { // skip processing if xml fault already detected< if ($this->_xh['isf'] < 2) { < // "lookforvalue==3" means that we've found an entire value < // and should discard any further character data> if ($this->_xh['isf'] >= 2) { > return; > } > > // "lookforvalue == 3" means that we've found an entire value and should discard any further character dataif ($this->_xh['lv'] != 3) { $this->_xh['ac'] .= $data; } }< }/**< * xml parser handler function for 'other stuff', ie. not char data or < * element start/end tag. In fact it only gets called on unknown entities...> * xml parser handler function for 'other stuff', i.e. not char data or element start/end tag. > * In fact, it only gets called on unknown entities...* @internal> ** @param $parser * @param string data> * @return void*/ public function xmlrpc_dh($parser, $data) { // skip processing if xml fault already detected< if ($this->_xh['isf'] < 2) {> if ($this->_xh['isf'] >= 2) { > return; > } >if (substr($data, 0, 1) == '&' && substr($data, -1, 1) == ';') { $this->_xh['ac'] .= $data; } }< //return true; < } </** * xml charset encoding guessing helper function. * Tries to determine the charset encoding of an XML chunk received over HTTP. * NB: according to the spec (RFC 3023), if text/xml content-type is received over HTTP without a content-type,< * we SHOULD assume it is strictly US-ASCII. But we try to be more tolerant of non conforming (legacy?) clients/servers,> * we SHOULD assume it is strictly US-ASCII. But we try to be more tolerant of non-conforming (legacy?) clients/servers,* which will be most probably using UTF-8 anyway... * In order of importance checks: * 1. http headers * 2. BOM * 3. XML declaration * 4. guesses using mb_detect_encoding() * * @param string $httpHeader the http Content-type header * @param string $xmlChunk xml content buffer * @param string $encodingPrefs comma separated list of character encodings to be used as default (when mb extension is enabled). * This can also be set globally using PhpXmlRpc::$xmlrpc_detectencodings * @return string the encoding determined. Null if it can't be determined and mbstring is enabled, * PhpXmlRpc::$xmlrpc_defencoding if it can't be determined and mbstring is not enabled * * @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of hand-detection!!!> * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc static variables, to make */ > * the method independent of global statepublic static function guessEncoding($httpHeader = '', $xmlChunk = '', $encodingPrefs = null) { // discussion: see http://www.yale.edu/pclt/encoding/ // 1 - test if encoding is specified in HTTP HEADERS // Details: // LWS: (\13\10)?( |\t)+ // token: (any char but excluded stuff)+ // quoted string: " (any char but double quotes and control chars)* " // header: Content-type = ...; charset=value(; ...)* // where value is of type token, no LWS allowed between 'charset' and value // Note: we do not check for invalid chars in VALUE: // this had better be done using pure ereg as below // Note 2: we might be removing whitespace/tabs that ought to be left in if // the received charset is a quoted string. But nobody uses such charset names... /// @todo this test will pass if ANY header has charset specification, not only Content-Type. Fix it? $matches = array(); if (preg_match('/;\s*charset\s*=([^;]+)/i', $httpHeader, $matches)) { return strtoupper(trim($matches[1], " \t\"")); } // 2 - scan the first bytes of the data for a UTF-16 (or other) BOM pattern // (source: http://www.w3.org/TR/2000/REC-xml-20001006) // NOTE: actually, according to the spec, even if we find the BOM and determine // an encoding, we should check if there is an encoding specified // in the xml declaration, and verify if they match. /// @todo implement check as described above? /// @todo implement check for first bytes of string even without a BOM? (It sure looks harder than for cases WITH a BOM)< if (preg_match('/^(\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) {> if (preg_match('/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) {return 'UCS-4';< } elseif (preg_match('/^(\xFE\xFF|\xFF\xFE)/', $xmlChunk)) {> } elseif (preg_match('/^(?:\xFE\xFF|\xFF\xFE)/', $xmlChunk)) {return 'UTF-16';< } elseif (preg_match('/^(\xEF\xBB\xBF)/', $xmlChunk)) {> } elseif (preg_match('/^(?:\xEF\xBB\xBF)/', $xmlChunk)) {return 'UTF-8'; } // 3 - test if encoding is specified in the xml declaration> /// @todo this regexp will fail if $xmlChunk uses UTF-32/UCS-4, and most likely UTF-16/UCS-2 as well. In that // Details: > /// case we leave the guesswork up to mbstring - which seems to be able to detect it, starting with php 5.6. // SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+ > /// For lower versions, we could attempt usage of mb_ereg...// EQ: SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]* if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" . '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/", $xmlChunk, $matches)) { return strtoupper(substr($matches[2], 1, -1)); } // 4 - if mbstring is available, let it do the guesswork< if (extension_loaded('mbstring')) {> if (function_exists('mb_detect_encoding')) {if ($encodingPrefs == null && PhpXmlRpc::$xmlrpc_detectencodings != null) { $encodingPrefs = PhpXmlRpc::$xmlrpc_detectencodings; } if ($encodingPrefs) { $enc = mb_detect_encoding($xmlChunk, $encodingPrefs); } else { $enc = mb_detect_encoding($xmlChunk); } // NB: mb_detect likes to call it ascii, xml parser likes to call it US_ASCII... // IANA also likes better US-ASCII, so go with it if ($enc == 'ASCII') { $enc = 'US-' . $enc; } return $enc; } else { // no encoding specified: as per HTTP1.1 assume it is iso-8859-1? // Both RFC 2616 (HTTP 1.1) and 1945 (HTTP 1.0) clearly state that for text/xxx content types // this should be the standard. And we should be getting text/xml as request and response. // BUT we have to be backward compatible with the lib, which always used UTF-8 as default... return PhpXmlRpc::$xmlrpc_defencoding; } } /**< * Helper function: checks if an xml chunk as a charset declaration (BOM or in the xml declaration)> * Helper function: checks if an xml chunk has a charset declaration (BOM or in the xml declaration).* * @param string $xmlChunk * @return bool> * */ > * @todo rename to hasEncodingDeclarationpublic static function hasEncoding($xmlChunk) { // scan the first bytes of the data for a UTF-16 (or other) BOM pattern // (source: http://www.w3.org/TR/2000/REC-xml-20001006)< if (preg_match('/^(\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) {> if (preg_match('/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) {return true;< } elseif (preg_match('/^(\xFE\xFF|\xFF\xFE)/', $xmlChunk)) {> } elseif (preg_match('/^(?:\xFE\xFF|\xFF\xFE)/', $xmlChunk)) {return true;< } elseif (preg_match('/^(\xEF\xBB\xBF)/', $xmlChunk)) {> } elseif (preg_match('/^(?:\xEF\xBB\xBF)/', $xmlChunk)) {return true; } // test if encoding is specified in the xml declaration // Details: // SPACE: (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+ // EQ: SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]* if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" . '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",< $xmlChunk, $matches)) {> $xmlChunk)) {return true; } return false;> } } > } > /** > * @param string $message > * @param string $method method/file/line info > * @return bool false if the caller has to stop parsing > */ > protected function handleParsingError($message, $method = '') > { > if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) { > $this->_xh['isf'] = 2; > $this->_xh['isf_reason'] = ucfirst($message); > return false; > } else { > $this->getLogger()->error('XML-RPC: ' . ($method != '' ? $method . ': ' : '') . $message); > return true; > } > } > > /** > * Truncates unsafe data > * @param string $data > * @return string > */ > protected function truncateValueForLog($data) > { > if (strlen($data) > $this->maxLogValueLength) { > return substr($data, 0, $this->maxLogValueLength - 3) . '...'; > } > > return $data; > } > > // *** BC layer *** > > /** > * xml parser handler function for opening element tags. > * Used in decoding xml chunks that might represent single xml-rpc values as well as requests, responses. > * @deprecated > * > * @param resource $parser > * @param $name > * @param $attrs > * @return void > */ > public function xmlrpc_se_any($parser, $name, $attrs) > { > // this will be spamming the log if this method is in use... > $this->logDeprecation('Method ' . __METHOD__ . ' is deprecated'); > > $this->xmlrpc_se($parser, $name, $attrs, true); > } > > public function &__get($name) > { > switch ($name) { > case '_xh': > case 'xmlrpc_valid_parents': > $this->logDeprecation('Getting property XMLParser::' . $name . ' is deprecated'); > return $this->$name; > default: > /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout... > $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1); > trigger_error('Undefined property via __get(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING); > $result = null; > return $result; > } > } > > public function __set($name, $value) > { > switch ($name) { > // this should only ever be called by subclasses which overtook `parse()` > case 'accept': > $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated'); > $this->current_parsing_options['accept'] = $value; > break; > case '_xh': > case 'xmlrpc_valid_parents': > $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated'); > $this->$name = $value; > break; > default: > /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout... > $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1); > trigger_error('Undefined property via __set(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING); > } > } > > public function __isset($name) > { > switch ($name) { > case 'accept': > $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated'); > return isset($this->current_parsing_options['accept']); > case '_xh': > case 'xmlrpc_valid_parents': > $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated'); > return isset($this->$name); > default: > return false; > } > } > > public function __unset($name) > { > switch ($name) { > // q: does this make sense at all? > case 'accept': > $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated'); > unset($this->current_parsing_options['accept']); > break; > case '_xh': > case 'xmlrpc_valid_parents': > $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated'); > unset($this->$name); > break; > default: > /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout... > $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1); > trigger_error('Undefined property via __unset(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING); > }