Differences Between: [Versions 310 and 400] [Versions 311 and 400] [Versions 39 and 400] [Versions 400 and 401] [Versions 400 and 402] [Versions 400 and 403]
1 <?php 2 3 namespace PhpOffice\PhpSpreadsheet\Reader; 4 5 use DateTime; 6 use DateTimeZone; 7 use PhpOffice\PhpSpreadsheet\Cell\AddressHelper; 8 use PhpOffice\PhpSpreadsheet\Cell\Coordinate; 9 use PhpOffice\PhpSpreadsheet\Cell\DataType; 10 use PhpOffice\PhpSpreadsheet\DefinedName; 11 use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; 12 use PhpOffice\PhpSpreadsheet\Reader\Xml\PageSettings; 13 use PhpOffice\PhpSpreadsheet\Reader\Xml\Properties; 14 use PhpOffice\PhpSpreadsheet\Reader\Xml\Style; 15 use PhpOffice\PhpSpreadsheet\RichText\RichText; 16 use PhpOffice\PhpSpreadsheet\Settings; 17 use PhpOffice\PhpSpreadsheet\Shared\Date; 18 use PhpOffice\PhpSpreadsheet\Shared\File; 19 use PhpOffice\PhpSpreadsheet\Shared\StringHelper; 20 use PhpOffice\PhpSpreadsheet\Spreadsheet; 21 use SimpleXMLElement; 22 23 /** 24 * Reader for SpreadsheetML, the XML schema for Microsoft Office Excel 2003. 25 */ 26 class Xml extends BaseReader 27 { 28 /** 29 * Formats. 30 * 31 * @var array 32 */ 33 protected $styles = []; 34 35 /** 36 * Create a new Excel2003XML Reader instance. 37 */ 38 public function __construct() 39 { 40 parent::__construct(); 41 $this->securityScanner = XmlScanner::getInstance($this); 42 } 43 44 private $fileContents = ''; 45 46 public static function xmlMappings(): array 47 { 48 return array_merge( 49 Style\Fill::FILL_MAPPINGS, 50 Style\Border::BORDER_MAPPINGS 51 ); 52 } 53 54 /** 55 * Can the current IReader read the file? 56 */ 57 public function canRead(string $filename): bool 58 { 59 // Office xmlns:o="urn:schemas-microsoft-com:office:office" 60 // Excel xmlns:x="urn:schemas-microsoft-com:office:excel" 61 // XML Spreadsheet xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" 62 // Spreadsheet component xmlns:c="urn:schemas-microsoft-com:office:component:spreadsheet" 63 // XML schema xmlns:s="uuid:BDC6E3F0-6DA3-11d1-A2A3-00AA00C14882" 64 // XML data type xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882" 65 // MS-persist recordset xmlns:rs="urn:schemas-microsoft-com:rowset" 66 // Rowset xmlns:z="#RowsetSchema" 67 // 68 69 $signature = [ 70 '<?xml version="1.0"', 71 'xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet', 72 ]; 73 74 // Open file 75 $data = file_get_contents($filename); 76 77 // Why? 78 //$data = str_replace("'", '"', $data); // fix headers with single quote 79 80 $valid = true; 81 foreach ($signature as $match) { 82 // every part of the signature must be present 83 if (strpos($data, $match) === false) { 84 $valid = false; 85 86 break; 87 } 88 } 89 90 // Retrieve charset encoding 91 if (preg_match('/<?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $data, $matches)) { 92 $charSet = strtoupper($matches[1]); 93 if (1 == preg_match('/^ISO-8859-\d[\dL]?$/i', $charSet)) { 94 $data = StringHelper::convertEncoding($data, 'UTF-8', $charSet); 95 $data = preg_replace('/(<?xml.*encoding=[\'"]).*?([\'"].*?>)/um', '$1' . 'UTF-8' . '$2', $data, 1); 96 } 97 } 98 $this->fileContents = $data; 99 100 return $valid; 101 } 102 103 /** 104 * Check if the file is a valid SimpleXML. 105 * 106 * @param string $filename 107 * 108 * @return false|SimpleXMLElement 109 */ 110 public function trySimpleXMLLoadString($filename) 111 { 112 try { 113 $xml = simplexml_load_string( 114 $this->securityScanner->scan($this->fileContents ?: file_get_contents($filename)), 115 'SimpleXMLElement', 116 Settings::getLibXmlLoaderOptions() 117 ); 118 } catch (\Exception $e) { 119 throw new Exception('Cannot load invalid XML file: ' . $filename, 0, $e); 120 } 121 $this->fileContents = ''; 122 123 return $xml; 124 } 125 126 /** 127 * Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object. 128 * 129 * @param string $filename 130 * 131 * @return array 132 */ 133 public function listWorksheetNames($filename) 134 { 135 File::assertFile($filename); 136 if (!$this->canRead($filename)) { 137 throw new Exception($filename . ' is an Invalid Spreadsheet file.'); 138 } 139 140 $worksheetNames = []; 141 142 $xml = $this->trySimpleXMLLoadString($filename); 143 if ($xml === false) { 144 throw new Exception("Problem reading {$filename}"); 145 } 146 147 $namespaces = $xml->getNamespaces(true); 148 149 $xml_ss = $xml->children($namespaces['ss']); 150 foreach ($xml_ss->Worksheet as $worksheet) { 151 $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']); 152 $worksheetNames[] = (string) $worksheet_ss['Name']; 153 } 154 155 return $worksheetNames; 156 } 157 158 /** 159 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). 160 * 161 * @param string $filename 162 * 163 * @return array 164 */ 165 public function listWorksheetInfo($filename) 166 { 167 File::assertFile($filename); 168 if (!$this->canRead($filename)) { 169 throw new Exception($filename . ' is an Invalid Spreadsheet file.'); 170 } 171 172 $worksheetInfo = []; 173 174 $xml = $this->trySimpleXMLLoadString($filename); 175 if ($xml === false) { 176 throw new Exception("Problem reading {$filename}"); 177 } 178 179 $namespaces = $xml->getNamespaces(true); 180 181 $worksheetID = 1; 182 $xml_ss = $xml->children($namespaces['ss']); 183 foreach ($xml_ss->Worksheet as $worksheet) { 184 $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']); 185 186 $tmpInfo = []; 187 $tmpInfo['worksheetName'] = ''; 188 $tmpInfo['lastColumnLetter'] = 'A'; 189 $tmpInfo['lastColumnIndex'] = 0; 190 $tmpInfo['totalRows'] = 0; 191 $tmpInfo['totalColumns'] = 0; 192 193 $tmpInfo['worksheetName'] = "Worksheet_{$worksheetID}"; 194 if (isset($worksheet_ss['Name'])) { 195 $tmpInfo['worksheetName'] = (string) $worksheet_ss['Name']; 196 } 197 198 if (isset($worksheet->Table->Row)) { 199 $rowIndex = 0; 200 201 foreach ($worksheet->Table->Row as $rowData) { 202 $columnIndex = 0; 203 $rowHasData = false; 204 205 foreach ($rowData->Cell as $cell) { 206 if (isset($cell->Data)) { 207 $tmpInfo['lastColumnIndex'] = max($tmpInfo['lastColumnIndex'], $columnIndex); 208 $rowHasData = true; 209 } 210 211 ++$columnIndex; 212 } 213 214 ++$rowIndex; 215 216 if ($rowHasData) { 217 $tmpInfo['totalRows'] = max($tmpInfo['totalRows'], $rowIndex); 218 } 219 } 220 } 221 222 $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1); 223 $tmpInfo['totalColumns'] = $tmpInfo['lastColumnIndex'] + 1; 224 225 $worksheetInfo[] = $tmpInfo; 226 ++$worksheetID; 227 } 228 229 return $worksheetInfo; 230 } 231 232 /** 233 * Loads Spreadsheet from file. 234 * 235 * @return Spreadsheet 236 */ 237 public function load(string $filename, int $flags = 0) 238 { 239 $this->processFlags($flags); 240 241 // Create new Spreadsheet 242 $spreadsheet = new Spreadsheet(); 243 $spreadsheet->removeSheetByIndex(0); 244 245 // Load into this instance 246 return $this->loadIntoExisting($filename, $spreadsheet); 247 } 248 249 /** 250 * Loads from file into Spreadsheet instance. 251 * 252 * @param string $filename 253 * 254 * @return Spreadsheet 255 */ 256 public function loadIntoExisting($filename, Spreadsheet $spreadsheet) 257 { 258 File::assertFile($filename); 259 if (!$this->canRead($filename)) { 260 throw new Exception($filename . ' is an Invalid Spreadsheet file.'); 261 } 262 263 $xml = $this->trySimpleXMLLoadString($filename); 264 if ($xml === false) { 265 throw new Exception("Problem reading {$filename}"); 266 } 267 268 $namespaces = $xml->getNamespaces(true); 269 270 (new Properties($spreadsheet))->readProperties($xml, $namespaces); 271 272 $this->styles = (new Style())->parseStyles($xml, $namespaces); 273 274 $worksheetID = 0; 275 $xml_ss = $xml->children($namespaces['ss']); 276 277 /** @var null|SimpleXMLElement $worksheetx */ 278 foreach ($xml_ss->Worksheet as $worksheetx) { 279 $worksheet = $worksheetx ?? new SimpleXMLElement('<xml></xml>'); 280 $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']); 281 282 if ( 283 isset($this->loadSheetsOnly, $worksheet_ss['Name']) && 284 (!in_array($worksheet_ss['Name'], $this->loadSheetsOnly)) 285 ) { 286 continue; 287 } 288 289 // Create new Worksheet 290 $spreadsheet->createSheet(); 291 $spreadsheet->setActiveSheetIndex($worksheetID); 292 $worksheetName = ''; 293 if (isset($worksheet_ss['Name'])) { 294 $worksheetName = (string) $worksheet_ss['Name']; 295 // Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in 296 // formula cells... during the load, all formulae should be correct, and we're simply bringing 297 // the worksheet name in line with the formula, not the reverse 298 $spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false); 299 } 300 301 // locally scoped defined names 302 if (isset($worksheet->Names[0])) { 303 foreach ($worksheet->Names[0] as $definedName) { 304 $definedName_ss = self::getAttributes($definedName, $namespaces['ss']); 305 $name = (string) $definedName_ss['Name']; 306 $definedValue = (string) $definedName_ss['RefersTo']; 307 $convertedValue = AddressHelper::convertFormulaToA1($definedValue); 308 if ($convertedValue[0] === '=') { 309 $convertedValue = substr($convertedValue, 1); 310 } 311 $spreadsheet->addDefinedName(DefinedName::createInstance($name, $spreadsheet->getActiveSheet(), $convertedValue, true)); 312 } 313 } 314 315 $columnID = 'A'; 316 if (isset($worksheet->Table->Column)) { 317 foreach ($worksheet->Table->Column as $columnData) { 318 $columnData_ss = self::getAttributes($columnData, $namespaces['ss']); 319 if (isset($columnData_ss['Index'])) { 320 $columnID = Coordinate::stringFromColumnIndex((int) $columnData_ss['Index']); 321 } 322 if (isset($columnData_ss['Width'])) { 323 $columnWidth = $columnData_ss['Width']; 324 $spreadsheet->getActiveSheet()->getColumnDimension($columnID)->setWidth($columnWidth / 5.4); 325 } 326 ++$columnID; 327 } 328 } 329 330 $rowID = 1; 331 if (isset($worksheet->Table->Row)) { 332 $additionalMergedCells = 0; 333 foreach ($worksheet->Table->Row as $rowData) { 334 $rowHasData = false; 335 $row_ss = self::getAttributes($rowData, $namespaces['ss']); 336 if (isset($row_ss['Index'])) { 337 $rowID = (int) $row_ss['Index']; 338 } 339 340 $columnID = 'A'; 341 foreach ($rowData->Cell as $cell) { 342 $cell_ss = self::getAttributes($cell, $namespaces['ss']); 343 if (isset($cell_ss['Index'])) { 344 $columnID = Coordinate::stringFromColumnIndex((int) $cell_ss['Index']); 345 } 346 $cellRange = $columnID . $rowID; 347 348 if ($this->getReadFilter() !== null) { 349 if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) { 350 ++$columnID; 351 352 continue; 353 } 354 } 355 356 if (isset($cell_ss['HRef'])) { 357 $spreadsheet->getActiveSheet()->getCell($cellRange)->getHyperlink()->setUrl((string) $cell_ss['HRef']); 358 } 359 360 if ((isset($cell_ss['MergeAcross'])) || (isset($cell_ss['MergeDown']))) { 361 $columnTo = $columnID; 362 if (isset($cell_ss['MergeAcross'])) { 363 $additionalMergedCells += (int) $cell_ss['MergeAcross']; 364 $columnTo = Coordinate::stringFromColumnIndex((int) (Coordinate::columnIndexFromString($columnID) + $cell_ss['MergeAcross'])); 365 } 366 $rowTo = $rowID; 367 if (isset($cell_ss['MergeDown'])) { 368 $rowTo = $rowTo + $cell_ss['MergeDown']; 369 } 370 $cellRange .= ':' . $columnTo . $rowTo; 371 $spreadsheet->getActiveSheet()->mergeCells($cellRange); 372 } 373 374 $hasCalculatedValue = false; 375 $cellDataFormula = ''; 376 if (isset($cell_ss['Formula'])) { 377 $cellDataFormula = $cell_ss['Formula']; 378 $hasCalculatedValue = true; 379 } 380 if (isset($cell->Data)) { 381 $cellData = $cell->Data; 382 $cellValue = (string) $cellData; 383 $type = DataType::TYPE_NULL; 384 $cellData_ss = self::getAttributes($cellData, $namespaces['ss']); 385 if (isset($cellData_ss['Type'])) { 386 $cellDataType = $cellData_ss['Type']; 387 switch ($cellDataType) { 388 /* 389 const TYPE_STRING = 's'; 390 const TYPE_FORMULA = 'f'; 391 const TYPE_NUMERIC = 'n'; 392 const TYPE_BOOL = 'b'; 393 const TYPE_NULL = 'null'; 394 const TYPE_INLINE = 'inlineStr'; 395 const TYPE_ERROR = 'e'; 396 */ 397 case 'String': 398 $type = DataType::TYPE_STRING; 399 400 break; 401 case 'Number': 402 $type = DataType::TYPE_NUMERIC; 403 $cellValue = (float) $cellValue; 404 if (floor($cellValue) == $cellValue) { 405 $cellValue = (int) $cellValue; 406 } 407 408 break; 409 case 'Boolean': 410 $type = DataType::TYPE_BOOL; 411 $cellValue = ($cellValue != 0); 412 413 break; 414 case 'DateTime': 415 $type = DataType::TYPE_NUMERIC; 416 $dateTime = new DateTime($cellValue, new DateTimeZone('UTC')); 417 $cellValue = Date::PHPToExcel($dateTime); 418 419 break; 420 case 'Error': 421 $type = DataType::TYPE_ERROR; 422 $hasCalculatedValue = false; 423 424 break; 425 } 426 } 427 428 if ($hasCalculatedValue) { 429 $type = DataType::TYPE_FORMULA; 430 $columnNumber = Coordinate::columnIndexFromString($columnID); 431 $cellDataFormula = AddressHelper::convertFormulaToA1($cellDataFormula, $rowID, $columnNumber); 432 } 433 434 $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setValueExplicit((($hasCalculatedValue) ? $cellDataFormula : $cellValue), $type); 435 if ($hasCalculatedValue) { 436 $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setCalculatedValue($cellValue); 437 } 438 $rowHasData = true; 439 } 440 441 if (isset($cell->Comment)) { 442 $this->parseCellComment($cell->Comment, $namespaces, $spreadsheet, $columnID, $rowID); 443 } 444 445 if (isset($cell_ss['StyleID'])) { 446 $style = (string) $cell_ss['StyleID']; 447 if ((isset($this->styles[$style])) && (!empty($this->styles[$style]))) { 448 //if (!$spreadsheet->getActiveSheet()->cellExists($columnID . $rowID)) { 449 // $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setValue(null); 450 //} 451 $spreadsheet->getActiveSheet()->getStyle($cellRange) 452 ->applyFromArray($this->styles[$style]); 453 } 454 } 455 ++$columnID; 456 while ($additionalMergedCells > 0) { 457 ++$columnID; 458 --$additionalMergedCells; 459 } 460 } 461 462 if ($rowHasData) { 463 if (isset($row_ss['Height'])) { 464 $rowHeight = $row_ss['Height']; 465 $spreadsheet->getActiveSheet()->getRowDimension($rowID)->setRowHeight((float) $rowHeight); 466 } 467 } 468 469 ++$rowID; 470 } 471 472 if (isset($namespaces['x'])) { 473 $xmlX = $worksheet->children($namespaces['x']); 474 if (isset($xmlX->WorksheetOptions)) { 475 (new PageSettings($xmlX, $namespaces))->loadPageSettings($spreadsheet); 476 } 477 } 478 } 479 ++$worksheetID; 480 } 481 482 // Globally scoped defined names 483 $activeWorksheet = $spreadsheet->setActiveSheetIndex(0); 484 if (isset($xml->Names[0])) { 485 foreach ($xml->Names[0] as $definedName) { 486 $definedName_ss = self::getAttributes($definedName, $namespaces['ss']); 487 $name = (string) $definedName_ss['Name']; 488 $definedValue = (string) $definedName_ss['RefersTo']; 489 $convertedValue = AddressHelper::convertFormulaToA1($definedValue); 490 if ($convertedValue[0] === '=') { 491 $convertedValue = substr($convertedValue, 1); 492 } 493 $spreadsheet->addDefinedName(DefinedName::createInstance($name, $activeWorksheet, $convertedValue)); 494 } 495 } 496 497 // Return 498 return $spreadsheet; 499 } 500 501 protected function parseCellComment( 502 SimpleXMLElement $comment, 503 array $namespaces, 504 Spreadsheet $spreadsheet, 505 string $columnID, 506 int $rowID 507 ): void { 508 $commentAttributes = $comment->attributes($namespaces['ss']); 509 $author = 'unknown'; 510 if (isset($commentAttributes->Author)) { 511 $author = (string) $commentAttributes->Author; 512 } 513 514 $node = $comment->Data->asXML(); 515 $annotation = strip_tags((string) $node); 516 $spreadsheet->getActiveSheet()->getComment($columnID . $rowID) 517 ->setAuthor($author) 518 ->setText($this->parseRichText($annotation)); 519 } 520 521 protected function parseRichText(string $annotation): RichText 522 { 523 $value = new RichText(); 524 525 $value->createText($annotation); 526 527 return $value; 528 } 529 530 private static function getAttributes(?SimpleXMLElement $simple, string $node): SimpleXMLElement 531 { 532 return ($simple === null) 533 ? new SimpleXMLElement('<xml></xml>') 534 : ($simple->attributes($node) ?? new SimpleXMLElement('<xml></xml>')); 535 } 536 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body