See Release Notes
Long Term Support Release
Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401] [Versions 400 and 401] [Versions 401 and 402] [Versions 401 and 403]
1 <?php 2 3 namespace PhpOffice\PhpSpreadsheet\Reader; 4 5 use DateTime; 6 use DateTimeZone; 7 use PhpOffice\PhpSpreadsheet\Cell\AddressHelper; 8 use PhpOffice\PhpSpreadsheet\Cell\Coordinate; 9 use PhpOffice\PhpSpreadsheet\Cell\DataType; 10 use PhpOffice\PhpSpreadsheet\DefinedName; 11 use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; 12 use PhpOffice\PhpSpreadsheet\Reader\Xml\PageSettings; 13 use PhpOffice\PhpSpreadsheet\Reader\Xml\Properties; 14 use PhpOffice\PhpSpreadsheet\Reader\Xml\Style; 15 use PhpOffice\PhpSpreadsheet\RichText\RichText; 16 use PhpOffice\PhpSpreadsheet\Settings; 17 use PhpOffice\PhpSpreadsheet\Shared\Date; 18 use PhpOffice\PhpSpreadsheet\Shared\File; 19 use PhpOffice\PhpSpreadsheet\Shared\StringHelper; 20 use PhpOffice\PhpSpreadsheet\Spreadsheet; 21 use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet; 22 use SimpleXMLElement; 23 24 /** 25 * Reader for SpreadsheetML, the XML schema for Microsoft Office Excel 2003. 26 */ 27 class Xml extends BaseReader 28 { 29 /** 30 * Formats. 31 * 32 * @var array 33 */ 34 protected $styles = []; 35 36 /** 37 * Create a new Excel2003XML Reader instance. 38 */ 39 public function __construct() 40 { 41 parent::__construct(); 42 $this->securityScanner = XmlScanner::getInstance($this); 43 } 44 45 private $fileContents = ''; 46 47 public static function xmlMappings(): array 48 { 49 return array_merge( 50 Style\Fill::FILL_MAPPINGS, 51 Style\Border::BORDER_MAPPINGS 52 ); 53 } 54 55 /** 56 * Can the current IReader read the file? 57 */ 58 public function canRead(string $filename): bool 59 { 60 // Office xmlns:o="urn:schemas-microsoft-com:office:office" 61 // Excel xmlns:x="urn:schemas-microsoft-com:office:excel" 62 // XML Spreadsheet xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" 63 // Spreadsheet component xmlns:c="urn:schemas-microsoft-com:office:component:spreadsheet" 64 // XML schema xmlns:s="uuid:BDC6E3F0-6DA3-11d1-A2A3-00AA00C14882" 65 // XML data type xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882" 66 // MS-persist recordset xmlns:rs="urn:schemas-microsoft-com:rowset" 67 // Rowset xmlns:z="#RowsetSchema" 68 // 69 70 $signature = [ 71 '<?xml version="1.0"', 72 'xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet', 73 ]; 74 75 // Open file 76 $data = file_get_contents($filename); 77 78 // Why? 79 //$data = str_replace("'", '"', $data); // fix headers with single quote 80 81 $valid = true; 82 foreach ($signature as $match) { 83 // every part of the signature must be present 84 if (strpos($data, $match) === false) { 85 $valid = false; 86 87 break; 88 } 89 } 90 91 // Retrieve charset encoding 92 if (preg_match('/<?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $data, $matches)) { 93 $charSet = strtoupper($matches[1]); 94 if (preg_match('/^ISO-8859-\d[\dL]?$/i', $charSet) === 1) { 95 $data = StringHelper::convertEncoding($data, 'UTF-8', $charSet); 96 $data = (string) preg_replace('/(<?xml.*encoding=[\'"]).*?([\'"].*?>)/um', '$1' . 'UTF-8' . '$2', $data, 1); 97 } 98 } 99 $this->fileContents = $data; 100 101 return $valid; 102 } 103 104 /** 105 * Check if the file is a valid SimpleXML. 106 * 107 * @param string $filename 108 * 109 * @return false|SimpleXMLElement 110 */ 111 public function trySimpleXMLLoadString($filename) 112 { 113 try { 114 $xml = simplexml_load_string( 115 $this->securityScanner->scan($this->fileContents ?: file_get_contents($filename)), 116 'SimpleXMLElement', 117 Settings::getLibXmlLoaderOptions() 118 ); 119 } catch (\Exception $e) { 120 throw new Exception('Cannot load invalid XML file: ' . $filename, 0, $e); 121 } 122 $this->fileContents = ''; 123 124 return $xml; 125 } 126 127 /** 128 * Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object. 129 * 130 * @param string $filename 131 * 132 * @return array 133 */ 134 public function listWorksheetNames($filename) 135 { 136 File::assertFile($filename); 137 if (!$this->canRead($filename)) { 138 throw new Exception($filename . ' is an Invalid Spreadsheet file.'); 139 } 140 141 $worksheetNames = []; 142 143 $xml = $this->trySimpleXMLLoadString($filename); 144 if ($xml === false) { 145 throw new Exception("Problem reading {$filename}"); 146 } 147 148 $namespaces = $xml->getNamespaces(true); 149 150 $xml_ss = $xml->children($namespaces['ss']); 151 foreach ($xml_ss->Worksheet as $worksheet) { 152 $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']); 153 $worksheetNames[] = (string) $worksheet_ss['Name']; 154 } 155 156 return $worksheetNames; 157 } 158 159 /** 160 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). 161 * 162 * @param string $filename 163 * 164 * @return array 165 */ 166 public function listWorksheetInfo($filename) 167 { 168 File::assertFile($filename); 169 if (!$this->canRead($filename)) { 170 throw new Exception($filename . ' is an Invalid Spreadsheet file.'); 171 } 172 173 $worksheetInfo = []; 174 175 $xml = $this->trySimpleXMLLoadString($filename); 176 if ($xml === false) { 177 throw new Exception("Problem reading {$filename}"); 178 } 179 180 $namespaces = $xml->getNamespaces(true); 181 182 $worksheetID = 1; 183 $xml_ss = $xml->children($namespaces['ss']); 184 foreach ($xml_ss->Worksheet as $worksheet) { 185 $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']); 186 187 $tmpInfo = []; 188 $tmpInfo['worksheetName'] = ''; 189 $tmpInfo['lastColumnLetter'] = 'A'; 190 $tmpInfo['lastColumnIndex'] = 0; 191 $tmpInfo['totalRows'] = 0; 192 $tmpInfo['totalColumns'] = 0; 193 194 $tmpInfo['worksheetName'] = "Worksheet_{$worksheetID}"; 195 if (isset($worksheet_ss['Name'])) { 196 $tmpInfo['worksheetName'] = (string) $worksheet_ss['Name']; 197 } 198 199 if (isset($worksheet->Table->Row)) { 200 $rowIndex = 0; 201 202 foreach ($worksheet->Table->Row as $rowData) { 203 $columnIndex = 0; 204 $rowHasData = false; 205 206 foreach ($rowData->Cell as $cell) { 207 if (isset($cell->Data)) { 208 $tmpInfo['lastColumnIndex'] = max($tmpInfo['lastColumnIndex'], $columnIndex); 209 $rowHasData = true; 210 } 211 212 ++$columnIndex; 213 } 214 215 ++$rowIndex; 216 217 if ($rowHasData) { 218 $tmpInfo['totalRows'] = max($tmpInfo['totalRows'], $rowIndex); 219 } 220 } 221 } 222 223 $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1); 224 $tmpInfo['totalColumns'] = $tmpInfo['lastColumnIndex'] + 1; 225 226 $worksheetInfo[] = $tmpInfo; 227 ++$worksheetID; 228 } 229 230 return $worksheetInfo; 231 } 232 233 /** 234 * Loads Spreadsheet from file. 235 */ 236 protected function loadSpreadsheetFromFile(string $filename): Spreadsheet 237 { 238 // Create new Spreadsheet 239 $spreadsheet = new Spreadsheet(); 240 $spreadsheet->removeSheetByIndex(0); 241 242 // Load into this instance 243 return $this->loadIntoExisting($filename, $spreadsheet); 244 } 245 246 /** 247 * Loads from file into Spreadsheet instance. 248 * 249 * @param string $filename 250 * 251 * @return Spreadsheet 252 */ 253 public function loadIntoExisting($filename, Spreadsheet $spreadsheet) 254 { 255 File::assertFile($filename); 256 if (!$this->canRead($filename)) { 257 throw new Exception($filename . ' is an Invalid Spreadsheet file.'); 258 } 259 260 $xml = $this->trySimpleXMLLoadString($filename); 261 if ($xml === false) { 262 throw new Exception("Problem reading {$filename}"); 263 } 264 265 $namespaces = $xml->getNamespaces(true); 266 267 (new Properties($spreadsheet))->readProperties($xml, $namespaces); 268 269 $this->styles = (new Style())->parseStyles($xml, $namespaces); 270 271 $worksheetID = 0; 272 $xml_ss = $xml->children($namespaces['ss']); 273 274 /** @var null|SimpleXMLElement $worksheetx */ 275 foreach ($xml_ss->Worksheet as $worksheetx) { 276 $worksheet = $worksheetx ?? new SimpleXMLElement('<xml></xml>'); 277 $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']); 278 279 if ( 280 isset($this->loadSheetsOnly, $worksheet_ss['Name']) && 281 (!in_array($worksheet_ss['Name'], $this->loadSheetsOnly)) 282 ) { 283 continue; 284 } 285 286 // Create new Worksheet 287 $spreadsheet->createSheet(); 288 $spreadsheet->setActiveSheetIndex($worksheetID); 289 $worksheetName = ''; 290 if (isset($worksheet_ss['Name'])) { 291 $worksheetName = (string) $worksheet_ss['Name']; 292 // Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in 293 // formula cells... during the load, all formulae should be correct, and we're simply bringing 294 // the worksheet name in line with the formula, not the reverse 295 $spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false); 296 } 297 298 // locally scoped defined names 299 if (isset($worksheet->Names[0])) { 300 foreach ($worksheet->Names[0] as $definedName) { 301 $definedName_ss = self::getAttributes($definedName, $namespaces['ss']); 302 $name = (string) $definedName_ss['Name']; 303 $definedValue = (string) $definedName_ss['RefersTo']; 304 $convertedValue = AddressHelper::convertFormulaToA1($definedValue); 305 if ($convertedValue[0] === '=') { 306 $convertedValue = substr($convertedValue, 1); 307 } 308 $spreadsheet->addDefinedName(DefinedName::createInstance($name, $spreadsheet->getActiveSheet(), $convertedValue, true)); 309 } 310 } 311 312 $columnID = 'A'; 313 if (isset($worksheet->Table->Column)) { 314 foreach ($worksheet->Table->Column as $columnData) { 315 $columnData_ss = self::getAttributes($columnData, $namespaces['ss']); 316 if (isset($columnData_ss['Index'])) { 317 $columnID = Coordinate::stringFromColumnIndex((int) $columnData_ss['Index']); 318 } 319 if (isset($columnData_ss['Width'])) { 320 $columnWidth = $columnData_ss['Width']; 321 $spreadsheet->getActiveSheet()->getColumnDimension($columnID)->setWidth($columnWidth / 5.4); 322 } 323 ++$columnID; 324 } 325 } 326 327 $rowID = 1; 328 if (isset($worksheet->Table->Row)) { 329 $additionalMergedCells = 0; 330 foreach ($worksheet->Table->Row as $rowData) { 331 $rowHasData = false; 332 $row_ss = self::getAttributes($rowData, $namespaces['ss']); 333 if (isset($row_ss['Index'])) { 334 $rowID = (int) $row_ss['Index']; 335 } 336 337 $columnID = 'A'; 338 foreach ($rowData->Cell as $cell) { 339 $cell_ss = self::getAttributes($cell, $namespaces['ss']); 340 if (isset($cell_ss['Index'])) { 341 $columnID = Coordinate::stringFromColumnIndex((int) $cell_ss['Index']); 342 } 343 $cellRange = $columnID . $rowID; 344 345 if ($this->getReadFilter() !== null) { 346 if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) { 347 ++$columnID; 348 349 continue; 350 } 351 } 352 353 if (isset($cell_ss['HRef'])) { 354 $spreadsheet->getActiveSheet()->getCell($cellRange)->getHyperlink()->setUrl((string) $cell_ss['HRef']); 355 } 356 357 if ((isset($cell_ss['MergeAcross'])) || (isset($cell_ss['MergeDown']))) { 358 $columnTo = $columnID; 359 if (isset($cell_ss['MergeAcross'])) { 360 $additionalMergedCells += (int) $cell_ss['MergeAcross']; 361 $columnTo = Coordinate::stringFromColumnIndex((int) (Coordinate::columnIndexFromString($columnID) + $cell_ss['MergeAcross'])); 362 } 363 $rowTo = $rowID; 364 if (isset($cell_ss['MergeDown'])) { 365 $rowTo = $rowTo + $cell_ss['MergeDown']; 366 } 367 $cellRange .= ':' . $columnTo . $rowTo; 368 $spreadsheet->getActiveSheet()->mergeCells($cellRange, Worksheet::MERGE_CELL_CONTENT_HIDE); 369 } 370 371 $hasCalculatedValue = false; 372 $cellDataFormula = ''; 373 if (isset($cell_ss['Formula'])) { 374 $cellDataFormula = $cell_ss['Formula']; 375 $hasCalculatedValue = true; 376 } 377 if (isset($cell->Data)) { 378 $cellData = $cell->Data; 379 $cellValue = (string) $cellData; 380 $type = DataType::TYPE_NULL; 381 $cellData_ss = self::getAttributes($cellData, $namespaces['ss']); 382 if (isset($cellData_ss['Type'])) { 383 $cellDataType = $cellData_ss['Type']; 384 switch ($cellDataType) { 385 /* 386 const TYPE_STRING = 's'; 387 const TYPE_FORMULA = 'f'; 388 const TYPE_NUMERIC = 'n'; 389 const TYPE_BOOL = 'b'; 390 const TYPE_NULL = 'null'; 391 const TYPE_INLINE = 'inlineStr'; 392 const TYPE_ERROR = 'e'; 393 */ 394 case 'String': 395 $type = DataType::TYPE_STRING; 396 397 break; 398 case 'Number': 399 $type = DataType::TYPE_NUMERIC; 400 $cellValue = (float) $cellValue; 401 if (floor($cellValue) == $cellValue) { 402 $cellValue = (int) $cellValue; 403 } 404 405 break; 406 case 'Boolean': 407 $type = DataType::TYPE_BOOL; 408 $cellValue = ($cellValue != 0); 409 410 break; 411 case 'DateTime': 412 $type = DataType::TYPE_NUMERIC; 413 $dateTime = new DateTime($cellValue, new DateTimeZone('UTC')); 414 $cellValue = Date::PHPToExcel($dateTime); 415 416 break; 417 case 'Error': 418 $type = DataType::TYPE_ERROR; 419 $hasCalculatedValue = false; 420 421 break; 422 } 423 } 424 425 if ($hasCalculatedValue) { 426 $type = DataType::TYPE_FORMULA; 427 $columnNumber = Coordinate::columnIndexFromString($columnID); 428 $cellDataFormula = AddressHelper::convertFormulaToA1($cellDataFormula, $rowID, $columnNumber); 429 } 430 431 $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setValueExplicit((($hasCalculatedValue) ? $cellDataFormula : $cellValue), $type); 432 if ($hasCalculatedValue) { 433 $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setCalculatedValue($cellValue); 434 } 435 $rowHasData = true; 436 } 437 438 if (isset($cell->Comment)) { 439 $this->parseCellComment($cell->Comment, $namespaces, $spreadsheet, $columnID, $rowID); 440 } 441 442 if (isset($cell_ss['StyleID'])) { 443 $style = (string) $cell_ss['StyleID']; 444 if ((isset($this->styles[$style])) && (!empty($this->styles[$style]))) { 445 //if (!$spreadsheet->getActiveSheet()->cellExists($columnID . $rowID)) { 446 // $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setValue(null); 447 //} 448 $spreadsheet->getActiveSheet()->getStyle($cellRange) 449 ->applyFromArray($this->styles[$style]); 450 } 451 } 452 ++$columnID; 453 while ($additionalMergedCells > 0) { 454 ++$columnID; 455 --$additionalMergedCells; 456 } 457 } 458 459 if ($rowHasData) { 460 if (isset($row_ss['Height'])) { 461 $rowHeight = $row_ss['Height']; 462 $spreadsheet->getActiveSheet()->getRowDimension($rowID)->setRowHeight((float) $rowHeight); 463 } 464 } 465 466 ++$rowID; 467 } 468 469 if (isset($namespaces['x'])) { 470 $xmlX = $worksheet->children($namespaces['x']); 471 if (isset($xmlX->WorksheetOptions)) { 472 (new PageSettings($xmlX, $namespaces))->loadPageSettings($spreadsheet); 473 } 474 } 475 } 476 ++$worksheetID; 477 } 478 479 // Globally scoped defined names 480 $activeWorksheet = $spreadsheet->setActiveSheetIndex(0); 481 if (isset($xml->Names[0])) { 482 foreach ($xml->Names[0] as $definedName) { 483 $definedName_ss = self::getAttributes($definedName, $namespaces['ss']); 484 $name = (string) $definedName_ss['Name']; 485 $definedValue = (string) $definedName_ss['RefersTo']; 486 $convertedValue = AddressHelper::convertFormulaToA1($definedValue); 487 if ($convertedValue[0] === '=') { 488 $convertedValue = substr($convertedValue, 1); 489 } 490 $spreadsheet->addDefinedName(DefinedName::createInstance($name, $activeWorksheet, $convertedValue)); 491 } 492 } 493 494 // Return 495 return $spreadsheet; 496 } 497 498 protected function parseCellComment( 499 SimpleXMLElement $comment, 500 array $namespaces, 501 Spreadsheet $spreadsheet, 502 string $columnID, 503 int $rowID 504 ): void { 505 $commentAttributes = $comment->attributes($namespaces['ss']); 506 $author = 'unknown'; 507 if (isset($commentAttributes->Author)) { 508 $author = (string) $commentAttributes->Author; 509 } 510 511 $node = $comment->Data->asXML(); 512 $annotation = strip_tags((string) $node); 513 $spreadsheet->getActiveSheet()->getComment($columnID . $rowID) 514 ->setAuthor($author) 515 ->setText($this->parseRichText($annotation)); 516 } 517 518 protected function parseRichText(string $annotation): RichText 519 { 520 $value = new RichText(); 521 522 $value->createText($annotation); 523 524 return $value; 525 } 526 527 private static function getAttributes(?SimpleXMLElement $simple, string $node): SimpleXMLElement 528 { 529 return ($simple === null) 530 ? new SimpleXMLElement('<xml></xml>') 531 : ($simple->attributes($node) ?? new SimpleXMLElement('<xml></xml>')); 532 } 533 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body