Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 4.0.x will end 8 May 2023 (12 months).
  • Bug fixes for security issues in 4.0.x will end 13 November 2023 (18 months).
  • PHP version: minimum PHP 7.3.0 Note: the minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is also supported.
<?php

namespace PhpOffice\PhpSpreadsheet\Reader;

use DateTime;
use DateTimeZone;
use PhpOffice\PhpSpreadsheet\Cell\AddressHelper;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\DefinedName;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\Reader\Xml\PageSettings;
use PhpOffice\PhpSpreadsheet\Reader\Xml\Properties;
use PhpOffice\PhpSpreadsheet\Reader\Xml\Style;
use PhpOffice\PhpSpreadsheet\RichText\RichText;
use PhpOffice\PhpSpreadsheet\Settings;
use PhpOffice\PhpSpreadsheet\Shared\Date;
use PhpOffice\PhpSpreadsheet\Shared\File;
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
> use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
use SimpleXMLElement; /** * Reader for SpreadsheetML, the XML schema for Microsoft Office Excel 2003. */ class Xml extends BaseReader { /** * Formats. * * @var array */ protected $styles = []; /** * Create a new Excel2003XML Reader instance. */ public function __construct() { parent::__construct(); $this->securityScanner = XmlScanner::getInstance($this); }
> /** @var string */
private $fileContents = ''; public static function xmlMappings(): array { return array_merge( Style\Fill::FILL_MAPPINGS, Style\Border::BORDER_MAPPINGS ); } /** * Can the current IReader read the file? */ public function canRead(string $filename): bool { // Office xmlns:o="urn:schemas-microsoft-com:office:office" // Excel xmlns:x="urn:schemas-microsoft-com:office:excel" // XML Spreadsheet xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" // Spreadsheet component xmlns:c="urn:schemas-microsoft-com:office:component:spreadsheet" // XML schema xmlns:s="uuid:BDC6E3F0-6DA3-11d1-A2A3-00AA00C14882" // XML data type xmlns:dt="uuid:C2F41010-65B3-11d1-A29F-00AA00C14882" // MS-persist recordset xmlns:rs="urn:schemas-microsoft-com:rowset" // Rowset xmlns:z="#RowsetSchema" // $signature = [ '<?xml version="1.0"', 'xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet', ]; // Open file
< $data = file_get_contents($filename);
> $data = file_get_contents($filename) ?: '';
// Why? //$data = str_replace("'", '"', $data); // fix headers with single quote $valid = true; foreach ($signature as $match) { // every part of the signature must be present if (strpos($data, $match) === false) { $valid = false; break; } } // Retrieve charset encoding if (preg_match('/<?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $data, $matches)) { $charSet = strtoupper($matches[1]);
< if (1 == preg_match('/^ISO-8859-\d[\dL]?$/i', $charSet)) {
> if (preg_match('/^ISO-8859-\d[\dL]?$/i', $charSet) === 1) {
$data = StringHelper::convertEncoding($data, 'UTF-8', $charSet);
< $data = preg_replace('/(<?xml.*encoding=[\'"]).*?([\'"].*?>)/um', '$1' . 'UTF-8' . '$2', $data, 1);
> $data = (string) preg_replace('/(<?xml.*encoding=[\'"]).*?([\'"].*?>)/um', '$1' . 'UTF-8' . '$2', $data, 1);
} } $this->fileContents = $data; return $valid; } /** * Check if the file is a valid SimpleXML. * * @param string $filename * * @return false|SimpleXMLElement */ public function trySimpleXMLLoadString($filename) { try { $xml = simplexml_load_string(
< $this->securityScanner->scan($this->fileContents ?: file_get_contents($filename)),
> $this->getSecurityScannerOrThrow()->scan($this->fileContents ?: file_get_contents($filename)),
'SimpleXMLElement', Settings::getLibXmlLoaderOptions() ); } catch (\Exception $e) { throw new Exception('Cannot load invalid XML file: ' . $filename, 0, $e); } $this->fileContents = ''; return $xml; } /** * Reads names of the worksheets from a file, without parsing the whole file to a Spreadsheet object. * * @param string $filename * * @return array */ public function listWorksheetNames($filename) { File::assertFile($filename); if (!$this->canRead($filename)) { throw new Exception($filename . ' is an Invalid Spreadsheet file.'); } $worksheetNames = []; $xml = $this->trySimpleXMLLoadString($filename); if ($xml === false) { throw new Exception("Problem reading {$filename}"); } $namespaces = $xml->getNamespaces(true); $xml_ss = $xml->children($namespaces['ss']); foreach ($xml_ss->Worksheet as $worksheet) { $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']); $worksheetNames[] = (string) $worksheet_ss['Name']; } return $worksheetNames; } /** * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). * * @param string $filename * * @return array */ public function listWorksheetInfo($filename) { File::assertFile($filename); if (!$this->canRead($filename)) { throw new Exception($filename . ' is an Invalid Spreadsheet file.'); } $worksheetInfo = []; $xml = $this->trySimpleXMLLoadString($filename); if ($xml === false) { throw new Exception("Problem reading {$filename}"); } $namespaces = $xml->getNamespaces(true); $worksheetID = 1; $xml_ss = $xml->children($namespaces['ss']); foreach ($xml_ss->Worksheet as $worksheet) { $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']); $tmpInfo = []; $tmpInfo['worksheetName'] = ''; $tmpInfo['lastColumnLetter'] = 'A'; $tmpInfo['lastColumnIndex'] = 0; $tmpInfo['totalRows'] = 0; $tmpInfo['totalColumns'] = 0; $tmpInfo['worksheetName'] = "Worksheet_{$worksheetID}"; if (isset($worksheet_ss['Name'])) { $tmpInfo['worksheetName'] = (string) $worksheet_ss['Name']; } if (isset($worksheet->Table->Row)) { $rowIndex = 0; foreach ($worksheet->Table->Row as $rowData) { $columnIndex = 0; $rowHasData = false; foreach ($rowData->Cell as $cell) { if (isset($cell->Data)) { $tmpInfo['lastColumnIndex'] = max($tmpInfo['lastColumnIndex'], $columnIndex); $rowHasData = true; } ++$columnIndex; } ++$rowIndex; if ($rowHasData) { $tmpInfo['totalRows'] = max($tmpInfo['totalRows'], $rowIndex); } } } $tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['lastColumnIndex'] + 1); $tmpInfo['totalColumns'] = $tmpInfo['lastColumnIndex'] + 1; $worksheetInfo[] = $tmpInfo; ++$worksheetID; } return $worksheetInfo; } /** * Loads Spreadsheet from file.
< * < * @return Spreadsheet
*/
< public function load(string $filename, int $flags = 0)
> protected function loadSpreadsheetFromFile(string $filename): Spreadsheet
{
< $this->processFlags($flags); <
// Create new Spreadsheet $spreadsheet = new Spreadsheet(); $spreadsheet->removeSheetByIndex(0); // Load into this instance return $this->loadIntoExisting($filename, $spreadsheet); } /** * Loads from file into Spreadsheet instance. * * @param string $filename * * @return Spreadsheet */ public function loadIntoExisting($filename, Spreadsheet $spreadsheet) { File::assertFile($filename); if (!$this->canRead($filename)) { throw new Exception($filename . ' is an Invalid Spreadsheet file.'); } $xml = $this->trySimpleXMLLoadString($filename); if ($xml === false) { throw new Exception("Problem reading {$filename}"); } $namespaces = $xml->getNamespaces(true); (new Properties($spreadsheet))->readProperties($xml, $namespaces); $this->styles = (new Style())->parseStyles($xml, $namespaces); $worksheetID = 0; $xml_ss = $xml->children($namespaces['ss']); /** @var null|SimpleXMLElement $worksheetx */ foreach ($xml_ss->Worksheet as $worksheetx) { $worksheet = $worksheetx ?? new SimpleXMLElement('<xml></xml>'); $worksheet_ss = self::getAttributes($worksheet, $namespaces['ss']); if ( isset($this->loadSheetsOnly, $worksheet_ss['Name']) &&
< (!in_array($worksheet_ss['Name'], $this->loadSheetsOnly))
> (!in_array($worksheet_ss['Name'], /** @scrutinizer ignore-type */ $this->loadSheetsOnly))
) { continue; } // Create new Worksheet $spreadsheet->createSheet(); $spreadsheet->setActiveSheetIndex($worksheetID); $worksheetName = ''; if (isset($worksheet_ss['Name'])) { $worksheetName = (string) $worksheet_ss['Name']; // Use false for $updateFormulaCellReferences to prevent adjustment of worksheet references in // formula cells... during the load, all formulae should be correct, and we're simply bringing // the worksheet name in line with the formula, not the reverse $spreadsheet->getActiveSheet()->setTitle($worksheetName, false, false); } // locally scoped defined names if (isset($worksheet->Names[0])) { foreach ($worksheet->Names[0] as $definedName) { $definedName_ss = self::getAttributes($definedName, $namespaces['ss']); $name = (string) $definedName_ss['Name']; $definedValue = (string) $definedName_ss['RefersTo']; $convertedValue = AddressHelper::convertFormulaToA1($definedValue); if ($convertedValue[0] === '=') { $convertedValue = substr($convertedValue, 1); } $spreadsheet->addDefinedName(DefinedName::createInstance($name, $spreadsheet->getActiveSheet(), $convertedValue, true)); } } $columnID = 'A'; if (isset($worksheet->Table->Column)) { foreach ($worksheet->Table->Column as $columnData) { $columnData_ss = self::getAttributes($columnData, $namespaces['ss']); if (isset($columnData_ss['Index'])) { $columnID = Coordinate::stringFromColumnIndex((int) $columnData_ss['Index']); } if (isset($columnData_ss['Width'])) { $columnWidth = $columnData_ss['Width']; $spreadsheet->getActiveSheet()->getColumnDimension($columnID)->setWidth($columnWidth / 5.4); } ++$columnID; } } $rowID = 1; if (isset($worksheet->Table->Row)) { $additionalMergedCells = 0; foreach ($worksheet->Table->Row as $rowData) { $rowHasData = false; $row_ss = self::getAttributes($rowData, $namespaces['ss']); if (isset($row_ss['Index'])) { $rowID = (int) $row_ss['Index']; } $columnID = 'A'; foreach ($rowData->Cell as $cell) { $cell_ss = self::getAttributes($cell, $namespaces['ss']); if (isset($cell_ss['Index'])) { $columnID = Coordinate::stringFromColumnIndex((int) $cell_ss['Index']); } $cellRange = $columnID . $rowID; if ($this->getReadFilter() !== null) { if (!$this->getReadFilter()->readCell($columnID, $rowID, $worksheetName)) { ++$columnID; continue; } } if (isset($cell_ss['HRef'])) { $spreadsheet->getActiveSheet()->getCell($cellRange)->getHyperlink()->setUrl((string) $cell_ss['HRef']); } if ((isset($cell_ss['MergeAcross'])) || (isset($cell_ss['MergeDown']))) { $columnTo = $columnID; if (isset($cell_ss['MergeAcross'])) { $additionalMergedCells += (int) $cell_ss['MergeAcross']; $columnTo = Coordinate::stringFromColumnIndex((int) (Coordinate::columnIndexFromString($columnID) + $cell_ss['MergeAcross'])); } $rowTo = $rowID; if (isset($cell_ss['MergeDown'])) { $rowTo = $rowTo + $cell_ss['MergeDown']; } $cellRange .= ':' . $columnTo . $rowTo;
< $spreadsheet->getActiveSheet()->mergeCells($cellRange);
> $spreadsheet->getActiveSheet()->mergeCells($cellRange, Worksheet::MERGE_CELL_CONTENT_HIDE);
} $hasCalculatedValue = false; $cellDataFormula = ''; if (isset($cell_ss['Formula'])) { $cellDataFormula = $cell_ss['Formula']; $hasCalculatedValue = true; } if (isset($cell->Data)) { $cellData = $cell->Data; $cellValue = (string) $cellData; $type = DataType::TYPE_NULL; $cellData_ss = self::getAttributes($cellData, $namespaces['ss']); if (isset($cellData_ss['Type'])) { $cellDataType = $cellData_ss['Type']; switch ($cellDataType) { /* const TYPE_STRING = 's'; const TYPE_FORMULA = 'f'; const TYPE_NUMERIC = 'n'; const TYPE_BOOL = 'b'; const TYPE_NULL = 'null'; const TYPE_INLINE = 'inlineStr'; const TYPE_ERROR = 'e'; */ case 'String': $type = DataType::TYPE_STRING; break; case 'Number': $type = DataType::TYPE_NUMERIC; $cellValue = (float) $cellValue; if (floor($cellValue) == $cellValue) { $cellValue = (int) $cellValue; } break; case 'Boolean': $type = DataType::TYPE_BOOL; $cellValue = ($cellValue != 0); break; case 'DateTime': $type = DataType::TYPE_NUMERIC; $dateTime = new DateTime($cellValue, new DateTimeZone('UTC')); $cellValue = Date::PHPToExcel($dateTime); break; case 'Error': $type = DataType::TYPE_ERROR; $hasCalculatedValue = false; break; } } if ($hasCalculatedValue) { $type = DataType::TYPE_FORMULA; $columnNumber = Coordinate::columnIndexFromString($columnID); $cellDataFormula = AddressHelper::convertFormulaToA1($cellDataFormula, $rowID, $columnNumber); } $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setValueExplicit((($hasCalculatedValue) ? $cellDataFormula : $cellValue), $type); if ($hasCalculatedValue) { $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setCalculatedValue($cellValue); } $rowHasData = true; } if (isset($cell->Comment)) { $this->parseCellComment($cell->Comment, $namespaces, $spreadsheet, $columnID, $rowID); } if (isset($cell_ss['StyleID'])) { $style = (string) $cell_ss['StyleID']; if ((isset($this->styles[$style])) && (!empty($this->styles[$style]))) { //if (!$spreadsheet->getActiveSheet()->cellExists($columnID . $rowID)) { // $spreadsheet->getActiveSheet()->getCell($columnID . $rowID)->setValue(null); //} $spreadsheet->getActiveSheet()->getStyle($cellRange) ->applyFromArray($this->styles[$style]); } } ++$columnID; while ($additionalMergedCells > 0) { ++$columnID; --$additionalMergedCells; } } if ($rowHasData) { if (isset($row_ss['Height'])) { $rowHeight = $row_ss['Height']; $spreadsheet->getActiveSheet()->getRowDimension($rowID)->setRowHeight((float) $rowHeight); } } ++$rowID; } if (isset($namespaces['x'])) { $xmlX = $worksheet->children($namespaces['x']); if (isset($xmlX->WorksheetOptions)) { (new PageSettings($xmlX, $namespaces))->loadPageSettings($spreadsheet); } } } ++$worksheetID; } // Globally scoped defined names $activeWorksheet = $spreadsheet->setActiveSheetIndex(0); if (isset($xml->Names[0])) { foreach ($xml->Names[0] as $definedName) { $definedName_ss = self::getAttributes($definedName, $namespaces['ss']); $name = (string) $definedName_ss['Name']; $definedValue = (string) $definedName_ss['RefersTo']; $convertedValue = AddressHelper::convertFormulaToA1($definedValue); if ($convertedValue[0] === '=') { $convertedValue = substr($convertedValue, 1); } $spreadsheet->addDefinedName(DefinedName::createInstance($name, $activeWorksheet, $convertedValue)); } } // Return return $spreadsheet; } protected function parseCellComment( SimpleXMLElement $comment, array $namespaces, Spreadsheet $spreadsheet, string $columnID, int $rowID ): void { $commentAttributes = $comment->attributes($namespaces['ss']); $author = 'unknown'; if (isset($commentAttributes->Author)) { $author = (string) $commentAttributes->Author; } $node = $comment->Data->asXML(); $annotation = strip_tags((string) $node); $spreadsheet->getActiveSheet()->getComment($columnID . $rowID) ->setAuthor($author) ->setText($this->parseRichText($annotation)); } protected function parseRichText(string $annotation): RichText { $value = new RichText(); $value->createText($annotation); return $value; } private static function getAttributes(?SimpleXMLElement $simple, string $node): SimpleXMLElement { return ($simple === null) ? new SimpleXMLElement('<xml></xml>') : ($simple->attributes($node) ?? new SimpleXMLElement('<xml></xml>')); } }