Search moodle.org's
Developer Documentation

See Release Notes
Long Term Support Release

  • Bug fixes for general core bugs in 4.1.x will end 13 November 2023 (12 months).
  • Bug fixes for security issues in 4.1.x will end 10 November 2025 (36 months).
  • PHP version: minimum PHP 7.4.0. Note: the minimum PHP version has increased since Moodle 4.0. PHP 8.0.x is also supported.

Differences Between: [Versions 310 and 401] [Versions 311 and 401] [Versions 39 and 401] [Versions 400 and 401] [Versions 401 and 402] [Versions 401 and 403]

   1  <?php
   2  
   3  namespace PhpOffice\PhpSpreadsheet\Reader;
   4  
   5  use DOMDocument;
   6  use DOMElement;
   7  use DOMNode;
   8  use DOMText;
   9  use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
  10  use PhpOffice\PhpSpreadsheet\Helper\Dimension as CssDimension;
  11  use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
  12  use PhpOffice\PhpSpreadsheet\Spreadsheet;
  13  use PhpOffice\PhpSpreadsheet\Style\Border;
  14  use PhpOffice\PhpSpreadsheet\Style\Color;
  15  use PhpOffice\PhpSpreadsheet\Style\Fill;
  16  use PhpOffice\PhpSpreadsheet\Style\Font;
  17  use PhpOffice\PhpSpreadsheet\Style\Style;
  18  use PhpOffice\PhpSpreadsheet\Worksheet\Drawing;
  19  use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
  20  use Throwable;
  21  
  22  class Html extends BaseReader
  23  {
  24      /**
  25       * Sample size to read to determine if it's HTML or not.
  26       */
  27      const TEST_SAMPLE_SIZE = 2048;
  28  
  29      /**
  30       * Input encoding.
  31       *
  32       * @var string
  33       */
  34      protected $inputEncoding = 'ANSI';
  35  
  36      /**
  37       * Sheet index to read.
  38       *
  39       * @var int
  40       */
  41      protected $sheetIndex = 0;
  42  
  43      /**
  44       * Formats.
  45       *
  46       * @var array
  47       */
  48      protected $formats = [
  49          'h1' => [
  50              'font' => [
  51                  'bold' => true,
  52                  'size' => 24,
  53              ],
  54          ], //    Bold, 24pt
  55          'h2' => [
  56              'font' => [
  57                  'bold' => true,
  58                  'size' => 18,
  59              ],
  60          ], //    Bold, 18pt
  61          'h3' => [
  62              'font' => [
  63                  'bold' => true,
  64                  'size' => 13.5,
  65              ],
  66          ], //    Bold, 13.5pt
  67          'h4' => [
  68              'font' => [
  69                  'bold' => true,
  70                  'size' => 12,
  71              ],
  72          ], //    Bold, 12pt
  73          'h5' => [
  74              'font' => [
  75                  'bold' => true,
  76                  'size' => 10,
  77              ],
  78          ], //    Bold, 10pt
  79          'h6' => [
  80              'font' => [
  81                  'bold' => true,
  82                  'size' => 7.5,
  83              ],
  84          ], //    Bold, 7.5pt
  85          'a' => [
  86              'font' => [
  87                  'underline' => true,
  88                  'color' => [
  89                      'argb' => Color::COLOR_BLUE,
  90                  ],
  91              ],
  92          ], //    Blue underlined
  93          'hr' => [
  94              'borders' => [
  95                  'bottom' => [
  96                      'borderStyle' => Border::BORDER_THIN,
  97                      'color' => [
  98                          Color::COLOR_BLACK,
  99                      ],
 100                  ],
 101              ],
 102          ], //    Bottom border
 103          'strong' => [
 104              'font' => [
 105                  'bold' => true,
 106              ],
 107          ], //    Bold
 108          'b' => [
 109              'font' => [
 110                  'bold' => true,
 111              ],
 112          ], //    Bold
 113          'i' => [
 114              'font' => [
 115                  'italic' => true,
 116              ],
 117          ], //    Italic
 118          'em' => [
 119              'font' => [
 120                  'italic' => true,
 121              ],
 122          ], //    Italic
 123      ];
 124  
 125      /** @var array */
 126      protected $rowspan = [];
 127  
 128      /**
 129       * Create a new HTML Reader instance.
 130       */
 131      public function __construct()
 132      {
 133          parent::__construct();
 134          $this->securityScanner = XmlScanner::getInstance($this);
 135      }
 136  
 137      /**
 138       * Validate that the current file is an HTML file.
 139       */
 140      public function canRead(string $filename): bool
 141      {
 142          // Check if file exists
 143          try {
 144              $this->openFile($filename);
 145          } catch (Exception $e) {
 146              return false;
 147          }
 148  
 149          $beginning = $this->readBeginning();
 150          $startWithTag = self::startsWithTag($beginning);
 151          $containsTags = self::containsTags($beginning);
 152          $endsWithTag = self::endsWithTag($this->readEnding());
 153  
 154          fclose($this->fileHandle);
 155  
 156          return $startWithTag && $containsTags && $endsWithTag;
 157      }
 158  
 159      private function readBeginning(): string
 160      {
 161          fseek($this->fileHandle, 0);
 162  
 163          return (string) fread($this->fileHandle, self::TEST_SAMPLE_SIZE);
 164      }
 165  
 166      private function readEnding(): string
 167      {
 168          $meta = stream_get_meta_data($this->fileHandle);
 169          $filename = $meta['uri'];
 170  
 171          $size = (int) filesize($filename);
 172          if ($size === 0) {
 173              return '';
 174          }
 175  
 176          $blockSize = self::TEST_SAMPLE_SIZE;
 177          if ($size < $blockSize) {
 178              $blockSize = $size;
 179          }
 180  
 181          fseek($this->fileHandle, $size - $blockSize);
 182  
 183          return (string) fread($this->fileHandle, $blockSize);
 184      }
 185  
 186      private static function startsWithTag(string $data): bool
 187      {
 188          return '<' === substr(trim($data), 0, 1);
 189      }
 190  
 191      private static function endsWithTag(string $data): bool
 192      {
 193          return '>' === substr(trim($data), -1, 1);
 194      }
 195  
 196      private static function containsTags(string $data): bool
 197      {
 198          return strlen($data) !== strlen(strip_tags($data));
 199      }
 200  
 201      /**
 202       * Loads Spreadsheet from file.
 203       */
 204      public function loadSpreadsheetFromFile(string $filename): Spreadsheet
 205      {
 206          // Create new Spreadsheet
 207          $spreadsheet = new Spreadsheet();
 208  
 209          // Load into this instance
 210          return $this->loadIntoExisting($filename, $spreadsheet);
 211      }
 212  
 213      /**
 214       * Set input encoding.
 215       *
 216       * @param string $inputEncoding Input encoding, eg: 'ANSI'
 217       *
 218       * @return $this
 219       *
 220       * @codeCoverageIgnore
 221       *
 222       * @deprecated no use is made of this property
 223       */
 224      public function setInputEncoding($inputEncoding)
 225      {
 226          $this->inputEncoding = $inputEncoding;
 227  
 228          return $this;
 229      }
 230  
 231      /**
 232       * Get input encoding.
 233       *
 234       * @return string
 235       *
 236       * @codeCoverageIgnore
 237       *
 238       * @deprecated no use is made of this property
 239       */
 240      public function getInputEncoding()
 241      {
 242          return $this->inputEncoding;
 243      }
 244  
 245      //    Data Array used for testing only, should write to Spreadsheet object on completion of tests
 246  
 247      /** @var array */
 248      protected $dataArray = [];
 249  
 250      /** @var int */
 251      protected $tableLevel = 0;
 252  
 253      /** @var array */
 254      protected $nestedColumn = ['A'];
 255  
 256      protected function setTableStartColumn(string $column): string
 257      {
 258          if ($this->tableLevel == 0) {
 259              $column = 'A';
 260          }
 261          ++$this->tableLevel;
 262          $this->nestedColumn[$this->tableLevel] = $column;
 263  
 264          return $this->nestedColumn[$this->tableLevel];
 265      }
 266  
 267      protected function getTableStartColumn(): string
 268      {
 269          return $this->nestedColumn[$this->tableLevel];
 270      }
 271  
 272      protected function releaseTableStartColumn(): string
 273      {
 274          --$this->tableLevel;
 275  
 276          return array_pop($this->nestedColumn);
 277      }
 278  
 279      /**
 280       * Flush cell.
 281       *
 282       * @param string $column
 283       * @param int|string $row
 284       * @param mixed $cellContent
 285       */
 286      protected function flushCell(Worksheet $sheet, $column, $row, &$cellContent): void
 287      {
 288          if (is_string($cellContent)) {
 289              //    Simple String content
 290              if (trim($cellContent) > '') {
 291                  //    Only actually write it if there's content in the string
 292                  //    Write to worksheet to be done here...
 293                  //    ... we return the cell so we can mess about with styles more easily
 294                  $sheet->setCellValue($column . $row, $cellContent);
 295                  $this->dataArray[$row][$column] = $cellContent;
 296              }
 297          } else {
 298              //    We have a Rich Text run
 299              //    TODO
 300              $this->dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
 301          }
 302          $cellContent = (string) '';
 303      }
 304  
 305      private function processDomElementBody(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child): void
 306      {
 307          $attributeArray = [];
 308          foreach (($child->attributes ?? []) as $attribute) {
 309              $attributeArray[$attribute->name] = $attribute->value;
 310          }
 311  
 312          if ($child->nodeName === 'body') {
 313              $row = 1;
 314              $column = 'A';
 315              $cellContent = '';
 316              $this->tableLevel = 0;
 317              $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 318          } else {
 319              $this->processDomElementTitle($sheet, $row, $column, $cellContent, $child, $attributeArray);
 320          }
 321      }
 322  
 323      private function processDomElementTitle(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 324      {
 325          if ($child->nodeName === 'title') {
 326              $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 327              $sheet->setTitle($cellContent, true, true);
 328              $cellContent = '';
 329          } else {
 330              $this->processDomElementSpanEtc($sheet, $row, $column, $cellContent, $child, $attributeArray);
 331          }
 332      }
 333  
 334      private const SPAN_ETC = ['span', 'div', 'font', 'i', 'em', 'strong', 'b'];
 335  
 336      private function processDomElementSpanEtc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 337      {
 338          if (in_array((string) $child->nodeName, self::SPAN_ETC, true)) {
 339              if (isset($attributeArray['class']) && $attributeArray['class'] === 'comment') {
 340                  $sheet->getComment($column . $row)
 341                      ->getText()
 342                      ->createTextRun($child->textContent);
 343              } else {
 344                  $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 345              }
 346  
 347              if (isset($this->formats[$child->nodeName])) {
 348                  $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
 349              }
 350          } else {
 351              $this->processDomElementHr($sheet, $row, $column, $cellContent, $child, $attributeArray);
 352          }
 353      }
 354  
 355      private function processDomElementHr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 356      {
 357          if ($child->nodeName === 'hr') {
 358              $this->flushCell($sheet, $column, $row, $cellContent);
 359              ++$row;
 360              if (isset($this->formats[$child->nodeName])) {
 361                  $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
 362              }
 363              ++$row;
 364          }
 365          // fall through to br
 366          $this->processDomElementBr($sheet, $row, $column, $cellContent, $child, $attributeArray);
 367      }
 368  
 369      private function processDomElementBr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 370      {
 371          if ($child->nodeName === 'br' || $child->nodeName === 'hr') {
 372              if ($this->tableLevel > 0) {
 373                  //    If we're inside a table, replace with a \n and set the cell to wrap
 374                  $cellContent .= "\n";
 375                  $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
 376              } else {
 377                  //    Otherwise flush our existing content and move the row cursor on
 378                  $this->flushCell($sheet, $column, $row, $cellContent);
 379                  ++$row;
 380              }
 381          } else {
 382              $this->processDomElementA($sheet, $row, $column, $cellContent, $child, $attributeArray);
 383          }
 384      }
 385  
 386      private function processDomElementA(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 387      {
 388          if ($child->nodeName === 'a') {
 389              foreach ($attributeArray as $attributeName => $attributeValue) {
 390                  switch ($attributeName) {
 391                      case 'href':
 392                          $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeValue);
 393                          if (isset($this->formats[$child->nodeName])) {
 394                              $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
 395                          }
 396  
 397                          break;
 398                      case 'class':
 399                          if ($attributeValue === 'comment-indicator') {
 400                              break; // Ignore - it's just a red square.
 401                          }
 402                  }
 403              }
 404              // no idea why this should be needed
 405              //$cellContent .= ' ';
 406              $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 407          } else {
 408              $this->processDomElementH1Etc($sheet, $row, $column, $cellContent, $child, $attributeArray);
 409          }
 410      }
 411  
 412      private const H1_ETC = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p'];
 413  
 414      private function processDomElementH1Etc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 415      {
 416          if (in_array((string) $child->nodeName, self::H1_ETC, true)) {
 417              if ($this->tableLevel > 0) {
 418                  //    If we're inside a table, replace with a \n
 419                  $cellContent .= $cellContent ? "\n" : '';
 420                  $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
 421                  $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 422              } else {
 423                  if ($cellContent > '') {
 424                      $this->flushCell($sheet, $column, $row, $cellContent);
 425                      ++$row;
 426                  }
 427                  $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 428                  $this->flushCell($sheet, $column, $row, $cellContent);
 429  
 430                  if (isset($this->formats[$child->nodeName])) {
 431                      $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
 432                  }
 433  
 434                  ++$row;
 435                  $column = 'A';
 436              }
 437          } else {
 438              $this->processDomElementLi($sheet, $row, $column, $cellContent, $child, $attributeArray);
 439          }
 440      }
 441  
 442      private function processDomElementLi(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 443      {
 444          if ($child->nodeName === 'li') {
 445              if ($this->tableLevel > 0) {
 446                  //    If we're inside a table, replace with a \n
 447                  $cellContent .= $cellContent ? "\n" : '';
 448                  $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 449              } else {
 450                  if ($cellContent > '') {
 451                      $this->flushCell($sheet, $column, $row, $cellContent);
 452                  }
 453                  ++$row;
 454                  $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 455                  $this->flushCell($sheet, $column, $row, $cellContent);
 456                  $column = 'A';
 457              }
 458          } else {
 459              $this->processDomElementImg($sheet, $row, $column, $cellContent, $child, $attributeArray);
 460          }
 461      }
 462  
 463      private function processDomElementImg(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 464      {
 465          if ($child->nodeName === 'img') {
 466              $this->insertImage($sheet, $column, $row, $attributeArray);
 467          } else {
 468              $this->processDomElementTable($sheet, $row, $column, $cellContent, $child, $attributeArray);
 469          }
 470      }
 471  
 472      private function processDomElementTable(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 473      {
 474          if ($child->nodeName === 'table') {
 475              $this->flushCell($sheet, $column, $row, $cellContent);
 476              $column = $this->setTableStartColumn($column);
 477              if ($this->tableLevel > 1 && $row > 1) {
 478                  --$row;
 479              }
 480              $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 481              $column = $this->releaseTableStartColumn();
 482              if ($this->tableLevel > 1) {
 483                  ++$column;
 484              } else {
 485                  ++$row;
 486              }
 487          } else {
 488              $this->processDomElementTr($sheet, $row, $column, $cellContent, $child, $attributeArray);
 489          }
 490      }
 491  
 492      private function processDomElementTr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 493      {
 494          if ($child->nodeName === 'tr') {
 495              $column = $this->getTableStartColumn();
 496              $cellContent = '';
 497              $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 498  
 499              if (isset($attributeArray['height'])) {
 500                  $sheet->getRowDimension($row)->setRowHeight($attributeArray['height']);
 501              }
 502  
 503              ++$row;
 504          } else {
 505              $this->processDomElementThTdOther($sheet, $row, $column, $cellContent, $child, $attributeArray);
 506          }
 507      }
 508  
 509      private function processDomElementThTdOther(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 510      {
 511          if ($child->nodeName !== 'td' && $child->nodeName !== 'th') {
 512              $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 513          } else {
 514              $this->processDomElementThTd($sheet, $row, $column, $cellContent, $child, $attributeArray);
 515          }
 516      }
 517  
 518      private function processDomElementBgcolor(Worksheet $sheet, int $row, string $column, array $attributeArray): void
 519      {
 520          if (isset($attributeArray['bgcolor'])) {
 521              $sheet->getStyle("$column$row")->applyFromArray(
 522                  [
 523                      'fill' => [
 524                          'fillType' => Fill::FILL_SOLID,
 525                          'color' => ['rgb' => $this->getStyleColor($attributeArray['bgcolor'])],
 526                      ],
 527                  ]
 528              );
 529          }
 530      }
 531  
 532      private function processDomElementWidth(Worksheet $sheet, string $column, array $attributeArray): void
 533      {
 534          if (isset($attributeArray['width'])) {
 535              $sheet->getColumnDimension($column)->setWidth((new CssDimension($attributeArray['width']))->width());
 536          }
 537      }
 538  
 539      private function processDomElementHeight(Worksheet $sheet, int $row, array $attributeArray): void
 540      {
 541          if (isset($attributeArray['height'])) {
 542              $sheet->getRowDimension($row)->setRowHeight((new CssDimension($attributeArray['height']))->height());
 543          }
 544      }
 545  
 546      private function processDomElementAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
 547      {
 548          if (isset($attributeArray['align'])) {
 549              $sheet->getStyle($column . $row)->getAlignment()->setHorizontal($attributeArray['align']);
 550          }
 551      }
 552  
 553      private function processDomElementVAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
 554      {
 555          if (isset($attributeArray['valign'])) {
 556              $sheet->getStyle($column . $row)->getAlignment()->setVertical($attributeArray['valign']);
 557          }
 558      }
 559  
 560      private function processDomElementDataFormat(Worksheet $sheet, int $row, string $column, array $attributeArray): void
 561      {
 562          if (isset($attributeArray['data-format'])) {
 563              $sheet->getStyle($column . $row)->getNumberFormat()->setFormatCode($attributeArray['data-format']);
 564          }
 565      }
 566  
 567      private function processDomElementThTd(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 568      {
 569          while (isset($this->rowspan[$column . $row])) {
 570              ++$column;
 571          }
 572          $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 573  
 574          // apply inline style
 575          $this->applyInlineStyle($sheet, $row, $column, $attributeArray);
 576  
 577          $this->flushCell($sheet, $column, $row, $cellContent);
 578  
 579          $this->processDomElementBgcolor($sheet, $row, $column, $attributeArray);
 580          $this->processDomElementWidth($sheet, $column, $attributeArray);
 581          $this->processDomElementHeight($sheet, $row, $attributeArray);
 582          $this->processDomElementAlign($sheet, $row, $column, $attributeArray);
 583          $this->processDomElementVAlign($sheet, $row, $column, $attributeArray);
 584          $this->processDomElementDataFormat($sheet, $row, $column, $attributeArray);
 585  
 586          if (isset($attributeArray['rowspan'], $attributeArray['colspan'])) {
 587              //create merging rowspan and colspan
 588              $columnTo = $column;
 589              for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
 590                  ++$columnTo;
 591              }
 592              $range = $column . $row . ':' . $columnTo . ($row + (int) $attributeArray['rowspan'] - 1);
 593              foreach (Coordinate::extractAllCellReferencesInRange($range) as $value) {
 594                  $this->rowspan[$value] = true;
 595              }
 596              $sheet->mergeCells($range);
 597              $column = $columnTo;
 598          } elseif (isset($attributeArray['rowspan'])) {
 599              //create merging rowspan
 600              $range = $column . $row . ':' . $column . ($row + (int) $attributeArray['rowspan'] - 1);
 601              foreach (Coordinate::extractAllCellReferencesInRange($range) as $value) {
 602                  $this->rowspan[$value] = true;
 603              }
 604              $sheet->mergeCells($range);
 605          } elseif (isset($attributeArray['colspan'])) {
 606              //create merging colspan
 607              $columnTo = $column;
 608              for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
 609                  ++$columnTo;
 610              }
 611              $sheet->mergeCells($column . $row . ':' . $columnTo . $row);
 612              $column = $columnTo;
 613          }
 614  
 615          ++$column;
 616      }
 617  
 618      protected function processDomElement(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent): void
 619      {
 620          foreach ($element->childNodes as $child) {
 621              if ($child instanceof DOMText) {
 622                  $domText = (string) preg_replace('/\s+/u', ' ', trim($child->nodeValue ?? ''));
 623                  if (is_string($cellContent)) {
 624                      //    simply append the text if the cell content is a plain text string
 625                      $cellContent .= $domText;
 626                  }
 627                  //    but if we have a rich text run instead, we need to append it correctly
 628                      //    TODO
 629              } elseif ($child instanceof DOMElement) {
 630                  $this->processDomElementBody($sheet, $row, $column, $cellContent, $child);
 631              }
 632          }
 633      }
 634  
 635      /**
 636       * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
 637       *
 638       * @param string $filename
 639       *
 640       * @return Spreadsheet
 641       */
 642      public function loadIntoExisting($filename, Spreadsheet $spreadsheet)
 643      {
 644          // Validate
 645          if (!$this->canRead($filename)) {
 646              throw new Exception($filename . ' is an Invalid HTML file.');
 647          }
 648  
 649          // Create a new DOM object
 650          $dom = new DOMDocument();
 651          // Reload the HTML file into the DOM object
 652          try {
 653              $convert = $this->securityScanner->scanFile($filename);
 654              $lowend = "\u{80}";
 655              $highend = "\u{10ffff}";
 656              $regexp = "/[$lowend-$highend]/u";
 657              /** @var callable */
 658              $callback = [self::class, 'replaceNonAscii'];
 659              $convert = preg_replace_callback($regexp, $callback, $convert);
 660              $loaded = ($convert === null) ? false : $dom->loadHTML($convert);
 661          } catch (Throwable $e) {
 662              $loaded = false;
 663          }
 664          if ($loaded === false) {
 665              throw new Exception('Failed to load ' . $filename . ' as a DOM Document', 0, $e ?? null);
 666          }
 667  
 668          return $this->loadDocument($dom, $spreadsheet);
 669      }
 670  
 671      private static function replaceNonAscii(array $matches): string
 672      {
 673          return '&#' . mb_ord($matches[0], 'UTF-8') . ';';
 674      }
 675  
 676      /**
 677       * Spreadsheet from content.
 678       *
 679       * @param string $content
 680       */
 681      public function loadFromString($content, ?Spreadsheet $spreadsheet = null): Spreadsheet
 682      {
 683          //    Create a new DOM object
 684          $dom = new DOMDocument();
 685          //    Reload the HTML file into the DOM object
 686          try {
 687              $convert = $this->securityScanner->scan($content);
 688              $lowend = "\u{80}";
 689              $highend = "\u{10ffff}";
 690              $regexp = "/[$lowend-$highend]/u";
 691              /** @var callable */
 692              $callback = [self::class, 'replaceNonAscii'];
 693              $convert = preg_replace_callback($regexp, $callback, $convert);
 694              $loaded = ($convert === null) ? false : $dom->loadHTML($convert);
 695          } catch (Throwable $e) {
 696              $loaded = false;
 697          }
 698          if ($loaded === false) {
 699              throw new Exception('Failed to load content as a DOM Document', 0, $e ?? null);
 700          }
 701  
 702          return $this->loadDocument($dom, $spreadsheet ?? new Spreadsheet());
 703      }
 704  
 705      /**
 706       * Loads PhpSpreadsheet from DOMDocument into PhpSpreadsheet instance.
 707       */
 708      private function loadDocument(DOMDocument $document, Spreadsheet $spreadsheet): Spreadsheet
 709      {
 710          while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
 711              $spreadsheet->createSheet();
 712          }
 713          $spreadsheet->setActiveSheetIndex($this->sheetIndex);
 714  
 715          // Discard white space
 716          $document->preserveWhiteSpace = false;
 717  
 718          $row = 0;
 719          $column = 'A';
 720          $content = '';
 721          $this->rowspan = [];
 722          $this->processDomElement($document, $spreadsheet->getActiveSheet(), $row, $column, $content);
 723  
 724          // Return
 725          return $spreadsheet;
 726      }
 727  
 728      /**
 729       * Get sheet index.
 730       *
 731       * @return int
 732       */
 733      public function getSheetIndex()
 734      {
 735          return $this->sheetIndex;
 736      }
 737  
 738      /**
 739       * Set sheet index.
 740       *
 741       * @param int $sheetIndex Sheet index
 742       *
 743       * @return $this
 744       */
 745      public function setSheetIndex($sheetIndex)
 746      {
 747          $this->sheetIndex = $sheetIndex;
 748  
 749          return $this;
 750      }
 751  
 752      /**
 753       * Apply inline css inline style.
 754       *
 755       * NOTES :
 756       * Currently only intended for td & th element,
 757       * and only takes 'background-color' and 'color'; property with HEX color
 758       *
 759       * TODO :
 760       * - Implement to other propertie, such as border
 761       *
 762       * @param int $row
 763       * @param string $column
 764       * @param array $attributeArray
 765       */
 766      private function applyInlineStyle(Worksheet &$sheet, $row, $column, $attributeArray): void
 767      {
 768          if (!isset($attributeArray['style'])) {
 769              return;
 770          }
 771  
 772          if (isset($attributeArray['rowspan'], $attributeArray['colspan'])) {
 773              $columnTo = $column;
 774              for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
 775                  ++$columnTo;
 776              }
 777              $range = $column . $row . ':' . $columnTo . ($row + (int) $attributeArray['rowspan'] - 1);
 778              $cellStyle = $sheet->getStyle($range);
 779          } elseif (isset($attributeArray['rowspan'])) {
 780              $range = $column . $row . ':' . $column . ($row + (int) $attributeArray['rowspan'] - 1);
 781              $cellStyle = $sheet->getStyle($range);
 782          } elseif (isset($attributeArray['colspan'])) {
 783              $columnTo = $column;
 784              for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
 785                  ++$columnTo;
 786              }
 787              $range = $column . $row . ':' . $columnTo . $row;
 788              $cellStyle = $sheet->getStyle($range);
 789          } else {
 790              $cellStyle = $sheet->getStyle($column . $row);
 791          }
 792  
 793          // add color styles (background & text) from dom element,currently support : td & th, using ONLY inline css style with RGB color
 794          $styles = explode(';', $attributeArray['style']);
 795          foreach ($styles as $st) {
 796              $value = explode(':', $st);
 797              $styleName = isset($value[0]) ? trim($value[0]) : null;
 798              $styleValue = isset($value[1]) ? trim($value[1]) : null;
 799              $styleValueString = (string) $styleValue;
 800  
 801              if (!$styleName) {
 802                  continue;
 803              }
 804  
 805              switch ($styleName) {
 806                  case 'background':
 807                  case 'background-color':
 808                      $styleColor = $this->getStyleColor($styleValueString);
 809  
 810                      if (!$styleColor) {
 811                          continue 2;
 812                      }
 813  
 814                      $cellStyle->applyFromArray(['fill' => ['fillType' => Fill::FILL_SOLID, 'color' => ['rgb' => $styleColor]]]);
 815  
 816                      break;
 817                  case 'color':
 818                      $styleColor = $this->getStyleColor($styleValueString);
 819  
 820                      if (!$styleColor) {
 821                          continue 2;
 822                      }
 823  
 824                      $cellStyle->applyFromArray(['font' => ['color' => ['rgb' => $styleColor]]]);
 825  
 826                      break;
 827  
 828                  case 'border':
 829                      $this->setBorderStyle($cellStyle, $styleValueString, 'allBorders');
 830  
 831                      break;
 832  
 833                  case 'border-top':
 834                      $this->setBorderStyle($cellStyle, $styleValueString, 'top');
 835  
 836                      break;
 837  
 838                  case 'border-bottom':
 839                      $this->setBorderStyle($cellStyle, $styleValueString, 'bottom');
 840  
 841                      break;
 842  
 843                  case 'border-left':
 844                      $this->setBorderStyle($cellStyle, $styleValueString, 'left');
 845  
 846                      break;
 847  
 848                  case 'border-right':
 849                      $this->setBorderStyle($cellStyle, $styleValueString, 'right');
 850  
 851                      break;
 852  
 853                  case 'font-size':
 854                      $cellStyle->getFont()->setSize(
 855                          (float) $styleValue
 856                      );
 857  
 858                      break;
 859  
 860                  case 'font-weight':
 861                      if ($styleValue === 'bold' || $styleValue >= 500) {
 862                          $cellStyle->getFont()->setBold(true);
 863                      }
 864  
 865                      break;
 866  
 867                  case 'font-style':
 868                      if ($styleValue === 'italic') {
 869                          $cellStyle->getFont()->setItalic(true);
 870                      }
 871  
 872                      break;
 873  
 874                  case 'font-family':
 875                      $cellStyle->getFont()->setName(str_replace('\'', '', $styleValueString));
 876  
 877                      break;
 878  
 879                  case 'text-decoration':
 880                      switch ($styleValue) {
 881                          case 'underline':
 882                              $cellStyle->getFont()->setUnderline(Font::UNDERLINE_SINGLE);
 883  
 884                              break;
 885                          case 'line-through':
 886                              $cellStyle->getFont()->setStrikethrough(true);
 887  
 888                              break;
 889                      }
 890  
 891                      break;
 892  
 893                  case 'text-align':
 894                      $cellStyle->getAlignment()->setHorizontal($styleValueString);
 895  
 896                      break;
 897  
 898                  case 'vertical-align':
 899                      $cellStyle->getAlignment()->setVertical($styleValueString);
 900  
 901                      break;
 902  
 903                  case 'width':
 904                      $sheet->getColumnDimension($column)->setWidth(
 905                          (new CssDimension($styleValue ?? ''))->width()
 906                      );
 907  
 908                      break;
 909  
 910                  case 'height':
 911                      $sheet->getRowDimension($row)->setRowHeight(
 912                          (new CssDimension($styleValue ?? ''))->height()
 913                      );
 914  
 915                      break;
 916  
 917                  case 'word-wrap':
 918                      $cellStyle->getAlignment()->setWrapText(
 919                          $styleValue === 'break-word'
 920                      );
 921  
 922                      break;
 923  
 924                  case 'text-indent':
 925                      $cellStyle->getAlignment()->setIndent(
 926                          (int) str_replace(['px'], '', $styleValueString)
 927                      );
 928  
 929                      break;
 930              }
 931          }
 932      }
 933  
 934      /**
 935       * Check if has #, so we can get clean hex.
 936       *
 937       * @param mixed $value
 938       *
 939       * @return null|string
 940       */
 941      public function getStyleColor($value)
 942      {
 943          $value = (string) $value;
 944          if (strpos($value ?? '', '#') === 0) {
 945              return substr($value, 1);
 946          }
 947  
 948          return \PhpOffice\PhpSpreadsheet\Helper\Html::colourNameLookup($value);
 949      }
 950  
 951      /**
 952       * @param string    $column
 953       * @param int       $row
 954       */
 955      private function insertImage(Worksheet $sheet, $column, $row, array $attributes): void
 956      {
 957          if (!isset($attributes['src'])) {
 958              return;
 959          }
 960  
 961          $src = urldecode($attributes['src']);
 962          $width = isset($attributes['width']) ? (float) $attributes['width'] : null;
 963          $height = isset($attributes['height']) ? (float) $attributes['height'] : null;
 964          $name = $attributes['alt'] ?? null;
 965  
 966          $drawing = new Drawing();
 967          $drawing->setPath($src);
 968          $drawing->setWorksheet($sheet);
 969          $drawing->setCoordinates($column . $row);
 970          $drawing->setOffsetX(0);
 971          $drawing->setOffsetY(10);
 972          $drawing->setResizeProportional(true);
 973  
 974          if ($name) {
 975              $drawing->setName($name);
 976          }
 977  
 978          if ($width) {
 979              $drawing->setWidth((int) $width);
 980          }
 981  
 982          if ($height) {
 983              $drawing->setHeight((int) $height);
 984          }
 985  
 986          $sheet->getColumnDimension($column)->setWidth(
 987              $drawing->getWidth() / 6
 988          );
 989  
 990          $sheet->getRowDimension($row)->setRowHeight(
 991              $drawing->getHeight() * 0.9
 992          );
 993      }
 994  
    /**
     * Lookup table from a border style keyword (as written in HTML/CSS) to the
     * corresponding Border style constant. Note that 'solid' maps to the thin border.
     */
    private const BORDER_MAPPINGS = [
        'dash-dot' => Border::BORDER_DASHDOT,
        'dash-dot-dot' => Border::BORDER_DASHDOTDOT,
        'dashed' => Border::BORDER_DASHED,
        'dotted' => Border::BORDER_DOTTED,
        'double' => Border::BORDER_DOUBLE,
        'hair' => Border::BORDER_HAIR,
        'medium' => Border::BORDER_MEDIUM,
        'medium-dashed' => Border::BORDER_MEDIUMDASHED,
        'medium-dash-dot' => Border::BORDER_MEDIUMDASHDOT,
        'medium-dash-dot-dot' => Border::BORDER_MEDIUMDASHDOTDOT,
        'none' => Border::BORDER_NONE,
        'slant-dash-dot' => Border::BORDER_SLANTDASHDOT,
        'solid' => Border::BORDER_THIN,
        'thick' => Border::BORDER_THICK,
    ];
1011  
1012      public static function getBorderMappings(): array
1013      {
1014          return self::BORDER_MAPPINGS;
1015      }
1016  
1017      /**
1018       * Map html border style to PhpSpreadsheet border style.
1019       *
1020       * @param  string $style
1021       *
1022       * @return null|string
1023       */
1024      public function getBorderStyle($style)
1025      {
1026          return self::BORDER_MAPPINGS[$style] ?? null;
1027      }
1028  
1029      /**
1030       * @param string $styleValue
1031       * @param string $type
1032       */
1033      private function setBorderStyle(Style $cellStyle, $styleValue, $type): void
1034      {
1035          if (trim($styleValue) === Border::BORDER_NONE) {
1036              $borderStyle = Border::BORDER_NONE;
1037              $color = null;
1038          } else {
1039              $borderArray = explode(' ', $styleValue);
1040              $borderCount = count($borderArray);
1041              if ($borderCount >= 3) {
1042                  $borderStyle = $borderArray[1];
1043                  $color = $borderArray[2];
1044              } else {
1045                  $borderStyle = $borderArray[0];
1046                  $color = $borderArray[1] ?? null;
1047              }
1048          }
1049  
1050          $cellStyle->applyFromArray([
1051              'borders' => [
1052                  $type => [
1053                      'borderStyle' => $this->getBorderStyle($borderStyle),
1054                      'color' => ['rgb' => $this->getStyleColor($color)],
1055                  ],
1056              ],
1057          ]);
1058      }
1059  }