Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.11.x will end 14 Nov 2022 (12 months plus 6 months extension).
  • Bug fixes for security issues in 3.11.x will end 13 Nov 2023 (18 months plus 12 months extension).
  • PHP version: minimum PHP 7.3.0 Note: minimum PHP version has increased since Moodle 3.10. PHP 7.4.x is supported too.

Differences Between: [Versions 310 and 311] [Versions 311 and 400] [Versions 311 and 401] [Versions 311 and 402] [Versions 311 and 403] [Versions 39 and 311]

   1  <?php
   2  
   3  namespace PhpOffice\PhpSpreadsheet\Reader;
   4  
   5  use DOMDocument;
   6  use DOMElement;
   7  use DOMNode;
   8  use DOMText;
   9  use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
  10  use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
  11  use PhpOffice\PhpSpreadsheet\Spreadsheet;
  12  use PhpOffice\PhpSpreadsheet\Style\Border;
  13  use PhpOffice\PhpSpreadsheet\Style\Color;
  14  use PhpOffice\PhpSpreadsheet\Style\Fill;
  15  use PhpOffice\PhpSpreadsheet\Style\Font;
  16  use PhpOffice\PhpSpreadsheet\Style\Style;
  17  use PhpOffice\PhpSpreadsheet\Worksheet\Drawing;
  18  use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
  19  use Throwable;
  20  
  21  /** Reader that loads an HTML document into a Spreadsheet worksheet. */
  22  class Html extends BaseReader
  23  {
  24      /**
  25       * Sample size to read to determine if it's HTML or not.
  26       */
  27      const TEST_SAMPLE_SIZE = 2048;
  28  
  29      /**
  30       * Input encoding.
  31       *
  32       * @var string
  33       */
  34      protected $inputEncoding = 'ANSI';
  35  
  36      /**
  37       * Sheet index to read.
  38       *
  39       * @var int
  40       */
  41      protected $sheetIndex = 0;
  42  
  43      /**
  44       * Formats.
  45       *
  46       * @var array
  47       */
  48      protected $formats = [
  49          'h1' => [
  50              'font' => [
  51                  'bold' => true,
  52                  'size' => 24,
  53              ],
  54          ], //    Bold, 24pt
  55          'h2' => [
  56              'font' => [
  57                  'bold' => true,
  58                  'size' => 18,
  59              ],
  60          ], //    Bold, 18pt
  61          'h3' => [
  62              'font' => [
  63                  'bold' => true,
  64                  'size' => 13.5,
  65              ],
  66          ], //    Bold, 13.5pt
  67          'h4' => [
  68              'font' => [
  69                  'bold' => true,
  70                  'size' => 12,
  71              ],
  72          ], //    Bold, 12pt
  73          'h5' => [
  74              'font' => [
  75                  'bold' => true,
  76                  'size' => 10,
  77              ],
  78          ], //    Bold, 10pt
  79          'h6' => [
  80              'font' => [
  81                  'bold' => true,
  82                  'size' => 7.5,
  83              ],
  84          ], //    Bold, 7.5pt
  85          'a' => [
  86              'font' => [
  87                  'underline' => true,
  88                  'color' => [
  89                      'argb' => Color::COLOR_BLUE,
  90                  ],
  91              ],
  92          ], //    Blue underlined
  93          'hr' => [
  94              'borders' => [
  95                  'bottom' => [
  96                      'borderStyle' => Border::BORDER_THIN,
  97                      'color' => [
  98                          Color::COLOR_BLACK,
  99                      ],
 100                  ],
 101              ],
 102          ], //    Bottom border
 103          'strong' => [
 104              'font' => [
 105                  'bold' => true,
 106              ],
 107          ], //    Bold
 108          'b' => [
 109              'font' => [
 110                  'bold' => true,
 111              ],
 112          ], //    Bold
 113          'i' => [
 114              'font' => [
 115                  'italic' => true,
 116              ],
 117          ], //    Italic
 118          'em' => [
 119              'font' => [
 120                  'italic' => true,
 121              ],
 122          ], //    Italic
 123      ];
 124  
 125      protected $rowspan = [];
 126  
 127      /**
 128       * Create a new HTML Reader instance.
 129       */
 130      public function __construct()
 131      {
 132          parent::__construct();
 133          $this->securityScanner = XmlScanner::getInstance($this);
 134      }
 135  
 136      /**
 137       * Validate that the current file is an HTML file.
 138       *
 139       * @param string $pFilename
 140       *
 141       * @return bool
 142       */
 143      public function canRead($pFilename)
 144      {
 145          // Check if file exists
 146          try {
 147              $this->openFile($pFilename);
 148          } catch (Exception $e) {
 149              return false;
 150          }
 151  
 152          $beginning = $this->readBeginning();
 153          $startWithTag = self::startsWithTag($beginning);
 154          $containsTags = self::containsTags($beginning);
 155          $endsWithTag = self::endsWithTag($this->readEnding());
 156  
 157          fclose($this->fileHandle);
 158  
 159          return $startWithTag && $containsTags && $endsWithTag;
 160      }
 161  
 162      private function readBeginning()
 163      {
 164          fseek($this->fileHandle, 0);
 165  
 166          return fread($this->fileHandle, self::TEST_SAMPLE_SIZE);
 167      }
 168  
 169      private function readEnding()
 170      {
 171          $meta = stream_get_meta_data($this->fileHandle);
 172          $filename = $meta['uri'];
 173  
 174          $size = filesize($filename);
 175          if ($size === 0) {
 176              return '';
 177          }
 178  
 179          $blockSize = self::TEST_SAMPLE_SIZE;
 180          if ($size < $blockSize) {
 181              $blockSize = $size;
 182          }
 183  
 184          fseek($this->fileHandle, $size - $blockSize);
 185  
 186          return fread($this->fileHandle, $blockSize);
 187      }
 188  
 189      private static function startsWithTag($data)
 190      {
 191          return '<' === substr(trim($data), 0, 1);
 192      }
 193  
 194      private static function endsWithTag($data)
 195      {
 196          return '>' === substr(trim($data), -1, 1);
 197      }
 198  
 199      private static function containsTags($data)
 200      {
 201          return strlen($data) !== strlen(strip_tags($data));
 202      }
 203  
 204      /**
 205       * Loads Spreadsheet from file.
 206       *
 207       * @param string $pFilename
 208       *
 209       * @return Spreadsheet
 210       */
 211      public function load($pFilename)
 212      {
 213          // Create new Spreadsheet
 214          $spreadsheet = new Spreadsheet();
 215  
 216          // Load into this instance
 217          return $this->loadIntoExisting($pFilename, $spreadsheet);
 218      }
 219  
 220      /**
 221       * Set input encoding.
 222       *
 223       * @deprecated no use is made of this property
 224       *
 225       * @param string $pValue Input encoding, eg: 'ANSI'
 226       *
 227       * @return $this
 228       *
 229       * @codeCoverageIgnore
 230       */
 231      public function setInputEncoding($pValue)
 232      {
 233          $this->inputEncoding = $pValue;
 234  
 235          return $this;
 236      }
 237  
 238      /**
 239       * Get input encoding.
 240       *
 241       * @deprecated no use is made of this property
 242       *
 243       * @return string
 244       *
 245       * @codeCoverageIgnore
 246       */
 247      public function getInputEncoding()
 248      {
 249          return $this->inputEncoding;
 250      }
 251  
 252      //    Data Array used for testing only, should write to Spreadsheet object on completion of tests
 253      protected $dataArray = [];
 254  
 255      protected $tableLevel = 0;
 256  
 257      protected $nestedColumn = ['A'];
 258  
 259      protected function setTableStartColumn($column)
 260      {
 261          if ($this->tableLevel == 0) {
 262              $column = 'A';
 263          }
 264          ++$this->tableLevel;
 265          $this->nestedColumn[$this->tableLevel] = $column;
 266  
 267          return $this->nestedColumn[$this->tableLevel];
 268      }
 269  
 270      protected function getTableStartColumn()
 271      {
 272          return $this->nestedColumn[$this->tableLevel];
 273      }
 274  
 275      protected function releaseTableStartColumn()
 276      {
 277          --$this->tableLevel;
 278  
 279          return array_pop($this->nestedColumn);
 280      }
 281  
 282      protected function flushCell(Worksheet $sheet, $column, $row, &$cellContent): void
 283      {
 284          if (is_string($cellContent)) {
 285              //    Simple String content
 286              if (trim($cellContent) > '') {
 287                  //    Only actually write it if there's content in the string
 288                  //    Write to worksheet to be done here...
 289                  //    ... we return the cell so we can mess about with styles more easily
 290                  $sheet->setCellValue($column . $row, $cellContent);
 291                  $this->dataArray[$row][$column] = $cellContent;
 292              }
 293          } else {
 294              //    We have a Rich Text run
 295              //    TODO
 296              $this->dataArray[$row][$column] = 'RICH TEXT: ' . $cellContent;
 297          }
 298          $cellContent = (string) '';
 299      }
 300  
 301      private function processDomElementBody(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child): void
 302      {
 303          $attributeArray = [];
 304          foreach ($child->attributes as $attribute) {
 305              $attributeArray[$attribute->name] = $attribute->value;
 306          }
 307  
 308          if ($child->nodeName === 'body') {
 309              $row = 1;
 310              $column = 'A';
 311              $cellContent = '';
 312              $this->tableLevel = 0;
 313              $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 314          } else {
 315              $this->processDomElementTitle($sheet, $row, $column, $cellContent, $child, $attributeArray);
 316          }
 317      }
 318  
 319      private function processDomElementTitle(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 320      {
 321          if ($child->nodeName === 'title') {
 322              $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 323              $sheet->setTitle($cellContent, true, false);
 324              $cellContent = '';
 325          } else {
 326              $this->processDomElementSpanEtc($sheet, $row, $column, $cellContent, $child, $attributeArray);
 327          }
 328      }
 329  
 330      private static $spanEtc = ['span', 'div', 'font', 'i', 'em', 'strong', 'b'];
 331  
 332      private function processDomElementSpanEtc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 333      {
 334          if (in_array($child->nodeName, self::$spanEtc)) {
 335              if (isset($attributeArray['class']) && $attributeArray['class'] === 'comment') {
 336                  $sheet->getComment($column . $row)
 337                      ->getText()
 338                      ->createTextRun($child->textContent);
 339              }
 340              $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 341  
 342              if (isset($this->formats[$child->nodeName])) {
 343                  $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
 344              }
 345          } else {
 346              $this->processDomElementHr($sheet, $row, $column, $cellContent, $child, $attributeArray);
 347          }
 348      }
 349  
 350      private function processDomElementHr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 351      {
 352          if ($child->nodeName === 'hr') {
 353              $this->flushCell($sheet, $column, $row, $cellContent);
 354              ++$row;
 355              if (isset($this->formats[$child->nodeName])) {
 356                  $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
 357              }
 358              ++$row;
 359          }
 360          // fall through to br
 361          $this->processDomElementBr($sheet, $row, $column, $cellContent, $child, $attributeArray);
 362      }
 363  
 364      private function processDomElementBr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 365      {
 366          if ($child->nodeName === 'br' || $child->nodeName === 'hr') {
 367              if ($this->tableLevel > 0) {
 368                  //    If we're inside a table, replace with a \n and set the cell to wrap
 369                  $cellContent .= "\n";
 370                  $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
 371              } else {
 372                  //    Otherwise flush our existing content and move the row cursor on
 373                  $this->flushCell($sheet, $column, $row, $cellContent);
 374                  ++$row;
 375              }
 376          } else {
 377              $this->processDomElementA($sheet, $row, $column, $cellContent, $child, $attributeArray);
 378          }
 379      }
 380  
 381      private function processDomElementA(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 382      {
 383          if ($child->nodeName === 'a') {
 384              foreach ($attributeArray as $attributeName => $attributeValue) {
 385                  switch ($attributeName) {
 386                      case 'href':
 387                          $sheet->getCell($column . $row)->getHyperlink()->setUrl($attributeValue);
 388                          if (isset($this->formats[$child->nodeName])) {
 389                              $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
 390                          }
 391  
 392                          break;
 393                      case 'class':
 394                          if ($attributeValue === 'comment-indicator') {
 395                              break; // Ignore - it's just a red square.
 396                          }
 397                  }
 398              }
 399              // no idea why this should be needed
 400              //$cellContent .= ' ';
 401              $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 402          } else {
 403              $this->processDomElementH1Etc($sheet, $row, $column, $cellContent, $child, $attributeArray);
 404          }
 405      }
 406  
 407      private static $h1Etc = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'p'];
 408  
 409      private function processDomElementH1Etc(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 410      {
 411          if (in_array($child->nodeName, self::$h1Etc)) {
 412              if ($this->tableLevel > 0) {
 413                  //    If we're inside a table, replace with a \n
 414                  $cellContent .= $cellContent ? "\n" : '';
 415                  $sheet->getStyle($column . $row)->getAlignment()->setWrapText(true);
 416                  $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 417              } else {
 418                  if ($cellContent > '') {
 419                      $this->flushCell($sheet, $column, $row, $cellContent);
 420                      ++$row;
 421                  }
 422                  $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 423                  $this->flushCell($sheet, $column, $row, $cellContent);
 424  
 425                  if (isset($this->formats[$child->nodeName])) {
 426                      $sheet->getStyle($column . $row)->applyFromArray($this->formats[$child->nodeName]);
 427                  }
 428  
 429                  ++$row;
 430                  $column = 'A';
 431              }
 432          } else {
 433              $this->processDomElementLi($sheet, $row, $column, $cellContent, $child, $attributeArray);
 434          }
 435      }
 436  
 437      private function processDomElementLi(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 438      {
 439          if ($child->nodeName === 'li') {
 440              if ($this->tableLevel > 0) {
 441                  //    If we're inside a table, replace with a \n
 442                  $cellContent .= $cellContent ? "\n" : '';
 443                  $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 444              } else {
 445                  if ($cellContent > '') {
 446                      $this->flushCell($sheet, $column, $row, $cellContent);
 447                  }
 448                  ++$row;
 449                  $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 450                  $this->flushCell($sheet, $column, $row, $cellContent);
 451                  $column = 'A';
 452              }
 453          } else {
 454              $this->processDomElementImg($sheet, $row, $column, $cellContent, $child, $attributeArray);
 455          }
 456      }
 457  
 458      private function processDomElementImg(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 459      {
 460          if ($child->nodeName === 'img') {
 461              $this->insertImage($sheet, $column, $row, $attributeArray);
 462          } else {
 463              $this->processDomElementTable($sheet, $row, $column, $cellContent, $child, $attributeArray);
 464          }
 465      }
 466  
 467      private function processDomElementTable(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 468      {
 469          if ($child->nodeName === 'table') {
 470              $this->flushCell($sheet, $column, $row, $cellContent);
 471              $column = $this->setTableStartColumn($column);
 472              if ($this->tableLevel > 1) {
 473                  --$row;
 474              }
 475              $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 476              $column = $this->releaseTableStartColumn();
 477              if ($this->tableLevel > 1) {
 478                  ++$column;
 479              } else {
 480                  ++$row;
 481              }
 482          } else {
 483              $this->processDomElementTr($sheet, $row, $column, $cellContent, $child, $attributeArray);
 484          }
 485      }
 486  
 487      private function processDomElementTr(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 488      {
 489          if ($child->nodeName === 'tr') {
 490              $column = $this->getTableStartColumn();
 491              $cellContent = '';
 492              $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 493  
 494              if (isset($attributeArray['height'])) {
 495                  $sheet->getRowDimension($row)->setRowHeight($attributeArray['height']);
 496              }
 497  
 498              ++$row;
 499          } else {
 500              $this->processDomElementThTdOther($sheet, $row, $column, $cellContent, $child, $attributeArray);
 501          }
 502      }
 503  
 504      private function processDomElementThTdOther(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 505      {
 506          if ($child->nodeName !== 'td' && $child->nodeName !== 'th') {
 507              $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 508          } else {
 509              $this->processDomElementThTd($sheet, $row, $column, $cellContent, $child, $attributeArray);
 510          }
 511      }
 512  
 513      private function processDomElementBgcolor(Worksheet $sheet, int $row, string $column, array $attributeArray): void
 514      {
 515          if (isset($attributeArray['bgcolor'])) {
 516              $sheet->getStyle("$column$row")->applyFromArray(
 517                  [
 518                      'fill' => [
 519                          'fillType' => Fill::FILL_SOLID,
 520                          'color' => ['rgb' => $this->getStyleColor($attributeArray['bgcolor'])],
 521                      ],
 522                  ]
 523              );
 524          }
 525      }
 526  
 527      private function processDomElementWidth(Worksheet $sheet, string $column, array $attributeArray): void
 528      {
 529          if (isset($attributeArray['width'])) {
 530              $sheet->getColumnDimension($column)->setWidth($attributeArray['width']);
 531          }
 532      }
 533  
 534      private function processDomElementHeight(Worksheet $sheet, int $row, array $attributeArray): void
 535      {
 536          if (isset($attributeArray['height'])) {
 537              $sheet->getRowDimension($row)->setRowHeight($attributeArray['height']);
 538          }
 539      }
 540  
 541      private function processDomElementAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
 542      {
 543          if (isset($attributeArray['align'])) {
 544              $sheet->getStyle($column . $row)->getAlignment()->setHorizontal($attributeArray['align']);
 545          }
 546      }
 547  
 548      private function processDomElementVAlign(Worksheet $sheet, int $row, string $column, array $attributeArray): void
 549      {
 550          if (isset($attributeArray['valign'])) {
 551              $sheet->getStyle($column . $row)->getAlignment()->setVertical($attributeArray['valign']);
 552          }
 553      }
 554  
 555      private function processDomElementDataFormat(Worksheet $sheet, int $row, string $column, array $attributeArray): void
 556      {
 557          if (isset($attributeArray['data-format'])) {
 558              $sheet->getStyle($column . $row)->getNumberFormat()->setFormatCode($attributeArray['data-format']);
 559          }
 560      }
 561  
 562      private function processDomElementThTd(Worksheet $sheet, int &$row, string &$column, string &$cellContent, DOMElement $child, array &$attributeArray): void
 563      {
 564          while (isset($this->rowspan[$column . $row])) {
 565              ++$column;
 566          }
 567          $this->processDomElement($child, $sheet, $row, $column, $cellContent);
 568  
 569          // apply inline style
 570          $this->applyInlineStyle($sheet, $row, $column, $attributeArray);
 571  
 572          $this->flushCell($sheet, $column, $row, $cellContent);
 573  
 574          $this->processDomElementBgcolor($sheet, $row, $column, $attributeArray);
 575          $this->processDomElementWidth($sheet, $column, $attributeArray);
 576          $this->processDomElementHeight($sheet, $row, $attributeArray);
 577          $this->processDomElementAlign($sheet, $row, $column, $attributeArray);
 578          $this->processDomElementVAlign($sheet, $row, $column, $attributeArray);
 579          $this->processDomElementDataFormat($sheet, $row, $column, $attributeArray);
 580  
 581          if (isset($attributeArray['rowspan'], $attributeArray['colspan'])) {
 582              //create merging rowspan and colspan
 583              $columnTo = $column;
 584              for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
 585                  ++$columnTo;
 586              }
 587              $range = $column . $row . ':' . $columnTo . ($row + (int) $attributeArray['rowspan'] - 1);
 588              foreach (Coordinate::extractAllCellReferencesInRange($range) as $value) {
 589                  $this->rowspan[$value] = true;
 590              }
 591              $sheet->mergeCells($range);
 592              $column = $columnTo;
 593          } elseif (isset($attributeArray['rowspan'])) {
 594              //create merging rowspan
 595              $range = $column . $row . ':' . $column . ($row + (int) $attributeArray['rowspan'] - 1);
 596              foreach (Coordinate::extractAllCellReferencesInRange($range) as $value) {
 597                  $this->rowspan[$value] = true;
 598              }
 599              $sheet->mergeCells($range);
 600          } elseif (isset($attributeArray['colspan'])) {
 601              //create merging colspan
 602              $columnTo = $column;
 603              for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
 604                  ++$columnTo;
 605              }
 606              $sheet->mergeCells($column . $row . ':' . $columnTo . $row);
 607              $column = $columnTo;
 608          }
 609  
 610          ++$column;
 611      }
 612  
 613      protected function processDomElement(DOMNode $element, Worksheet $sheet, int &$row, string &$column, string &$cellContent): void
 614      {
 615          foreach ($element->childNodes as $child) {
 616              if ($child instanceof DOMText) {
 617                  $domText = preg_replace('/\s+/u', ' ', trim($child->nodeValue));
 618                  if (is_string($cellContent)) {
 619                      //    simply append the text if the cell content is a plain text string
 620                      $cellContent .= $domText;
 621                  }
 622                  //    but if we have a rich text run instead, we need to append it correctly
 623                      //    TODO
 624              } elseif ($child instanceof DOMElement) {
 625                  $this->processDomElementBody($sheet, $row, $column, $cellContent, $child);
 626              }
 627          }
 628      }
 629  
 630      /**
 631       * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
 632       *
 633       * @param string $pFilename
 634       *
 635       * @return Spreadsheet
 636       */
 637      public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
 638      {
 639          // Validate
 640          if (!$this->canRead($pFilename)) {
 641              throw new Exception($pFilename . ' is an Invalid HTML file.');
 642          }
 643  
 644          // Create a new DOM object
 645          $dom = new DOMDocument();
 646          // Reload the HTML file into the DOM object
 647          try {
 648              $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
 649          } catch (Throwable $e) {
 650              $loaded = false;
 651          }
 652          if ($loaded === false) {
 653              throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document');
 654          }
 655  
 656          return $this->loadDocument($dom, $spreadsheet);
 657      }
 658  
 659      /**
 660       * Spreadsheet from content.
 661       *
 662       * @param string $content
 663       */
 664      public function loadFromString($content, ?Spreadsheet $spreadsheet = null): Spreadsheet
 665      {
 666          //    Create a new DOM object
 667          $dom = new DOMDocument();
 668          //    Reload the HTML file into the DOM object
 669          try {
 670              $loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scan($content), 'HTML-ENTITIES', 'UTF-8'));
 671          } catch (Throwable $e) {
 672              $loaded = false;
 673          }
 674          if ($loaded === false) {
 675              throw new Exception('Failed to load content as a DOM Document');
 676          }
 677  
 678          return $this->loadDocument($dom, $spreadsheet ?? new Spreadsheet());
 679      }
 680  
 681      /**
 682       * Loads PhpSpreadsheet from DOMDocument into PhpSpreadsheet instance.
 683       */
 684      private function loadDocument(DOMDocument $document, Spreadsheet $spreadsheet): Spreadsheet
 685      {
 686          while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
 687              $spreadsheet->createSheet();
 688          }
 689          $spreadsheet->setActiveSheetIndex($this->sheetIndex);
 690  
 691          // Discard white space
 692          $document->preserveWhiteSpace = false;
 693  
 694          $row = 0;
 695          $column = 'A';
 696          $content = '';
 697          $this->rowspan = [];
 698          $this->processDomElement($document, $spreadsheet->getActiveSheet(), $row, $column, $content);
 699  
 700          // Return
 701          return $spreadsheet;
 702      }
 703  
 704      /**
 705       * Get sheet index.
 706       *
 707       * @return int
 708       */
 709      public function getSheetIndex()
 710      {
 711          return $this->sheetIndex;
 712      }
 713  
 714      /**
 715       * Set sheet index.
 716       *
 717       * @param int $pValue Sheet index
 718       *
 719       * @return $this
 720       */
 721      public function setSheetIndex($pValue)
 722      {
 723          $this->sheetIndex = $pValue;
 724  
 725          return $this;
 726      }
 727  
 728      /**
 729       * Apply inline css inline style.
 730       *
 731       * NOTES :
 732       * Currently only intended for td & th element,
 733       * and only takes 'background-color' and 'color'; property with HEX color
 734       *
 735       * TODO :
 736       * - Implement to other propertie, such as border
 737       *
 738       * @param Worksheet $sheet
 739       * @param int $row
 740       * @param string $column
 741       * @param array $attributeArray
 742       */
 743      private function applyInlineStyle(&$sheet, $row, $column, $attributeArray): void
 744      {
 745          if (!isset($attributeArray['style'])) {
 746              return;
 747          }
 748  
 749          if (isset($attributeArray['rowspan'], $attributeArray['colspan'])) {
 750              $columnTo = $column;
 751              for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
 752                  ++$columnTo;
 753              }
 754              $range = $column . $row . ':' . $columnTo . ($row + (int) $attributeArray['rowspan'] - 1);
 755              $cellStyle = $sheet->getStyle($range);
 756          } elseif (isset($attributeArray['rowspan'])) {
 757              $range = $column . $row . ':' . $column . ($row + (int) $attributeArray['rowspan'] - 1);
 758              $cellStyle = $sheet->getStyle($range);
 759          } elseif (isset($attributeArray['colspan'])) {
 760              $columnTo = $column;
 761              for ($i = 0; $i < (int) $attributeArray['colspan'] - 1; ++$i) {
 762                  ++$columnTo;
 763              }
 764              $range = $column . $row . ':' . $columnTo . $row;
 765              $cellStyle = $sheet->getStyle($range);
 766          } else {
 767              $cellStyle = $sheet->getStyle($column . $row);
 768          }
 769  
 770          // add color styles (background & text) from dom element,currently support : td & th, using ONLY inline css style with RGB color
 771          $styles = explode(';', $attributeArray['style']);
 772          foreach ($styles as $st) {
 773              $value = explode(':', $st);
 774              $styleName = isset($value[0]) ? trim($value[0]) : null;
 775              $styleValue = isset($value[1]) ? trim($value[1]) : null;
 776  
 777              if (!$styleName) {
 778                  continue;
 779              }
 780  
 781              switch ($styleName) {
 782                  case 'background':
 783                  case 'background-color':
 784                      $styleColor = $this->getStyleColor($styleValue);
 785  
 786                      if (!$styleColor) {
 787                          continue 2;
 788                      }
 789  
 790                      $cellStyle->applyFromArray(['fill' => ['fillType' => Fill::FILL_SOLID, 'color' => ['rgb' => $styleColor]]]);
 791  
 792                      break;
 793                  case 'color':
 794                      $styleColor = $this->getStyleColor($styleValue);
 795  
 796                      if (!$styleColor) {
 797                          continue 2;
 798                      }
 799  
 800                      $cellStyle->applyFromArray(['font' => ['color' => ['rgb' => $styleColor]]]);
 801  
 802                      break;
 803  
 804                  case 'border':
 805                      $this->setBorderStyle($cellStyle, $styleValue, 'allBorders');
 806  
 807                      break;
 808  
 809                  case 'border-top':
 810                      $this->setBorderStyle($cellStyle, $styleValue, 'top');
 811  
 812                      break;
 813  
 814                  case 'border-bottom':
 815                      $this->setBorderStyle($cellStyle, $styleValue, 'bottom');
 816  
 817                      break;
 818  
 819                  case 'border-left':
 820                      $this->setBorderStyle($cellStyle, $styleValue, 'left');
 821  
 822                      break;
 823  
 824                  case 'border-right':
 825                      $this->setBorderStyle($cellStyle, $styleValue, 'right');
 826  
 827                      break;
 828  
 829                  case 'font-size':
 830                      $cellStyle->getFont()->setSize(
 831                          (float) $styleValue
 832                      );
 833  
 834                      break;
 835  
 836                  case 'font-weight':
 837                      if ($styleValue === 'bold' || $styleValue >= 500) {
 838                          $cellStyle->getFont()->setBold(true);
 839                      }
 840  
 841                      break;
 842  
 843                  case 'font-style':
 844                      if ($styleValue === 'italic') {
 845                          $cellStyle->getFont()->setItalic(true);
 846                      }
 847  
 848                      break;
 849  
 850                  case 'font-family':
 851                      $cellStyle->getFont()->setName(str_replace('\'', '', $styleValue));
 852  
 853                      break;
 854  
 855                  case 'text-decoration':
 856                      switch ($styleValue) {
 857                          case 'underline':
 858                              $cellStyle->getFont()->setUnderline(Font::UNDERLINE_SINGLE);
 859  
 860                              break;
 861                          case 'line-through':
 862                              $cellStyle->getFont()->setStrikethrough(true);
 863  
 864                              break;
 865                      }
 866  
 867                      break;
 868  
 869                  case 'text-align':
 870                      $cellStyle->getAlignment()->setHorizontal($styleValue);
 871  
 872                      break;
 873  
 874                  case 'vertical-align':
 875                      $cellStyle->getAlignment()->setVertical($styleValue);
 876  
 877                      break;
 878  
 879                  case 'width':
 880                      $sheet->getColumnDimension($column)->setWidth(
 881                          str_replace('px', '', $styleValue)
 882                      );
 883  
 884                      break;
 885  
 886                  case 'height':
 887                      $sheet->getRowDimension($row)->setRowHeight(
 888                          str_replace('px', '', $styleValue)
 889                      );
 890  
 891                      break;
 892  
 893                  case 'word-wrap':
 894                      $cellStyle->getAlignment()->setWrapText(
 895                          $styleValue === 'break-word'
 896                      );
 897  
 898                      break;
 899  
 900                  case 'text-indent':
 901                      $cellStyle->getAlignment()->setIndent(
 902                          (int) str_replace(['px'], '', $styleValue)
 903                      );
 904  
 905                      break;
 906              }
 907          }
 908      }
 909  
 910      /**
 911       * Check if has #, so we can get clean hex.
 912       *
 913       * @param $value
 914       *
 915       * @return null|string
 916       */
 917      public function getStyleColor($value)
 918      {
 919          if (strpos($value, '#') === 0) {
 920              return substr($value, 1);
 921          }
 922  
 923          return \PhpOffice\PhpSpreadsheet\Helper\Html::colourNameLookup((string) $value);
 924      }
 925  
 926      /**
 927       * @param string    $column
 928       * @param int       $row
 929       */
 930      private function insertImage(Worksheet $sheet, $column, $row, array $attributes): void
 931      {
 932          if (!isset($attributes['src'])) {
 933              return;
 934          }
 935  
 936          $src = urldecode($attributes['src']);
 937          $width = isset($attributes['width']) ? (float) $attributes['width'] : null;
 938          $height = isset($attributes['height']) ? (float) $attributes['height'] : null;
 939          $name = $attributes['alt'] ?? null;
 940  
 941          $drawing = new Drawing();
 942          $drawing->setPath($src);
 943          $drawing->setWorksheet($sheet);
 944          $drawing->setCoordinates($column . $row);
 945          $drawing->setOffsetX(0);
 946          $drawing->setOffsetY(10);
 947          $drawing->setResizeProportional(true);
 948  
 949          if ($name) {
 950              $drawing->setName($name);
 951          }
 952  
 953          if ($width) {
 954              $drawing->setWidth((int) $width);
 955          }
 956  
 957          if ($height) {
 958              $drawing->setHeight((int) $height);
 959          }
 960  
 961          $sheet->getColumnDimension($column)->setWidth(
 962              $drawing->getWidth() / 6
 963          );
 964  
 965          $sheet->getRowDimension($row)->setRowHeight(
 966              $drawing->getHeight() * 0.9
 967          );
 968      }
 969  
 970      private static $borderMappings = [
 971          'dash-dot' => Border::BORDER_DASHDOT,
 972          'dash-dot-dot' => Border::BORDER_DASHDOTDOT,
 973          'dashed' => Border::BORDER_DASHED,
 974          'dotted' => Border::BORDER_DOTTED,
 975          'double' => Border::BORDER_DOUBLE,
 976          'hair' => Border::BORDER_HAIR,
 977          'medium' => Border::BORDER_MEDIUM,
 978          'medium-dashed' => Border::BORDER_MEDIUMDASHED,
 979          'medium-dash-dot' => Border::BORDER_MEDIUMDASHDOT,
 980          'medium-dash-dot-dot' => Border::BORDER_MEDIUMDASHDOTDOT,
 981          'none' => Border::BORDER_NONE,
 982          'slant-dash-dot' => Border::BORDER_SLANTDASHDOT,
 983          'solid' => Border::BORDER_THIN,
 984          'thick' => Border::BORDER_THICK,
 985      ];
 986  
 987      public static function getBorderMappings(): array
 988      {
 989          return self::$borderMappings;
 990      }
 991  
 992      /**
 993       * Map html border style to PhpSpreadsheet border style.
 994       *
 995       * @param  string $style
 996       *
 997       * @return null|string
 998       */
 999      public function getBorderStyle($style)
1000      {
1001          return (array_key_exists($style, self::$borderMappings)) ? self::$borderMappings[$style] : null;
1002      }
1003  
1004      /**
1005       * @param string $styleValue
1006       * @param string $type
1007       */
1008      private function setBorderStyle(Style $cellStyle, $styleValue, $type): void
1009      {
1010          if (trim($styleValue) === Border::BORDER_NONE) {
1011              $borderStyle = Border::BORDER_NONE;
1012              $color = null;
1013          } else {
1014              [, $borderStyle, $color] = explode(' ', $styleValue);
1015          }
1016  
1017          $cellStyle->applyFromArray([
1018              'borders' => [
1019                  $type => [
1020                      'borderStyle' => $this->getBorderStyle($borderStyle),
1021                      'color' => ['rgb' => $this->getStyleColor($color)],
1022                  ],
1023              ],
1024          ]);
1025      }
1026  }