Search moodle.org's
Developer Documentation

See Release Notes

  • Bug fixes for general core bugs in 3.10.x will end 8 November 2021 (12 months).
  • Bug fixes for security issues in 3.10.x will end 9 May 2022 (18 months).
  • PHP version: minimum PHP 7.2.0. Note: the minimum PHP version has increased since Moodle 3.8. PHP 7.3.x and 7.4.x are also supported.
   1  <?php
   2  
   3  /**

   4   * Injector that auto paragraphs text in the root node based on

   5   * double-spacing.

   6   * @todo Ensure all states are unit tested, including variations as well.

   7   * @todo Make a graph of the flow control for this Injector.

   8   */
   9  class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
  10  {
  11      /**

  12       * @type string

  13       */
  14      public $name = 'AutoParagraph';
  15  
  16      /**

  17       * @type array

  18       */
  19      public $needed = array('p');
  20  
  21      /**

  22       * @return HTMLPurifier_Token_Start

  23       */
  24      private function _pStart()
  25      {
  26          $par = new HTMLPurifier_Token_Start('p');
  27          $par->armor['MakeWellFormed_TagClosedError'] = true;
  28          return $par;
  29      }
  30  
  31      /**

  32       * @param HTMLPurifier_Token_Text $token

  33       */
  34      public function handleText(&$token)
  35      {
  36          $text = $token->data;
  37          // Does the current parent allow <p> tags?

  38          if ($this->allowsElement('p')) {
  39              if (empty($this->currentNesting) || strpos($text, "\n\n") !== false) {
  40                  // Note that we have differing behavior when dealing with text

  41                  // in the anonymous root node, or a node inside the document.

  42                  // If the text as a double-newline, the treatment is the same;

  43                  // if it doesn't, see the next if-block if you're in the document.

  44  
  45                  $i = $nesting = null;
  46                  if (!$this->forwardUntilEndToken($i, $current, $nesting) && $token->is_whitespace) {
  47                      // State 1.1: ...    ^ (whitespace, then document end)

  48                      //               ----

  49                      // This is a degenerate case

  50                  } else {
  51                      if (!$token->is_whitespace || $this->_isInline($current)) {
  52                          // State 1.2: PAR1

  53                          //            ----

  54  
  55                          // State 1.3: PAR1\n\nPAR2

  56                          //            ------------

  57  
  58                          // State 1.4: <div>PAR1\n\nPAR2 (see State 2)

  59                          //                 ------------

  60                          $token = array($this->_pStart());
  61                          $this->_splitText($text, $token);
  62                      } else {
  63                          // State 1.5: \n<hr />

  64                          //            --

  65                      }
  66                  }
  67              } else {
  68                  // State 2:   <div>PAR1... (similar to 1.4)

  69                  //                 ----

  70  
  71                  // We're in an element that allows paragraph tags, but we're not

  72                  // sure if we're going to need them.

  73                  if ($this->_pLookAhead()) {
  74                      // State 2.1: <div>PAR1<b>PAR1\n\nPAR2

  75                      //                 ----

  76                      // Note: This will always be the first child, since any

  77                      // previous inline element would have triggered this very

  78                      // same routine, and found the double newline. One possible

  79                      // exception would be a comment.

  80                      $token = array($this->_pStart(), $token);
  81                  } else {
  82                      // State 2.2.1: <div>PAR1<div>

  83                      //                   ----

  84  
  85                      // State 2.2.2: <div>PAR1<b>PAR1</b></div>

  86                      //                   ----

  87                  }
  88              }
  89              // Is the current parent a <p> tag?

  90          } elseif (!empty($this->currentNesting) &&
  91              $this->currentNesting[count($this->currentNesting) - 1]->name == 'p') {
  92              // State 3.1: ...<p>PAR1

  93              //                  ----

  94  
  95              // State 3.2: ...<p>PAR1\n\nPAR2

  96              //                  ------------

  97              $token = array();
  98              $this->_splitText($text, $token);
  99              // Abort!

 100          } else {
 101              // State 4.1: ...<b>PAR1

 102              //                  ----

 103  
 104              // State 4.2: ...<b>PAR1\n\nPAR2

 105              //                  ------------

 106          }
 107      }
 108  
 109      /**

 110       * @param HTMLPurifier_Token $token

 111       */
 112      public function handleElement(&$token)
 113      {
 114          // We don't have to check if we're already in a <p> tag for block

 115          // tokens, because the tag would have been autoclosed by MakeWellFormed.

 116          if ($this->allowsElement('p')) {
 117              if (!empty($this->currentNesting)) {
 118                  if ($this->_isInline($token)) {
 119                      // State 1: <div>...<b>

 120                      //                  ---

 121                      // Check if this token is adjacent to the parent token

 122                      // (seek backwards until token isn't whitespace)

 123                      $i = null;
 124                      $this->backward($i, $prev);
 125  
 126                      if (!$prev instanceof HTMLPurifier_Token_Start) {
 127                          // Token wasn't adjacent

 128                          if ($prev instanceof HTMLPurifier_Token_Text &&
 129                              substr($prev->data, -2) === "\n\n"
 130                          ) {
 131                              // State 1.1.4: <div><p>PAR1</p>\n\n<b>

 132                              //                                  ---

 133                              // Quite frankly, this should be handled by splitText

 134                              $token = array($this->_pStart(), $token);
 135                          } else {
 136                              // State 1.1.1: <div><p>PAR1</p><b>

 137                              //                              ---

 138                              // State 1.1.2: <div><br /><b>

 139                              //                         ---

 140                              // State 1.1.3: <div>PAR<b>

 141                              //                      ---

 142                          }
 143                      } else {
 144                          // State 1.2.1: <div><b>

 145                          //                   ---

 146                          // Lookahead to see if <p> is needed.

 147                          if ($this->_pLookAhead()) {
 148                              // State 1.3.1: <div><b>PAR1\n\nPAR2

 149                              //                   ---

 150                              $token = array($this->_pStart(), $token);
 151                          } else {
 152                              // State 1.3.2: <div><b>PAR1</b></div>

 153                              //                   ---

 154  
 155                              // State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div>

 156                              //                   ---

 157                          }
 158                      }
 159                  } else {
 160                      // State 2.3: ...<div>

 161                      //               -----

 162                  }
 163              } else {
 164                  if ($this->_isInline($token)) {
 165                      // State 3.1: <b>

 166                      //            ---

 167                      // This is where the {p} tag is inserted, not reflected in

 168                      // inputTokens yet, however.

 169                      $token = array($this->_pStart(), $token);
 170                  } else {
 171                      // State 3.2: <div>

 172                      //            -----

 173                  }
 174  
 175                  $i = null;
 176                  if ($this->backward($i, $prev)) {
 177                      if (!$prev instanceof HTMLPurifier_Token_Text) {
 178                          // State 3.1.1: ...</p>{p}<b>

 179                          //                        ---

 180                          // State 3.2.1: ...</p><div>

 181                          //                     -----

 182                          if (!is_array($token)) {
 183                              $token = array($token);
 184                          }
 185                          array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
 186                      } else {
 187                          // State 3.1.2: ...</p>\n\n{p}<b>

 188                          //                            ---

 189                          // State 3.2.2: ...</p>\n\n<div>

 190                          //                         -----

 191                          // Note: PAR<ELEM> cannot occur because PAR would have been

 192                          // wrapped in <p> tags.

 193                      }
 194                  }
 195              }
 196          } else {
 197              // State 2.2: <ul><li>

 198              //                ----

 199              // State 2.4: <p><b>

 200              //               ---

 201          }
 202      }
 203  
 204      /**

 205       * Splits up a text in paragraph tokens and appends them

 206       * to the result stream that will replace the original

 207       * @param string $data String text data that will be processed

 208       *    into paragraphs

 209       * @param HTMLPurifier_Token[] $result Reference to array of tokens that the

 210       *    tags will be appended onto

 211       */
 212      private function _splitText($data, &$result)
 213      {
 214          $raw_paragraphs = explode("\n\n", $data);
 215          $paragraphs = array(); // without empty paragraphs

 216          $needs_start = false;
 217          $needs_end = false;
 218  
 219          $c = count($raw_paragraphs);
 220          if ($c == 1) {
 221              // There were no double-newlines, abort quickly. In theory this

 222              // should never happen.

 223              $result[] = new HTMLPurifier_Token_Text($data);
 224              return;
 225          }
 226          for ($i = 0; $i < $c; $i++) {
 227              $par = $raw_paragraphs[$i];
 228              if (trim($par) !== '') {
 229                  $paragraphs[] = $par;
 230              } else {
 231                  if ($i == 0) {
 232                      // Double newline at the front

 233                      if (empty($result)) {
 234                          // The empty result indicates that the AutoParagraph

 235                          // injector did not add any start paragraph tokens.

 236                          // This means that we have been in a paragraph for

 237                          // a while, and the newline means we should start a new one.

 238                          $result[] = new HTMLPurifier_Token_End('p');
 239                          $result[] = new HTMLPurifier_Token_Text("\n\n");
 240                          // However, the start token should only be added if

 241                          // there is more processing to be done (i.e. there are

 242                          // real paragraphs in here). If there are none, the

 243                          // next start paragraph tag will be handled by the

 244                          // next call to the injector

 245                          $needs_start = true;
 246                      } else {
 247                          // We just started a new paragraph!

 248                          // Reinstate a double-newline for presentation's sake, since

 249                          // it was in the source code.

 250                          array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
 251                      }
 252                  } elseif ($i + 1 == $c) {
 253                      // Double newline at the end

 254                      // There should be a trailing </p> when we're finally done.

 255                      $needs_end = true;
 256                  }
 257              }
 258          }
 259  
 260          // Check if this was just a giant blob of whitespace. Move this earlier,

 261          // perhaps?

 262          if (empty($paragraphs)) {
 263              return;
 264          }
 265  
 266          // Add the start tag indicated by \n\n at the beginning of $data

 267          if ($needs_start) {
 268              $result[] = $this->_pStart();
 269          }
 270  
 271          // Append the paragraphs onto the result

 272          foreach ($paragraphs as $par) {
 273              $result[] = new HTMLPurifier_Token_Text($par);
 274              $result[] = new HTMLPurifier_Token_End('p');
 275              $result[] = new HTMLPurifier_Token_Text("\n\n");
 276              $result[] = $this->_pStart();
 277          }
 278  
 279          // Remove trailing start token; Injector will handle this later if

 280          // it was indeed needed. This prevents from needing to do a lookahead,

 281          // at the cost of a lookbehind later.

 282          array_pop($result);
 283  
 284          // If there is no need for an end tag, remove all of it and let

 285          // MakeWellFormed close it later.

 286          if (!$needs_end) {
 287              array_pop($result); // removes \n\n

 288              array_pop($result); // removes </p>

 289          }
 290      }
 291  
 292      /**

 293       * Returns true if passed token is inline (and, ergo, allowed in

 294       * paragraph tags)

 295       * @param HTMLPurifier_Token $token

 296       * @return bool

 297       */
 298      private function _isInline($token)
 299      {
 300          return isset($this->htmlDefinition->info['p']->child->elements[$token->name]);
 301      }
 302  
 303      /**

 304       * Looks ahead in the token list and determines whether or not we need

 305       * to insert a <p> tag.

 306       * @return bool

 307       */
 308      private function _pLookAhead()
 309      {
 310          if ($this->currentToken instanceof HTMLPurifier_Token_Start) {
 311              $nesting = 1;
 312          } else {
 313              $nesting = 0;
 314          }
 315          $ok = false;
 316          $i = null;
 317          while ($this->forwardUntilEndToken($i, $current, $nesting)) {
 318              $result = $this->_checkNeedsP($current);
 319              if ($result !== null) {
 320                  $ok = $result;
 321                  break;
 322              }
 323          }
 324          return $ok;
 325      }
 326  
 327      /**

 328       * Determines if a particular token requires an earlier inline token

 329       * to get a paragraph. This should be used with _forwardUntilEndToken

 330       * @param HTMLPurifier_Token $current

 331       * @return bool

 332       */
 333      private function _checkNeedsP($current)
 334      {
 335          if ($current instanceof HTMLPurifier_Token_Start) {
 336              if (!$this->_isInline($current)) {
 337                  // <div>PAR1<div>

 338                  //      ----

 339                  // Terminate early, since we hit a block element

 340                  return false;
 341              }
 342          } elseif ($current instanceof HTMLPurifier_Token_Text) {
 343              if (strpos($current->data, "\n\n") !== false) {
 344                  // <div>PAR1<b>PAR1\n\nPAR2

 345                  //      ----

 346                  return true;
 347              } else {
 348                  // <div>PAR1<b>PAR1...

 349                  //      ----

 350              }
 351          }
 352          return null;
 353      }
 354  }
 355  
 356  // vim: et sw=4 sts=4