Differences Between: [Versions 310 and 401] [Versions 310 and 402] [Versions 310 and 403]
<?php

/**
 * Definition for tables.  The general idea is to extract out all of the
 * essential bits, and then reconstruct it later.
 *
 * This is a bit confusing, because the DTDs and the W3C
 * validators seem to disagree on the appropriate definition. The
 * DTD claims:
 *
 *      (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
 *
 * But actually, the HTML4 spec then has this to say:
 *
 *      The TBODY start tag is always required except when the table
 *      contains only one table body and no table head or foot sections.
 *      The TBODY end tag may always be safely omitted.
 *
 * So the DTD is kind of wrong.  The validator is, unfortunately, kind
 * of on crack.
 *
 * The definition changed again in XHTML1.1; and in my opinion, this
 * formulation makes the most sense.
 *
 *      caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
 *
 * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
 * If we encounter a thead, tfoot or tbody, we are placed in the former
 * mode, and we *must* wrap any stray tr segments with a tbody. But if
 * we don't run into any of them, just having tr tags is OK.
 */
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    /**
     * @type bool
     */
    public $allow_empty = false;

    /**
     * @type string
     */
    public $type = 'table';

    /**
     * Element names this definition recognises as direct table children.
     *
     * @type array
     */
    public $elements = array(
        'tr' => true,
        'tbody' => true,
        'thead' => true,
        'tfoot' => true,
        'caption' => true,
        'colgroup' => true,
        'col' => true
    );

    public function __construct()
    {
    }

    /**
     * Reorders the children of a table into
     * caption, cols, thead, tfoot, body content, keeping whitespace and
     * comments attached to the element they followed.
     *
     * Side effect: a second (or later) thead/tfoot node is renamed to
     * 'tbody' in place, i.e. the passed-in node object is mutated.
     *
     * @param array $children
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|bool The reordered children, or false when $children
     *                    is empty or contains no tr/tbody content.
     */
    public function validateChildren($children, $config, $context)
    {
        if (empty($children)) {
            return false;
        }

        // only one of these elements is allowed in a table
        $caption = false;
        $thead = false;
        $tfoot = false;

        // whitespace
        $initial_ws = array();
        $after_caption_ws = array();
        $after_thead_ws = array();
        $after_tfoot_ws = array();

        // as many of these as you want
        $cols = array();
        $content = array();

        $tbody_mode = false; // if true, then we need to wrap any stray
                             // <tr>s with a <tbody>.

        // $ws_accum always aliases the bucket that trailing whitespace
        // and comments should be appended to.
        $ws_accum =& $initial_ws;

        foreach ($children as $node) {
            // Comments are handled before ->name is consulted.
            if ($node instanceof HTMLPurifier_Node_Comment) {
                $ws_accum[] = $node;
                continue;
            }
            switch ($node->name) {
                case 'tbody':
                    $tbody_mode = true;
                    // fall through
                case 'tr':
                    $content[] = $node;
                    $ws_accum =& $content;
                    break;
                case 'caption':
                    // there can only be one caption!
                    if ($caption !== false) {
                        break;
                    }
                    $caption = $node;
                    $ws_accum =& $after_caption_ws;
                    break;
                case 'thead':
                    $tbody_mode = true;
                    // XXX This breaks rendering properties with
                    // Firefox, which never floats a <thead> to
                    // the top. Ever.  (Our scheme will float the
                    // first <thead> to the top.)  So maybe
                    // <thead>s that are not first should be
                    // turned into <tbody>? Very tricky, indeed.
                    if ($thead === false) {
                        $thead = $node;
                        $ws_accum =& $after_thead_ws;
                    } else {
                        // Oops, there's a second one! What
                        // should we do?  Current behavior is to
                        // transmute every thead after the first
                        // into a tbody, and then put it into content.
                        // Maybe a better idea is to *attach
                        // it* to the existing thead or tfoot?
                        // We don't do this, because Firefox
                        // doesn't float an extra tfoot to the
                        // bottom like it does for the first one.
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'tfoot':
                    // see above for some caveats
                    $tbody_mode = true;
                    if ($tfoot === false) {
                        $tfoot = $node;
                        $ws_accum =& $after_tfoot_ws;
                    } else {
                        $node->name = 'tbody';
                        $content[] = $node;
                        $ws_accum =& $content;
                    }
                    break;
                case 'colgroup':
                case 'col':
                    $cols[] = $node;
                    $ws_accum =& $cols;
                    break;
                case '#PCDATA':
                    // How is whitespace handled? We treat it as sticky to
                    // the *end* of the previous element. So all of the
                    // nonsense we have worked on is to keep things
                    // together. Non-whitespace text is dropped.
                    if (!empty($node->is_whitespace)) {
                        $ws_accum[] = $node;
                    }
                    break;
            }
        }

        if (empty($content)) {
            return false;
        }

        $ret = $initial_ws;
        if ($caption !== false) {
            $ret[] = $caption;
            $ret = array_merge($ret, $after_caption_ws);
        }
        // $cols is always an array (possibly empty), so merge unconditionally.
        $ret = array_merge($ret, $cols);
        if ($thead !== false) {
            $ret[] = $thead;
            $ret = array_merge($ret, $after_thead_ws);
        }
        if ($tfoot !== false) {
            $ret[] = $tfoot;
            $ret = array_merge($ret, $after_tfoot_ws);
        }

        if ($tbody_mode) {
            // we have to shuffle tr into tbody
            $current_tr_tbody = null;

            foreach ($content as $node) {
                // The first pass stores comments in $content without
                // looking at ->name, so they must be routed here as well;
                // otherwise they would be lost in tbody mode while being
                // kept in tr mode. They stick to the preceding element,
                // exactly like whitespace.
                if ($node instanceof HTMLPurifier_Node_Comment) {
                    if ($current_tr_tbody === null) {
                        $ret[] = $node;
                    } else {
                        $current_tr_tbody->children[] = $node;
                    }
                    continue;
                }
                switch ($node->name) {
                    case 'tbody':
                        // an explicit tbody closes any synthetic one
                        $current_tr_tbody = null;
                        $ret[] = $node;
                        break;
                    case 'tr':
                        if ($current_tr_tbody === null) {
                            $current_tr_tbody = new HTMLPurifier_Node_Element('tbody');
                            $ret[] = $current_tr_tbody;
                        }
                        $current_tr_tbody->children[] = $node;
                        break;
                    case '#PCDATA':
                        // only whitespace text reaches $content
                        if ($current_tr_tbody === null) {
                            $ret[] = $node;
                        } else {
                            $current_tr_tbody->children[] = $node;
                        }
                        break;
                }
            }
        } else {
            $ret = array_merge($ret, $content);
        }

        return $ret;
    }
}

// vim: et sw=4 sts=4
title
Description
Body
title
Description
Body
title
Description
Body
title
Body