TagBalancingHtmlStreamEventReceiver.java revision 4c1e3417997042b0b485cbf71344a0210dfaba04
1// Copyright (c) 2011, Mike Samuel
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions
6// are met:
7//
8// Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// Redistributions in binary form must reproduce the above copyright
11// notice, this list of conditions and the following disclaimer in the
12// documentation and/or other materials provided with the distribution.
13// Neither the name of the OWASP nor the names of its contributors may
14// be used to endorse or promote products derived from this software
15// without specific prior written permission.
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28
29package org.owasp.html;
30
31import java.util.List;
32
33import javax.annotation.Nullable;
34import javax.annotation.concurrent.Immutable;
35
36import com.google.common.collect.ImmutableMap;
37import com.google.common.collect.Lists;
38
39/**
40 * Wraps an HTML stream event receiver to fill in missing close tags.
41 * If the balancer is given the HTML {@code <p>1<p>2}, the wrapped receiver will
42 * see events equivalent to {@code <p>1</p><p>2</p>}.
43 *
44 * @author Mike Samuel <mikesamuel@gmail.com>
45 */
46@TCB
47public class TagBalancingHtmlStreamEventReceiver
48    implements HtmlStreamEventReceiver {
49  private final HtmlStreamEventReceiver underlying;
50  private int nestingLimit = Integer.MAX_VALUE;
51  private final List<ElementContainmentInfo> openElements
52      = Lists.newArrayList();
53
54  public TagBalancingHtmlStreamEventReceiver(
55      HtmlStreamEventReceiver underlying) {
56    this.underlying = underlying;
57  }
58
59  public void setNestingLimit(int limit) {
60    if (openElements.size() > limit) {
61      throw new IllegalStateException();
62    }
63    this.nestingLimit = limit;
64  }
65
66  public void openDocument() {
67    underlying.openDocument();
68  }
69
70  public void closeDocument() {
71    for (int i = Math.min(nestingLimit, openElements.size()); --i >= 0;) {
72      underlying.closeTag(openElements.get(i).elementName);
73    }
74    openElements.clear();
75    underlying.closeDocument();
76  }
77
78  public void openTag(String elementName, List<String> attrs) {
79    String canonElementName = HtmlLexer.canonicalName(elementName);
80    ElementContainmentInfo elInfo = ELEMENT_CONTAINMENT_RELATIONSHIPS.get(
81        canonElementName);
82    // Treat unrecognized tags as void, but emit closing tags in closeTag().
83    if (elInfo == null) {
84      if (openElements.size() < nestingLimit) {
85        underlying.openTag(elementName, attrs);
86      }
87      return;
88    }
89
90    prepareForContent(elInfo);
91
92    if (openElements.size() < nestingLimit) {
93      underlying.openTag(elInfo.elementName, attrs);
94    }
95    if (!elInfo.isVoid) {
96      openElements.add(elInfo);
97    }
98  }
99
100  private void prepareForContent(ElementContainmentInfo elInfo) {
101    int nOpen = openElements.size();
102    if (nOpen != 0) {
103      ElementContainmentInfo top = openElements.get(nOpen - 1);
104      if ((top.contents & elInfo.types) == 0) {
105        ElementContainmentInfo blockContainerChild = top.blockContainerChild;
106        // Open implied elements, such as list-items and table cells & rows.
107        if (blockContainerChild != null
108            && (blockContainerChild.contents & elInfo.types) != 0) {
109          underlying.openTag(
110              blockContainerChild.elementName, Lists.<String>newArrayList());
111          openElements.add(blockContainerChild);
112          top = blockContainerChild;
113          ++nOpen;
114        }
115      }
116
117      // Close all the elements that cannot contain the element to open.
118      List<ElementContainmentInfo> toResumeInReverse = null;
119      while (true) {
120        if ((top.contents & elInfo.types) != 0) { break; }
121        if (openElements.size() < nestingLimit) {
122          underlying.closeTag(top.elementName);
123        }
124        openElements.remove(--nOpen);
125        if (top.resumable) {
126          if (toResumeInReverse == null) {
127            toResumeInReverse = Lists.newArrayList();
128          }
129          toResumeInReverse.add(top);
130        }
131        if (nOpen == 0) { break; }
132        top = openElements.get(nOpen - 1);
133      }
134
135      if (toResumeInReverse != null) {
136        resume(toResumeInReverse);
137      }
138    }
139  }
140
141  public void closeTag(String elementName) {
142    String canonElementName = HtmlLexer.canonicalName(elementName);
143    ElementContainmentInfo elInfo = ELEMENT_CONTAINMENT_RELATIONSHIPS.get(
144        canonElementName);
145    if (elInfo == null) {  // Allow unrecognized end tags through.
146      if (openElements.size() < nestingLimit) {
147        underlying.closeTag(elementName);
148      }
149      return;
150    }
151    int index = openElements.lastIndexOf(elInfo);
152    if (index < 0) {
153      // Let any of </h1>, </h2>, ... close other header tags.
154      if (isHeaderElementName(canonElementName)) {
155        for (int i = openElements.size(); -- i >= 0;) {
156          ElementContainmentInfo openEl = openElements.get(i);
157          if (isHeaderElementName(openEl.elementName)) {
158            elInfo = openEl;
159            index = i;
160            canonElementName = openEl.elementName;
161            break;
162          }
163        }
164      }
165      if (index < 0) {
166        return;  // Don't close unopened tags.
167      }
168    }
169    int last = openElements.size();
170    // Close all the elements that cannot contain the element to open.
171    List<ElementContainmentInfo> toResumeInReverse = null;
172    while (--last > index) {
173      ElementContainmentInfo unclosed = openElements.remove(last);
174      if (last + 1 < nestingLimit) {
175        underlying.closeTag(unclosed.elementName);
176      }
177      if (unclosed.resumable) {
178        if (toResumeInReverse == null) {
179          toResumeInReverse = Lists.newArrayList();
180        }
181        toResumeInReverse.add(unclosed);
182      }
183    }
184    if (openElements.size() < nestingLimit) {
185      underlying.closeTag(elInfo.elementName);
186    }
187    openElements.remove(index);
188    if (toResumeInReverse != null) {
189      resume(toResumeInReverse);
190    }
191  }
192
193  private void resume(List<ElementContainmentInfo> toResumeInReverse) {
194    for (ElementContainmentInfo toResume : toResumeInReverse) {
195      // TODO: If resuming of things other than plain formatting tags like <b>
196      // and <i>, then we need to store the attributes for resumable tags so
197      // that we can resume with the appropriate attributes.
198      if (openElements.size() < nestingLimit) {
199        underlying.openTag(toResume.elementName, Lists.<String>newArrayList());
200      }
201      openElements.add(toResume);
202    }
203  }
204
205  private static final int HTML_SPACE_CHAR_BITMASK =
206      (1 << ' ') | (1 << '\t') | (1 << '\n') | (1 << '\u000c') | (1 << '\r');
207
208  public void text(String text) {
209    int n = text.length();
210    for (int i = 0; i < n; ++i) {
211      int ch = text.charAt(i);
212      if (ch > 0x20 || (HTML_SPACE_CHAR_BITMASK & (1 << ch)) == 0) {
213        prepareForContent(ElementContainmentRelationships.CHARACTER_DATA);
214        break;
215      }
216    }
217
218    if (openElements.size() < nestingLimit) {
219      underlying.text(text);
220    }
221  }
222
223  private static boolean isHeaderElementName(String canonElementName) {
224    return canonElementName.length() == 2 && canonElementName.charAt(0) == 'h'
225        && canonElementName.charAt(1) <= '9';
226  }
227
228
229  @Immutable
230  private static final class ElementContainmentInfo {
231    final String elementName;
232    /**
233     * True if the adoption agency algorithm allows an element to be resumed
234     * after a mis-nested end tag closes it.
235     * E.g. in {@code <b>Foo<i>Bar</b>Baz</i>} the {@code <i>} element is
236     * resumed after the {@code <b>} element is closed.
237     */
238    final boolean resumable;
239    /** A set of bits of element groups into which the element falls. */
240    final int types;
241    /** The type of elements that an element can contain. */
242    final int contents;
243    /** True if the element has no content -- not even text content. */
244    final boolean isVoid;
245    /** A legal child of this node that can contain block content. */
246    final @Nullable ElementContainmentInfo blockContainerChild;
247
248    ElementContainmentInfo(
249        String elementName, boolean resumable, int types, int contents,
250        @Nullable ElementContainmentInfo blockContainerChild) {
251      this.elementName = elementName;
252      this.resumable = resumable;
253      this.types = types;
254      this.contents = contents;
255      this.isVoid = contents == 0
256          && HtmlTextEscapingMode.isVoidElement(elementName);
257      this.blockContainerChild = blockContainerChild;
258    }
259
260    @Override public String toString() {
261      return "<" + elementName + ">";
262    }
263  }
264
265  static final ImmutableMap<String, ElementContainmentInfo>
266      ELEMENT_CONTAINMENT_RELATIONSHIPS
267      = new ElementContainmentRelationships().toMap();
268
269  private static class ElementContainmentRelationships {
270    private enum ElementGroup {
271      BLOCK,
272      INLINE,
273      INLINE_MINUS_A,
274      MIXED,
275      TABLE_CONTENT,
276      HEAD_CONTENT,
277      TOP_CONTENT,
278      AREA_ELEMENT,
279      FORM_ELEMENT,
280      LEGEND_ELEMENT,
281      LI_ELEMENT,
282      DL_PART,
283      P_ELEMENT,
284      OPTIONS_ELEMENT,
285      OPTION_ELEMENT,
286      PARAM_ELEMENT,
287      TABLE_ELEMENT,
288      TR_ELEMENT,
289      TD_ELEMENT,
290      COL_ELEMENT,
291      CHARACTER_DATA,
292      ;
293    }
294
295    private static int elementGroupBits(ElementGroup a) {
296      return 1 << a.ordinal();
297    }
298
299    private static int elementGroupBits(
300        ElementGroup a, ElementGroup b) {
301      return (1 << a.ordinal()) | (1 << b.ordinal());
302    }
303
304    private static int elementGroupBits(
305        ElementGroup a, ElementGroup b, ElementGroup c) {
306      return (1 << a.ordinal()) | (1 << b.ordinal()) | (1 << c.ordinal());
307    }
308
309    private static int elementGroupBits(
310        ElementGroup... bits) {
311      int bitField = 0;
312      for (ElementGroup bit : bits) {
313        bitField |= (1 << bit.ordinal());
314      }
315      return bitField;
316    }
317
318    private ImmutableMap.Builder<String, ElementContainmentInfo> definitions
319        = ImmutableMap.builder();
320
321    private ElementContainmentInfo defineElement(
322        String elementName, boolean resumable, int types, int contentTypes) {
323      return defineElement(elementName, resumable, types, contentTypes, null);
324    }
325
326    private ElementContainmentInfo defineElement(
327        String elementName, boolean resumable, int types, int contentTypes,
328        @Nullable ElementContainmentInfo blockContainer) {
329      ElementContainmentInfo info = new ElementContainmentInfo(
330          elementName, resumable, types, contentTypes, blockContainer);
331      definitions.put(elementName, info);
332      return info;
333    }
334
335    private ImmutableMap<String, ElementContainmentInfo> toMap() {
336      return definitions.build();
337    }
338
339    {
340      defineElement(
341          "a", false, elementGroupBits(
342              ElementGroup.INLINE
343          ), elementGroupBits(
344              ElementGroup.INLINE_MINUS_A
345          ));
346      defineElement(
347          "abbr", true, elementGroupBits(
348              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
349          ), elementGroupBits(
350              ElementGroup.INLINE
351          ));
352      defineElement(
353          "acronym", true, elementGroupBits(
354              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
355          ), elementGroupBits(
356              ElementGroup.INLINE
357          ));
358      defineElement(
359          "address", false, elementGroupBits(
360              ElementGroup.BLOCK
361          ), elementGroupBits(
362              ElementGroup.INLINE, ElementGroup.P_ELEMENT
363          ));
364      defineElement(
365          "applet", false, elementGroupBits(
366              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
367          ), elementGroupBits(
368              ElementGroup.BLOCK, ElementGroup.INLINE,
369              ElementGroup.PARAM_ELEMENT
370          ));
371      defineElement(
372          "area", false, elementGroupBits(ElementGroup.AREA_ELEMENT), 0);
373      defineElement(
374          "audio", false, elementGroupBits(
375              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
376          ), 0);
377      defineElement(
378          "b", true, elementGroupBits(
379              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
380          ), elementGroupBits(
381              ElementGroup.INLINE
382          ));
383      defineElement(
384          "base", false, elementGroupBits(ElementGroup.HEAD_CONTENT), 0);
385      defineElement(
386          "basefont", false, elementGroupBits(
387              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
388          ), 0);
389      defineElement(
390          "bdi", true, elementGroupBits(
391              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
392          ), elementGroupBits(
393              ElementGroup.INLINE
394          ));
395      defineElement(
396          "bdo", true, elementGroupBits(
397              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
398          ), elementGroupBits(
399              ElementGroup.INLINE
400          ));
401      defineElement(
402          "big", true, elementGroupBits(
403              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
404          ), elementGroupBits(
405              ElementGroup.INLINE
406          ));
407      defineElement(
408          "blink", true, elementGroupBits(
409              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
410          ), elementGroupBits(
411              ElementGroup.INLINE
412          ));
413      defineElement(
414          "blockquote", false, elementGroupBits(
415              ElementGroup.BLOCK
416          ), elementGroupBits(
417              ElementGroup.BLOCK, ElementGroup.INLINE
418          ));
419      defineElement(
420          "body", false, elementGroupBits(
421              ElementGroup.TOP_CONTENT
422          ), elementGroupBits(
423              ElementGroup.BLOCK, ElementGroup.INLINE
424          ));
425      defineElement(
426          "br", false, elementGroupBits(
427              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
428          ), 0);
429      defineElement(
430          "button", false, elementGroupBits(
431              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
432          ), elementGroupBits(
433              ElementGroup.BLOCK, ElementGroup.INLINE
434          ));
435      defineElement(
436          "canvas", false, elementGroupBits(
437              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
438          ), elementGroupBits(
439              ElementGroup.INLINE
440          ));
441      defineElement(
442          "caption", false, elementGroupBits(
443              ElementGroup.TABLE_CONTENT
444          ), elementGroupBits(
445              ElementGroup.INLINE
446          ));
447      defineElement(
448          "center", false, elementGroupBits(
449              ElementGroup.BLOCK
450          ), elementGroupBits(
451              ElementGroup.BLOCK, ElementGroup.INLINE
452          ));
453      defineElement(
454          "cite", true, elementGroupBits(
455              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
456          ), elementGroupBits(
457              ElementGroup.INLINE
458          ));
459      defineElement(
460          "code", true, elementGroupBits(
461              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
462          ), elementGroupBits(
463              ElementGroup.INLINE
464          ));
465      defineElement(
466          "col", false, elementGroupBits(
467              ElementGroup.TABLE_CONTENT, ElementGroup.COL_ELEMENT
468          ), 0);
469      defineElement(
470          "colgroup", false, elementGroupBits(
471              ElementGroup.TABLE_CONTENT
472          ), elementGroupBits(
473              ElementGroup.COL_ELEMENT
474          ));
475      ElementContainmentInfo DD = defineElement(
476          "dd", false, elementGroupBits(
477              ElementGroup.DL_PART
478          ), elementGroupBits(
479              ElementGroup.BLOCK, ElementGroup.INLINE
480          ));
481      defineElement(
482          "del", true, elementGroupBits(
483              ElementGroup.BLOCK, ElementGroup.INLINE,
484              ElementGroup.MIXED
485          ), elementGroupBits(
486              ElementGroup.BLOCK, ElementGroup.INLINE
487          ));
488      defineElement(
489          "dfn", true, elementGroupBits(
490              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
491          ), elementGroupBits(
492              ElementGroup.INLINE
493          ));
494      defineElement(
495          "dir", false, elementGroupBits(
496              ElementGroup.BLOCK
497          ), elementGroupBits(
498              ElementGroup.LI_ELEMENT
499          ));
500      defineElement(
501          "div", false, elementGroupBits(
502              ElementGroup.BLOCK
503          ), elementGroupBits(
504              ElementGroup.BLOCK, ElementGroup.INLINE
505          ));
506      defineElement(
507          "dl", false, elementGroupBits(
508              ElementGroup.BLOCK
509          ), elementGroupBits(
510              ElementGroup.DL_PART
511          ),
512          DD);
513      defineElement(
514          "dt", false, elementGroupBits(
515              ElementGroup.DL_PART
516          ), elementGroupBits(
517              ElementGroup.INLINE
518          ));
519      defineElement(
520          "em", true, elementGroupBits(
521              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
522          ), elementGroupBits(
523              ElementGroup.INLINE
524          ));
525      defineElement(
526          "fieldset", false, elementGroupBits(
527              ElementGroup.BLOCK
528          ), elementGroupBits(
529              ElementGroup.BLOCK, ElementGroup.INLINE,
530              ElementGroup.LEGEND_ELEMENT
531          ));
532      defineElement(
533          "font", false, elementGroupBits(
534              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
535          ), elementGroupBits(
536              ElementGroup.INLINE
537          ));
538      defineElement(
539          "form", false, elementGroupBits(
540              ElementGroup.BLOCK, ElementGroup.FORM_ELEMENT
541          ), elementGroupBits(
542              ElementGroup.BLOCK, ElementGroup.INLINE,
543              ElementGroup.INLINE_MINUS_A, ElementGroup.TR_ELEMENT,
544              ElementGroup.TD_ELEMENT
545          ));
546      defineElement(
547          "h1", false, elementGroupBits(
548              ElementGroup.BLOCK
549          ), elementGroupBits(
550              ElementGroup.INLINE
551          ));
552      defineElement(
553          "h2", false, elementGroupBits(
554              ElementGroup.BLOCK
555          ), elementGroupBits(
556              ElementGroup.INLINE
557          ));
558      defineElement(
559          "h3", false, elementGroupBits(
560              ElementGroup.BLOCK
561          ), elementGroupBits(
562              ElementGroup.INLINE
563          ));
564      defineElement(
565          "h4", false, elementGroupBits(
566              ElementGroup.BLOCK
567          ), elementGroupBits(
568              ElementGroup.INLINE
569          ));
570      defineElement(
571          "h5", false, elementGroupBits(
572              ElementGroup.BLOCK
573          ), elementGroupBits(
574              ElementGroup.INLINE
575          ));
576      defineElement(
577          "h6", false, elementGroupBits(
578              ElementGroup.BLOCK
579          ), elementGroupBits(
580              ElementGroup.INLINE
581          ));
582      defineElement(
583          "head", false, elementGroupBits(
584              ElementGroup.TOP_CONTENT
585          ), elementGroupBits(
586              ElementGroup.HEAD_CONTENT
587          ));
588      defineElement(
589          "hr", false, elementGroupBits(ElementGroup.BLOCK), 0);
590      defineElement(
591          "html", false, 0, elementGroupBits(ElementGroup.TOP_CONTENT));
592      defineElement(
593          "i", true, elementGroupBits(
594              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
595          ), elementGroupBits(
596              ElementGroup.INLINE
597          ));
598      defineElement(
599          "iframe", false, elementGroupBits(
600              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
601          ), elementGroupBits(
602              ElementGroup.BLOCK, ElementGroup.INLINE
603          ));
604      defineElement(
605          "img", false, elementGroupBits(
606              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
607          ), 0);
608      defineElement(
609          "input", false, elementGroupBits(
610              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
611          ), 0);
612      defineElement(
613          "ins", true, elementGroupBits(
614              ElementGroup.BLOCK, ElementGroup.INLINE
615          ), elementGroupBits(
616              ElementGroup.BLOCK, ElementGroup.INLINE
617          ));
618      defineElement(
619          "isindex", false, elementGroupBits(ElementGroup.INLINE), 0);
620      defineElement(
621          "kbd", true, elementGroupBits(
622              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
623          ), elementGroupBits(
624              ElementGroup.INLINE
625          ));
626      defineElement(
627          "label", false, elementGroupBits(
628              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
629          ), elementGroupBits(
630              ElementGroup.INLINE
631          ));
632      defineElement(
633          "legend", false, elementGroupBits(
634              ElementGroup.LEGEND_ELEMENT
635          ), elementGroupBits(
636              ElementGroup.INLINE
637          ));
638      ElementContainmentInfo LI = defineElement(
639          "li", false, elementGroupBits(
640              ElementGroup.LI_ELEMENT
641          ), elementGroupBits(
642              ElementGroup.BLOCK, ElementGroup.INLINE
643          ));
644      defineElement(
645          "link", false, elementGroupBits(
646              ElementGroup.INLINE, ElementGroup.HEAD_CONTENT
647          ), 0);
648      defineElement(
649          "listing", false, elementGroupBits(
650              ElementGroup.BLOCK
651          ), elementGroupBits(
652              ElementGroup.INLINE
653          ));
654      defineElement(
655          "map", false, elementGroupBits(
656              ElementGroup.INLINE
657          ), elementGroupBits(
658              ElementGroup.BLOCK, ElementGroup.AREA_ELEMENT
659          ));
660      defineElement(
661          "meta", false, elementGroupBits(ElementGroup.HEAD_CONTENT), 0);
662      defineElement(
663          "nobr", false, elementGroupBits(
664              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
665          ), elementGroupBits(
666              ElementGroup.INLINE
667          ));
668      defineElement(
669          "noframes", false, elementGroupBits(
670              ElementGroup.BLOCK, ElementGroup.TOP_CONTENT
671          ), elementGroupBits(
672              ElementGroup.BLOCK, ElementGroup.INLINE,
673              ElementGroup.TOP_CONTENT
674          ));
675      defineElement(
676          "noscript", false, elementGroupBits(
677              ElementGroup.BLOCK
678          ), elementGroupBits(
679              ElementGroup.BLOCK, ElementGroup.INLINE
680          ));
681      defineElement(
682          "object", false, elementGroupBits(
683              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A,
684              ElementGroup.HEAD_CONTENT
685          ), elementGroupBits(
686              ElementGroup.BLOCK, ElementGroup.INLINE,
687              ElementGroup.PARAM_ELEMENT
688          ));
689      defineElement(
690          "ol", false, elementGroupBits(
691              ElementGroup.BLOCK
692          ), elementGroupBits(
693              ElementGroup.LI_ELEMENT
694          ),
695          LI);
696      defineElement(
697          "optgroup", false, elementGroupBits(
698              ElementGroup.OPTIONS_ELEMENT
699          ), elementGroupBits(
700              ElementGroup.OPTIONS_ELEMENT
701          ));
702      defineElement(
703          "option", false, elementGroupBits(
704              ElementGroup.OPTIONS_ELEMENT, ElementGroup.OPTION_ELEMENT
705          ), elementGroupBits(
706              ElementGroup.CHARACTER_DATA
707          ));
708      defineElement(
709          "p", false, elementGroupBits(
710              ElementGroup.BLOCK, ElementGroup.P_ELEMENT
711          ), elementGroupBits(
712              ElementGroup.INLINE, ElementGroup.TABLE_ELEMENT
713          ));
714      defineElement(
715          "param", false, elementGroupBits(ElementGroup.PARAM_ELEMENT), 0);
716      defineElement(
717          "pre", false, elementGroupBits(
718              ElementGroup.BLOCK
719          ), elementGroupBits(
720              ElementGroup.INLINE
721          ));
722      defineElement(
723          "q", true, elementGroupBits(
724              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
725          ), elementGroupBits(
726              ElementGroup.INLINE
727          ));
728      defineElement(
729          "s", true, elementGroupBits(
730              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
731          ), elementGroupBits(
732              ElementGroup.INLINE
733          ));
734      defineElement(
735          "samp", true, elementGroupBits(
736              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
737          ), elementGroupBits(
738              ElementGroup.INLINE
739          ));
740      defineElement(
741          "script", false, elementGroupBits(
742              ElementGroup.BLOCK, ElementGroup.INLINE,
743              ElementGroup.INLINE_MINUS_A, ElementGroup.MIXED,
744              ElementGroup.TABLE_CONTENT, ElementGroup.HEAD_CONTENT,
745              ElementGroup.TOP_CONTENT, ElementGroup.AREA_ELEMENT,
746              ElementGroup.FORM_ELEMENT, ElementGroup.LEGEND_ELEMENT,
747              ElementGroup.LI_ELEMENT, ElementGroup.DL_PART,
748              ElementGroup.P_ELEMENT, ElementGroup.OPTIONS_ELEMENT,
749              ElementGroup.OPTION_ELEMENT, ElementGroup.PARAM_ELEMENT,
750              ElementGroup.TABLE_ELEMENT, ElementGroup.TR_ELEMENT,
751              ElementGroup.TD_ELEMENT, ElementGroup.COL_ELEMENT
752          ), elementGroupBits(
753              ElementGroup.CHARACTER_DATA));
754      defineElement(
755          "select", false, elementGroupBits(
756              ElementGroup.INLINE
757          ), elementGroupBits(
758              ElementGroup.OPTIONS_ELEMENT
759          ));
760      defineElement(
761          "small", true, elementGroupBits(
762              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
763          ), elementGroupBits(
764              ElementGroup.INLINE
765          ));
766      defineElement(
767          "span", false, elementGroupBits(
768              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
769          ), elementGroupBits(
770              ElementGroup.INLINE
771          ));
772      defineElement(
773          "strike", true, elementGroupBits(
774              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
775          ), elementGroupBits(
776              ElementGroup.INLINE
777          ));
778      defineElement(
779          "strong", true, elementGroupBits(
780              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
781          ), elementGroupBits(
782              ElementGroup.INLINE
783          ));
784      defineElement(
785          "style", false, elementGroupBits(
786              ElementGroup.INLINE, ElementGroup.HEAD_CONTENT
787          ), elementGroupBits(
788              ElementGroup.CHARACTER_DATA
789          ));
790      defineElement(
791          "sub", true, elementGroupBits(
792              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
793          ), elementGroupBits(
794              ElementGroup.INLINE
795          ));
796      defineElement(
797          "sup", true, elementGroupBits(
798              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
799          ), elementGroupBits(
800              ElementGroup.INLINE
801          ));
802      defineElement(
803          "table", false, elementGroupBits(
804              ElementGroup.BLOCK, ElementGroup.TABLE_ELEMENT
805          ), elementGroupBits(
806              ElementGroup.TABLE_CONTENT, ElementGroup.FORM_ELEMENT
807          ));
808      defineElement(
809          "tbody", false, elementGroupBits(
810              ElementGroup.TABLE_CONTENT
811          ), elementGroupBits(
812              ElementGroup.TR_ELEMENT
813          ));
814      ElementContainmentInfo TD = defineElement(
815          "td", false, elementGroupBits(
816              ElementGroup.TD_ELEMENT
817          ), elementGroupBits(
818              ElementGroup.BLOCK, ElementGroup.INLINE
819          ));
820      defineElement(
821          "textarea", false,
822          // No, a textarea cannot be inside a link.
823          elementGroupBits(ElementGroup.INLINE),
824          elementGroupBits(ElementGroup.CHARACTER_DATA));
825      defineElement(
826          "tfoot", false, elementGroupBits(
827              ElementGroup.TABLE_CONTENT
828          ), elementGroupBits(
829              ElementGroup.FORM_ELEMENT, ElementGroup.TR_ELEMENT,
830              ElementGroup.TD_ELEMENT
831          ));
832      defineElement(
833          "th", false, elementGroupBits(
834              ElementGroup.TD_ELEMENT
835          ), elementGroupBits(
836              ElementGroup.BLOCK, ElementGroup.INLINE
837          ));
838      defineElement(
839          "thead", false, elementGroupBits(
840              ElementGroup.TABLE_CONTENT
841          ), elementGroupBits(
842              ElementGroup.FORM_ELEMENT, ElementGroup.TR_ELEMENT,
843              ElementGroup.TD_ELEMENT
844          ));
845      defineElement(
846          "title", false, elementGroupBits(ElementGroup.HEAD_CONTENT),
847          elementGroupBits(ElementGroup.CHARACTER_DATA));
848      defineElement(
849          "tr", false, elementGroupBits(
850              ElementGroup.TABLE_CONTENT, ElementGroup.TR_ELEMENT
851          ), elementGroupBits(
852              ElementGroup.FORM_ELEMENT, ElementGroup.TD_ELEMENT
853          ),
854          TD);
855      defineElement(
856          "tt", true, elementGroupBits(
857              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
858          ), elementGroupBits(
859              ElementGroup.INLINE
860          ));
861      defineElement(
862          "u", true, elementGroupBits(
863              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
864          ), elementGroupBits(
865              ElementGroup.INLINE
866          ));
867      defineElement(
868          "ul", false, elementGroupBits(
869              ElementGroup.BLOCK
870          ), elementGroupBits(
871              ElementGroup.LI_ELEMENT
872          ),
873          LI);
874      defineElement(
875          "var", false, elementGroupBits(
876              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
877          ), elementGroupBits(
878              ElementGroup.INLINE
879          ));
880      defineElement(
881          "video", false, elementGroupBits(
882              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
883          ), 0);
884      defineElement(
885          "wbr", false, elementGroupBits(
886              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
887          ), 0);
888      defineElement(
889          "xmp", false, elementGroupBits(
890              ElementGroup.BLOCK
891          ), elementGroupBits(
892              ElementGroup.INLINE
893          ));
894
895    }
896
897    private static final ElementContainmentInfo CHARACTER_DATA
898        = new ElementContainmentInfo(
899            "#text", false,
900            elementGroupBits(
901                ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A,
902                ElementGroup.BLOCK, ElementGroup.CHARACTER_DATA),
903            0, null);
904  }
905
906  static boolean allowsPlainTextualContent(String canonElementName) {
907    ElementContainmentInfo info =
908       ELEMENT_CONTAINMENT_RELATIONSHIPS.get(canonElementName);
909    if (info == null || (info.contents & ElementContainmentRelationships.CHARACTER_DATA.types) != 0) {
910      switch (HtmlTextEscapingMode.getModeForTag(canonElementName)) {
911        case PCDATA:     return true;
912        case RCDATA:     return true;
913        case PLAIN_TEXT: return true;
914        case VOID:       return false;
915        case CDATA:
916        case CDATA_SOMETIMES:
917          return "xmp".equals(canonElementName)
918              || "listing".equals(canonElementName);
919      }
920    }
921    return false;
922  }
923}
924