1// Copyright (c) 2011, Mike Samuel
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions
6// are met:
7//
8// Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// Redistributions in binary form must reproduce the above copyright
11// notice, this list of conditions and the following disclaimer in the
12// documentation and/or other materials provided with the distribution.
13// Neither the name of the OWASP nor the names of its contributors may
14// be used to endorse or promote products derived from this software
15// without specific prior written permission.
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28
29package org.owasp.html;
30
31import java.util.List;
32
33import javax.annotation.Nullable;
34import javax.annotation.concurrent.Immutable;
35
36import com.google.common.collect.ImmutableMap;
37import com.google.common.collect.Lists;
38
39/**
40 * Wraps an HTML stream event receiver to fill in missing close tags.
41 * If the balancer is given the HTML {@code <p>1<p>2}, the wrapped receiver will
42 * see events equivalent to {@code <p>1</p><p>2</p>}.
43 *
44 * @author Mike Samuel <mikesamuel@gmail.com>
45 */
46@TCB
47public class TagBalancingHtmlStreamEventReceiver
48    implements HtmlStreamEventReceiver {
  /** Receiver to which the balanced stream of events is forwarded. */
  private final HtmlStreamEventReceiver underlying;
  /**
   * Depth of {@link #openElements} at or beyond which open-tag and text events
   * are withheld from {@link #underlying}.  Effectively unlimited by default.
   */
  private int nestingLimit = Integer.MAX_VALUE;
  /** Stack of currently open elements; the last entry is the innermost. */
  private final List<ElementContainmentInfo> openElements
      = Lists.newArrayList();
53
  /**
   * @param underlying the receiver that is sent the balanced events.
   */
  public TagBalancingHtmlStreamEventReceiver(
      HtmlStreamEventReceiver underlying) {
    this.underlying = underlying;
  }
58
59  public void setNestingLimit(int limit) {
60    if (openElements.size() > limit) {
61      throw new IllegalStateException();
62    }
63    this.nestingLimit = limit;
64  }
65
  /** Forwards the start-of-document event to the underlying receiver. */
  public void openDocument() {
    underlying.openDocument();
  }
69
70  public void closeDocument() {
71    for (int i = Math.min(nestingLimit, openElements.size()); --i >= 0;) {
72      underlying.closeTag(openElements.get(i).elementName);
73    }
74    openElements.clear();
75    underlying.closeDocument();
76  }
77
78  public void openTag(String elementName, List<String> attrs) {
79    String canonElementName = HtmlLexer.canonicalName(elementName);
80    ElementContainmentInfo elInfo = ELEMENT_CONTAINMENT_RELATIONSHIPS.get(
81        canonElementName);
82    // Treat unrecognized tags as void, but emit closing tags in closeTag().
83    if (elInfo == null) {
84      if (openElements.size() < nestingLimit) {
85        underlying.openTag(elementName, attrs);
86      }
87      return;
88    }
89
90    prepareForContent(elInfo);
91
92    if (openElements.size() < nestingLimit) {
93      underlying.openTag(elInfo.elementName, attrs);
94    }
95    if (!elInfo.isVoid) {
96      openElements.add(elInfo);
97    }
98  }
99
100  private void prepareForContent(ElementContainmentInfo elInfo) {
101    int nOpen = openElements.size();
102    if (nOpen != 0) {
103      ElementContainmentInfo top = openElements.get(nOpen - 1);
104      if ((top.contents & elInfo.types) == 0) {
105        ElementContainmentInfo blockContainerChild = top.blockContainerChild;
106        // Open implied elements, such as list-items and table cells & rows.
107        if (blockContainerChild != null
108            && (blockContainerChild.contents & elInfo.types) != 0) {
109          underlying.openTag(
110              blockContainerChild.elementName, Lists.<String>newArrayList());
111          openElements.add(blockContainerChild);
112          top = blockContainerChild;
113          ++nOpen;
114        }
115      }
116
117      // Close all the elements that cannot contain the element to open.
118      List<ElementContainmentInfo> toResumeInReverse = null;
119      while (true) {
120        if ((top.contents & elInfo.types) != 0) { break; }
121        if (openElements.size() < nestingLimit) {
122          underlying.closeTag(top.elementName);
123        }
124        openElements.remove(--nOpen);
125        if (top.resumable) {
126          if (toResumeInReverse == null) {
127            toResumeInReverse = Lists.newArrayList();
128          }
129          toResumeInReverse.add(top);
130        }
131        if (nOpen == 0) { break; }
132        top = openElements.get(nOpen - 1);
133      }
134
135      if (toResumeInReverse != null) {
136        resume(toResumeInReverse);
137      }
138    }
139  }
140
141  public void closeTag(String elementName) {
142    String canonElementName = HtmlLexer.canonicalName(elementName);
143    ElementContainmentInfo elInfo = ELEMENT_CONTAINMENT_RELATIONSHIPS.get(
144        canonElementName);
145    if (elInfo == null) {  // Allow unrecognized end tags through.
146      if (openElements.size() < nestingLimit) {
147        underlying.closeTag(elementName);
148      }
149      return;
150    }
151    int index = openElements.lastIndexOf(elInfo);
152    // Let any of </h1>, </h2>, ... close other header tags.
153    if (isHeaderElementName(canonElementName)) {
154      for (int i = openElements.size(), limit = index + 1; -- i >= limit;) {
155        ElementContainmentInfo openEl = openElements.get(i);
156        if (isHeaderElementName(openEl.elementName)) {
157          elInfo = openEl;
158          index = i;
159          canonElementName = openEl.elementName;
160          break;
161        }
162      }
163    }
164    if (index < 0) {
165      return;  // Don't close unopened tags.
166    }
167
168    // Ensure that index is in the scope of closeable elements.
169    // This approximates the "has an element in *** scope" predicates defined at
170    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html
171    // #has-an-element-in-the-specific-scope
172    int blockingScopes = elInfo.blockedByScopes;
173    for (int i = openElements.size(); --i > index;) {
174      if ((openElements.get(i).inScopes & blockingScopes) != 0) {
175        return;
176      }
177    }
178
179    int last = openElements.size();
180    // Close all the elements that cannot contain the element to open.
181    List<ElementContainmentInfo> toResumeInReverse = null;
182    while (--last > index) {
183      ElementContainmentInfo unclosed = openElements.remove(last);
184      if (last + 1 < nestingLimit) {
185        underlying.closeTag(unclosed.elementName);
186      }
187      if (unclosed.resumable) {
188        if (toResumeInReverse == null) {
189          toResumeInReverse = Lists.newArrayList();
190        }
191        toResumeInReverse.add(unclosed);
192      }
193    }
194    if (openElements.size() < nestingLimit) {
195      underlying.closeTag(elInfo.elementName);
196    }
197    openElements.remove(index);
198    if (toResumeInReverse != null) {
199      resume(toResumeInReverse);
200    }
201  }
202
203  private void resume(List<ElementContainmentInfo> toResumeInReverse) {
204    for (ElementContainmentInfo toResume : toResumeInReverse) {
205      // TODO: If resuming of things other than plain formatting tags like <b>
206      // and <i>, then we need to store the attributes for resumable tags so
207      // that we can resume with the appropriate attributes.
208      if (openElements.size() < nestingLimit) {
209        underlying.openTag(toResume.elementName, Lists.<String>newArrayList());
210      }
211      openElements.add(toResume);
212    }
213  }
214
215  private static final long HTML_SPACE_CHAR_BITMASK =
216      (1L << ' ')
217    | (1L << '\t')
218    | (1L << '\n')
219    | (1L << '\u000c')
220    | (1L << '\r');
221
222  public static boolean isInterElementWhitespace(String text) {
223    int n = text.length();
224    for (int i = 0; i < n; ++i) {
225      int ch = text.charAt(i);
226      if (ch > 0x20 || (HTML_SPACE_CHAR_BITMASK & (1L << ch)) == 0) {
227        return false;
228      }
229    }
230    return true;
231  }
232
233  public void text(String text) {
234    if (!isInterElementWhitespace(text)) {
235      prepareForContent(ElementContainmentRelationships.CHARACTER_DATA_ONLY);
236    }
237
238    if (openElements.size() < nestingLimit) {
239      underlying.text(text);
240    }
241  }
242
243  private static boolean isHeaderElementName(String canonElementName) {
244    return canonElementName.length() == 2 && canonElementName.charAt(0) == 'h'
245        && canonElementName.charAt(1) <= '9';
246  }
247
248
249  @Immutable
250  private static final class ElementContainmentInfo {
251    final String elementName;
252    /**
253     * True if the adoption agency algorithm allows an element to be resumed
254     * after a mis-nested end tag closes it.
255     * E.g. in {@code <b>Foo<i>Bar</b>Baz</i>} the {@code <i>} element is
256     * resumed after the {@code <b>} element is closed.
257     */
258    final boolean resumable;
259    /** A set of bits of element groups into which the element falls. */
260    final int types;
261    /** The type of elements that an element can contain. */
262    final int contents;
263    /** True if the element has no content -- not even text content. */
264    final boolean isVoid;
265    /** A legal child of this node that can contain block content. */
266    final @Nullable ElementContainmentInfo blockContainerChild;
267    /** A bit set of close tag scopes that block this element's close tags. */
268    final int blockedByScopes;
269    /** A bit set of scopes groups into which this element falls. */
270    final int inScopes;
271
272    ElementContainmentInfo(
273        String elementName, boolean resumable, int types, int contents,
274        @Nullable ElementContainmentInfo blockContainerChild,
275        int inScopes) {
276      this.elementName = elementName;
277      this.resumable = resumable;
278      this.types = types;
279      this.contents = contents;
280      this.isVoid = contents == 0
281          && HtmlTextEscapingMode.isVoidElement(elementName);
282      this.blockContainerChild = blockContainerChild;
283      this.blockedByScopes =
284          ElementContainmentRelationships.CloseTagScope.ALL & ~inScopes;
285      this.inScopes = inScopes;
286    }
287
288    @Override public String toString() {
289      return "<" + elementName + ">";
290    }
291  }
292
  /**
   * Containment info for each recognized element, keyed by element name.
   * Looked up with canonicalized names by {@link #openTag} and
   * {@link #closeTag}; names that are absent are treated as unrecognized.
   */
  static final ImmutableMap<String, ElementContainmentInfo>
      ELEMENT_CONTAINMENT_RELATIONSHIPS
      = new ElementContainmentRelationships().toMap();
296
297  private static class ElementContainmentRelationships {
    /**
     * Groups used to describe which elements an element belongs with and
     * which it may contain.  Each constant's ordinal is used as a bit position
     * in an {@code int} bit set by {@code elementGroupBits}, so reordering the
     * constants changes the bit values.
     */
    private enum ElementGroup {
      BLOCK,
      INLINE,
      INLINE_MINUS_A,
      MIXED,
      TABLE_CONTENT,
      HEAD_CONTENT,
      TOP_CONTENT,
      AREA_ELEMENT,
      FORM_ELEMENT,
      LEGEND_ELEMENT,
      LI_ELEMENT,
      DL_PART,
      P_ELEMENT,
      OPTIONS_ELEMENT,
      OPTION_ELEMENT,
      PARAM_ELEMENT,
      TABLE_ELEMENT,
      TR_ELEMENT,
      TD_ELEMENT,
      COL_ELEMENT,
      CHARACTER_DATA,
      ;
    }
322
    /**
     * An identifier for one of the "has a *** element in scope" predicates
     * used by HTML5 to decide when a close tag implicitly closes tags above
     * the target element on the open element stack.
     * Each constant's ordinal is used as a bit position by {@code scopeBits}.
     */
    private enum CloseTagScope {
      COMMON,
      BUTTON,
      LIST_ITEM,
      TABLE,
      ;

      /** Bit set with one bit set for every scope constant. */
      static final int ALL = (1 << values().length) - 1;
    }
337
338    private static int elementGroupBits(ElementGroup a) {
339      return 1 << a.ordinal();
340    }
341
342    private static int elementGroupBits(
343        ElementGroup a, ElementGroup b) {
344      return (1 << a.ordinal()) | (1 << b.ordinal());
345    }
346
347    private static int elementGroupBits(
348        ElementGroup a, ElementGroup b, ElementGroup c) {
349      return (1 << a.ordinal()) | (1 << b.ordinal()) | (1 << c.ordinal());
350    }
351
352    private static int elementGroupBits(
353        ElementGroup... bits) {
354      int bitField = 0;
355      for (ElementGroup bit : bits) {
356        bitField |= (1 << bit.ordinal());
357      }
358      return bitField;
359    }
360
361    private static int scopeBits(CloseTagScope a) {
362      return 1 << a.ordinal();
363    }
364
365    private static int scopeBits(
366        CloseTagScope a, CloseTagScope b, CloseTagScope c) {
367      return (1 << a.ordinal()) | (1 << b.ordinal()) | (1 << c.ordinal());
368    }
369
    /** Accumulates element definitions, keyed by element name. */
    private ImmutableMap.Builder<String, ElementContainmentInfo> definitions
        = ImmutableMap.builder();
372
373    private ElementContainmentInfo defineElement(
374        String elementName, boolean resumable, int types, int contentTypes) {
375      return defineElement(elementName, resumable, types, contentTypes, null);
376    }
377
378    private ElementContainmentInfo defineElement(
379        String elementName, boolean resumable, int types, int contentTypes,
380        int inScopes) {
381      return defineElement(
382          elementName, resumable, types, contentTypes, null, inScopes);
383    }
384
385    private ElementContainmentInfo defineElement(
386        String elementName, boolean resumable, int types, int contentTypes,
387        @Nullable ElementContainmentInfo blockContainer) {
388      return defineElement(
389          elementName, resumable, types, contentTypes, blockContainer, 0);
390    }
391
392    private ElementContainmentInfo defineElement(
393        String elementName, boolean resumable, int types, int contentTypes,
394        @Nullable ElementContainmentInfo blockContainer, int inScopes) {
395      ElementContainmentInfo info = new ElementContainmentInfo(
396          elementName, resumable, types, contentTypes, blockContainer,
397          inScopes);
398      definitions.put(elementName, info);
399      return info;
400    }
401
    /** Returns an immutable map of the definitions registered so far. */
    private ImmutableMap<String, ElementContainmentInfo> toMap() {
      return definitions.build();
    }
405
406    {
407      defineElement(
408          "a", false, elementGroupBits(
409              ElementGroup.INLINE
410          ), elementGroupBits(
411              ElementGroup.INLINE_MINUS_A
412          ));
413      defineElement(
414          "abbr", true, elementGroupBits(
415              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
416          ), elementGroupBits(
417              ElementGroup.INLINE
418          ));
419      defineElement(
420          "acronym", true, elementGroupBits(
421              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
422          ), elementGroupBits(
423              ElementGroup.INLINE
424          ));
425      defineElement(
426          "address", false, elementGroupBits(
427              ElementGroup.BLOCK
428          ), elementGroupBits(
429              ElementGroup.INLINE, ElementGroup.P_ELEMENT
430          ));
431      defineElement(
432          "applet", false, elementGroupBits(
433              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
434          ), elementGroupBits(
435              ElementGroup.BLOCK, ElementGroup.INLINE,
436              ElementGroup.PARAM_ELEMENT
437          ), scopeBits(
438              CloseTagScope.COMMON, CloseTagScope.BUTTON,
439              CloseTagScope.LIST_ITEM
440          ));
441      defineElement(
442          "area", false, elementGroupBits(ElementGroup.AREA_ELEMENT), 0);
443      defineElement(
444          "audio", false, elementGroupBits(
445              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
446          ), 0);
447      defineElement(
448          "b", true, elementGroupBits(
449              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
450          ), elementGroupBits(
451              ElementGroup.INLINE
452          ));
453      defineElement(
454          "base", false, elementGroupBits(ElementGroup.HEAD_CONTENT), 0);
455      defineElement(
456          "basefont", false, elementGroupBits(
457              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
458          ), 0);
459      defineElement(
460          "bdi", true, elementGroupBits(
461              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
462          ), elementGroupBits(
463              ElementGroup.INLINE
464          ));
465      defineElement(
466          "bdo", true, elementGroupBits(
467              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
468          ), elementGroupBits(
469              ElementGroup.INLINE
470          ));
471      defineElement(
472          "big", true, elementGroupBits(
473              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
474          ), elementGroupBits(
475              ElementGroup.INLINE
476          ));
477      defineElement(
478          "blink", true, elementGroupBits(
479              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
480          ), elementGroupBits(
481              ElementGroup.INLINE
482          ));
483      defineElement(
484          "blockquote", false, elementGroupBits(
485              ElementGroup.BLOCK
486          ), elementGroupBits(
487              ElementGroup.BLOCK, ElementGroup.INLINE
488          ));
489      defineElement(
490          "body", false, elementGroupBits(
491              ElementGroup.TOP_CONTENT
492          ), elementGroupBits(
493              ElementGroup.BLOCK, ElementGroup.INLINE
494          ));
495      defineElement(
496          "br", false, elementGroupBits(
497              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
498          ), 0);
499      defineElement(
500          "button", false, elementGroupBits(
501              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
502          ), elementGroupBits(
503              ElementGroup.BLOCK, ElementGroup.INLINE
504          ), scopeBits(CloseTagScope.BUTTON));
505      defineElement(
506          "canvas", false, elementGroupBits(
507              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
508          ), elementGroupBits(
509              ElementGroup.INLINE
510          ));
511      defineElement(
512          "caption", false, elementGroupBits(
513              ElementGroup.TABLE_CONTENT
514          ), elementGroupBits(
515              ElementGroup.INLINE
516          ), scopeBits(
517              CloseTagScope.COMMON, CloseTagScope.BUTTON,
518              CloseTagScope.LIST_ITEM
519          ));
520      defineElement(
521          "center", false, elementGroupBits(
522              ElementGroup.BLOCK
523          ), elementGroupBits(
524              ElementGroup.BLOCK, ElementGroup.INLINE
525          ));
526      defineElement(
527          "cite", true, elementGroupBits(
528              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
529          ), elementGroupBits(
530              ElementGroup.INLINE
531          ));
532      defineElement(
533          "code", true, elementGroupBits(
534              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
535          ), elementGroupBits(
536              ElementGroup.INLINE
537          ));
538      defineElement(
539          "col", false, elementGroupBits(
540              ElementGroup.TABLE_CONTENT, ElementGroup.COL_ELEMENT
541          ), 0);
542      defineElement(
543          "colgroup", false, elementGroupBits(
544              ElementGroup.TABLE_CONTENT
545          ), elementGroupBits(
546              ElementGroup.COL_ELEMENT
547          ));
548      ElementContainmentInfo DD = defineElement(
549          "dd", false, elementGroupBits(
550              ElementGroup.DL_PART
551          ), elementGroupBits(
552              ElementGroup.BLOCK, ElementGroup.INLINE
553          ));
554      defineElement(
555          "del", true, elementGroupBits(
556              ElementGroup.BLOCK, ElementGroup.INLINE,
557              ElementGroup.MIXED
558          ), elementGroupBits(
559              ElementGroup.BLOCK, ElementGroup.INLINE
560          ));
561      defineElement(
562          "dfn", true, elementGroupBits(
563              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
564          ), elementGroupBits(
565              ElementGroup.INLINE
566          ));
567      defineElement(
568          "dir", false, elementGroupBits(
569              ElementGroup.BLOCK
570          ), elementGroupBits(
571              ElementGroup.LI_ELEMENT
572          ));
573      defineElement(
574          "div", false, elementGroupBits(
575              ElementGroup.BLOCK
576          ), elementGroupBits(
577              ElementGroup.BLOCK, ElementGroup.INLINE
578          ));
579      defineElement(
580          "dl", false, elementGroupBits(
581              ElementGroup.BLOCK
582          ), elementGroupBits(
583              ElementGroup.DL_PART
584          ),
585          DD);
586      defineElement(
587          "dt", false, elementGroupBits(
588              ElementGroup.DL_PART
589          ), elementGroupBits(
590              ElementGroup.INLINE
591          ));
592      defineElement(
593          "em", true, elementGroupBits(
594              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
595          ), elementGroupBits(
596              ElementGroup.INLINE
597          ));
598      defineElement(
599          "fieldset", false, elementGroupBits(
600              ElementGroup.BLOCK
601          ), elementGroupBits(
602              ElementGroup.BLOCK, ElementGroup.INLINE,
603              ElementGroup.LEGEND_ELEMENT
604          ));
605      defineElement(
606          "font", false, elementGroupBits(
607              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
608          ), elementGroupBits(
609              ElementGroup.INLINE
610          ));
611      defineElement(
612          "form", false, elementGroupBits(
613              ElementGroup.BLOCK, ElementGroup.FORM_ELEMENT
614          ), elementGroupBits(
615              ElementGroup.BLOCK, ElementGroup.INLINE,
616              ElementGroup.INLINE_MINUS_A, ElementGroup.TR_ELEMENT,
617              ElementGroup.TD_ELEMENT
618          ));
619      defineElement(
620          "h1", false, elementGroupBits(
621              ElementGroup.BLOCK
622          ), elementGroupBits(
623              ElementGroup.INLINE
624          ));
625      defineElement(
626          "h2", false, elementGroupBits(
627              ElementGroup.BLOCK
628          ), elementGroupBits(
629              ElementGroup.INLINE
630          ));
631      defineElement(
632          "h3", false, elementGroupBits(
633              ElementGroup.BLOCK
634          ), elementGroupBits(
635              ElementGroup.INLINE
636          ));
637      defineElement(
638          "h4", false, elementGroupBits(
639              ElementGroup.BLOCK
640          ), elementGroupBits(
641              ElementGroup.INLINE
642          ));
643      defineElement(
644          "h5", false, elementGroupBits(
645              ElementGroup.BLOCK
646          ), elementGroupBits(
647              ElementGroup.INLINE
648          ));
649      defineElement(
650          "h6", false, elementGroupBits(
651              ElementGroup.BLOCK
652          ), elementGroupBits(
653              ElementGroup.INLINE
654          ));
655      defineElement(
656          "head", false, elementGroupBits(
657              ElementGroup.TOP_CONTENT
658          ), elementGroupBits(
659              ElementGroup.HEAD_CONTENT
660          ));
661      defineElement(
662          "hr", false, elementGroupBits(ElementGroup.BLOCK), 0);
663      defineElement(
664          "html", false, 0, elementGroupBits(ElementGroup.TOP_CONTENT),
665          CloseTagScope.ALL);
666      defineElement(
667          "i", true, elementGroupBits(
668              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
669          ), elementGroupBits(
670              ElementGroup.INLINE
671          ));
672      defineElement(
673          "iframe", false, elementGroupBits(
674              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
675          ), elementGroupBits(
676              ElementGroup.BLOCK, ElementGroup.INLINE
677          ));
678      defineElement(
679          "img", false, elementGroupBits(
680              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
681          ), 0);
682      defineElement(
683          "input", false, elementGroupBits(
684              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
685          ), 0);
686      defineElement(
687          "ins", true, elementGroupBits(
688              ElementGroup.BLOCK, ElementGroup.INLINE
689          ), elementGroupBits(
690              ElementGroup.BLOCK, ElementGroup.INLINE
691          ));
692      defineElement(
693          "isindex", false, elementGroupBits(ElementGroup.INLINE), 0);
694      defineElement(
695          "kbd", true, elementGroupBits(
696              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
697          ), elementGroupBits(
698              ElementGroup.INLINE
699          ));
700      defineElement(
701          "label", false, elementGroupBits(
702              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
703          ), elementGroupBits(
704              ElementGroup.INLINE
705          ));
706      defineElement(
707          "legend", false, elementGroupBits(
708              ElementGroup.LEGEND_ELEMENT
709          ), elementGroupBits(
710              ElementGroup.INLINE
711          ));
712      ElementContainmentInfo LI = defineElement(
713          "li", false, elementGroupBits(
714              ElementGroup.LI_ELEMENT
715          ), elementGroupBits(
716              ElementGroup.BLOCK, ElementGroup.INLINE
717          ));
718      defineElement(
719          "link", false, elementGroupBits(
720              ElementGroup.INLINE, ElementGroup.HEAD_CONTENT
721          ), 0);
722      defineElement(
723          "listing", false, elementGroupBits(
724              ElementGroup.BLOCK
725          ), elementGroupBits(
726              ElementGroup.INLINE
727          ));
728      defineElement(
729          "map", false, elementGroupBits(
730              ElementGroup.INLINE
731          ), elementGroupBits(
732              ElementGroup.BLOCK, ElementGroup.AREA_ELEMENT
733          ));
734      defineElement(
735          "meta", false, elementGroupBits(ElementGroup.HEAD_CONTENT), 0);
736      defineElement(
737          "nobr", false, elementGroupBits(
738              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
739          ), elementGroupBits(
740              ElementGroup.INLINE
741          ));
742      defineElement(
743          "noframes", false, elementGroupBits(
744              ElementGroup.BLOCK, ElementGroup.TOP_CONTENT
745          ), elementGroupBits(
746              ElementGroup.BLOCK, ElementGroup.INLINE,
747              ElementGroup.TOP_CONTENT
748          ));
749      defineElement(
750          "noscript", false, elementGroupBits(
751              ElementGroup.BLOCK
752          ), elementGroupBits(
753              ElementGroup.BLOCK, ElementGroup.INLINE
754          ));
755      defineElement(
756          "object", false, elementGroupBits(
757              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A,
758              ElementGroup.HEAD_CONTENT
759          ), elementGroupBits(
760              ElementGroup.BLOCK, ElementGroup.INLINE,
761              ElementGroup.PARAM_ELEMENT
762          ), scopeBits(
763              CloseTagScope.COMMON, CloseTagScope.BUTTON,
764              CloseTagScope.LIST_ITEM
765          ));
766      defineElement(
767          "ol", false, elementGroupBits(
768              ElementGroup.BLOCK
769          ), elementGroupBits(
770              ElementGroup.LI_ELEMENT
771          ),
772          LI,
773          scopeBits(CloseTagScope.LIST_ITEM));
774      defineElement(
775          "optgroup", false, elementGroupBits(
776              ElementGroup.OPTIONS_ELEMENT
777          ), elementGroupBits(
778              ElementGroup.OPTIONS_ELEMENT
779          ));
780      defineElement(
781          "option", false, elementGroupBits(
782              ElementGroup.OPTIONS_ELEMENT, ElementGroup.OPTION_ELEMENT
783          ), elementGroupBits(
784              ElementGroup.CHARACTER_DATA
785          ));
786      defineElement(
787          "p", false, elementGroupBits(
788              ElementGroup.BLOCK, ElementGroup.P_ELEMENT
789          ), elementGroupBits(
790              ElementGroup.INLINE, ElementGroup.TABLE_ELEMENT
791          ));
792      defineElement(
793          "param", false, elementGroupBits(ElementGroup.PARAM_ELEMENT), 0);
794      defineElement(
795          "pre", false, elementGroupBits(
796              ElementGroup.BLOCK
797          ), elementGroupBits(
798              ElementGroup.INLINE
799          ));
800      defineElement(
801          "q", true, elementGroupBits(
802              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
803          ), elementGroupBits(
804              ElementGroup.INLINE
805          ));
806      defineElement(
807          "s", true, elementGroupBits(
808              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
809          ), elementGroupBits(
810              ElementGroup.INLINE
811          ));
812      defineElement(
813          "samp", true, elementGroupBits(
814              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
815          ), elementGroupBits(
816              ElementGroup.INLINE
817          ));
818      defineElement(
819          "script", false, elementGroupBits(
820              ElementGroup.BLOCK, ElementGroup.INLINE,
821              ElementGroup.INLINE_MINUS_A, ElementGroup.MIXED,
822              ElementGroup.TABLE_CONTENT, ElementGroup.HEAD_CONTENT,
823              ElementGroup.TOP_CONTENT, ElementGroup.AREA_ELEMENT,
824              ElementGroup.FORM_ELEMENT, ElementGroup.LEGEND_ELEMENT,
825              ElementGroup.LI_ELEMENT, ElementGroup.DL_PART,
826              ElementGroup.P_ELEMENT, ElementGroup.OPTIONS_ELEMENT,
827              ElementGroup.OPTION_ELEMENT, ElementGroup.PARAM_ELEMENT,
828              ElementGroup.TABLE_ELEMENT, ElementGroup.TR_ELEMENT,
829              ElementGroup.TD_ELEMENT, ElementGroup.COL_ELEMENT
830          ), elementGroupBits(
831              ElementGroup.CHARACTER_DATA));
832      defineElement(
833          "select", false, elementGroupBits(
834              ElementGroup.INLINE
835          ), elementGroupBits(
836              ElementGroup.OPTIONS_ELEMENT
837          ));
838      defineElement(
839          "small", true, elementGroupBits(
840              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
841          ), elementGroupBits(
842              ElementGroup.INLINE
843          ));
844      defineElement(
845          "span", false, elementGroupBits(
846              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
847          ), elementGroupBits(
848              ElementGroup.INLINE
849          ));
850      defineElement(
851          "strike", true, elementGroupBits(
852              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
853          ), elementGroupBits(
854              ElementGroup.INLINE
855          ));
856      defineElement(
857          "strong", true, elementGroupBits(
858              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
859          ), elementGroupBits(
860              ElementGroup.INLINE
861          ));
862      defineElement(
863          "style", false, elementGroupBits(
864              ElementGroup.INLINE, ElementGroup.HEAD_CONTENT
865          ), elementGroupBits(
866              ElementGroup.CHARACTER_DATA
867          ));
868      defineElement(
869          "sub", true, elementGroupBits(
870              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
871          ), elementGroupBits(
872              ElementGroup.INLINE
873          ));
874      defineElement(
875          "sup", true, elementGroupBits(
876              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
877          ), elementGroupBits(
878              ElementGroup.INLINE
879          ));
880      defineElement(
881          "table", false, elementGroupBits(
882              ElementGroup.BLOCK, ElementGroup.TABLE_ELEMENT
883          ), elementGroupBits(
884              ElementGroup.TABLE_CONTENT, ElementGroup.FORM_ELEMENT
885          ), CloseTagScope.ALL);
886      defineElement(
887          "tbody", false, elementGroupBits(
888              ElementGroup.TABLE_CONTENT
889          ), elementGroupBits(
890              ElementGroup.TR_ELEMENT
891          ));
892      ElementContainmentInfo TD = defineElement(
893          "td", false, elementGroupBits(
894              ElementGroup.TD_ELEMENT
895          ), elementGroupBits(
896              ElementGroup.BLOCK, ElementGroup.INLINE
897          ), scopeBits(
898              CloseTagScope.COMMON, CloseTagScope.BUTTON,
899              CloseTagScope.LIST_ITEM
900          ));
901      defineElement(
902          "textarea", false,
903          // No, a textarea cannot be inside a link.
904          elementGroupBits(ElementGroup.INLINE),
905          elementGroupBits(ElementGroup.CHARACTER_DATA));
906      defineElement(
907          "tfoot", false, elementGroupBits(
908              ElementGroup.TABLE_CONTENT
909          ), elementGroupBits(
910              ElementGroup.FORM_ELEMENT, ElementGroup.TR_ELEMENT,
911              ElementGroup.TD_ELEMENT
912          ));
913      defineElement(
914          "th", false, elementGroupBits(
915              ElementGroup.TD_ELEMENT
916          ), elementGroupBits(
917              ElementGroup.BLOCK, ElementGroup.INLINE
918          ), scopeBits(
919              CloseTagScope.COMMON, CloseTagScope.BUTTON,
920              CloseTagScope.LIST_ITEM
921          ));
922      defineElement(
923          "thead", false, elementGroupBits(
924              ElementGroup.TABLE_CONTENT
925          ), elementGroupBits(
926              ElementGroup.FORM_ELEMENT, ElementGroup.TR_ELEMENT,
927              ElementGroup.TD_ELEMENT
928          ));
929      defineElement(
930          "title", false, elementGroupBits(ElementGroup.HEAD_CONTENT),
931          elementGroupBits(ElementGroup.CHARACTER_DATA));
932      defineElement(
933          "tr", false, elementGroupBits(
934              ElementGroup.TABLE_CONTENT, ElementGroup.TR_ELEMENT
935          ), elementGroupBits(
936              ElementGroup.FORM_ELEMENT, ElementGroup.TD_ELEMENT
937          ),
938          TD);
939      defineElement(
940          "tt", true, elementGroupBits(
941              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
942          ), elementGroupBits(
943              ElementGroup.INLINE
944          ));
945      defineElement(
946          "u", true, elementGroupBits(
947              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
948          ), elementGroupBits(
949              ElementGroup.INLINE
950          ));
951      defineElement(
952          "ul", false, elementGroupBits(
953              ElementGroup.BLOCK
954          ), elementGroupBits(
955              ElementGroup.LI_ELEMENT
956          ),
957          LI,
958          scopeBits(CloseTagScope.LIST_ITEM));
959      defineElement(
960          "var", false, elementGroupBits(
961              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
962          ), elementGroupBits(
963              ElementGroup.INLINE
964          ));
965      defineElement(
966          "video", false, elementGroupBits(
967              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
968          ), 0);
969      defineElement(
970          "wbr", false, elementGroupBits(
971              ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
972          ), 0);
973      defineElement(
974          "xmp", false, elementGroupBits(
975              ElementGroup.BLOCK
976          ), elementGroupBits(
977              ElementGroup.INLINE
978          ));
979
980    }
981
982    private static final ElementContainmentInfo CHARACTER_DATA_ONLY
983        = new ElementContainmentInfo(
984            "#text", false,
985            elementGroupBits(
986                ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A,
987                ElementGroup.BLOCK, ElementGroup.CHARACTER_DATA),
988            0, null, 0);
989  }
990
991  static boolean allowsPlainTextualContent(String canonElementName) {
992    ElementContainmentInfo info =
993       ELEMENT_CONTAINMENT_RELATIONSHIPS.get(canonElementName);
994    if (info == null
995        || ((info.contents
996             & ElementContainmentRelationships.CHARACTER_DATA_ONLY.types)
997            != 0)) {
998      switch (HtmlTextEscapingMode.getModeForTag(canonElementName)) {
999        case PCDATA:     return true;
1000        case RCDATA:     return true;
1001        case PLAIN_TEXT: return true;
1002        case VOID:       return false;
1003        case CDATA:
1004        case CDATA_SOMETIMES:
1005          return "xmp".equals(canonElementName)
1006              || "listing".equals(canonElementName);
1007      }
1008    }
1009    return false;
1010  }
1011}
1012