1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the  "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18/*
19 * $Id: ToHTMLStream.java 468654 2006-10-28 07:09:23Z minchau $
20 */
21package org.apache.xml.serializer;
22
23import java.io.IOException;
24import java.util.Properties;
25
26import javax.xml.transform.Result;
27
28import org.apache.xml.serializer.utils.MsgKey;
29import org.apache.xml.serializer.utils.Utils;
30import org.xml.sax.Attributes;
31import org.xml.sax.SAXException;
32
33/**
34 * This serializer takes a series of SAX or
35 * SAX-like events and writes its output
36 * to the given stream.
37 *
38 * This class is not a public API, it is public
39 * because it is used from another package.
40 *
41 * @xsl.usage internal
42 */
43public class ToHTMLStream extends ToStream
44{
45
    /** Set while events coming from the DTD are being received. */
    protected boolean m_inDTD = false;

    /**
     * Indentation state consulted and updated by startElement().
     *
     * NOTE(review): startElement() assigns this the negation of the new
     * element's BLOCK flag, so it is true after a non-block element has been
     * opened, which is the opposite of what the name suggests.
     * The original author also noted that this probably needs to be a stack.
     */
    private boolean m_inBlockElem = false;

    /**
     * Map that tells which XML characters should have special treatment, and
     * that provides character to entity name lookup. It is obtained from
     * CharInfo for the HTML entities resource and the HTML output method.
     */
    private final CharInfo m_htmlcharInfo =
        CharInfo.getCharInfo(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);

    /**
     * A digital search trie for fast, case insensitive lookup of ElemDesc
     * objects by element name. It is shared by all instances and is filled
     * once, by the static initializer below.
     */
    static final Trie m_elementFlags = new Trie();

    // Fill the shared element table when the class is initialized.
    static {
        initTagReference(m_elementFlags);
    }
67    static void initTagReference(Trie m_elementFlags) {
68
69        // HTML 4.0 loose DTD
70        m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY));
71        m_elementFlags.put(
72            "FRAME",
73            new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
74        m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK));
75        m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK));
76        m_elementFlags.put(
77            "ISINDEX",
78            new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
79        m_elementFlags.put(
80            "APPLET",
81            new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE));
82        m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK));
83        m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK));
84        m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK));
85
86        // HTML 4.0 strict DTD
87        m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
88        m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE));
89        m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE));
90        m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE));
91        m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE));
92        m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE));
93        m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE));
94        m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE));
95        m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE));
96        m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE));
97        m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE));
98        m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE));
99        m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE));
100        m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE));
101        m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE));
102        m_elementFlags.put(
103            "SUP",
104            new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
105        m_elementFlags.put(
106            "SUB",
107            new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
108        m_elementFlags.put(
109            "SPAN",
110            new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
111        m_elementFlags.put(
112            "BDO",
113            new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
114        m_elementFlags.put(
115            "BR",
116            new ElemDesc(
117                0
118                    | ElemDesc.SPECIAL
119                    | ElemDesc.ASPECIAL
120                    | ElemDesc.EMPTY
121                    | ElemDesc.BLOCK));
122        m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK));
123        m_elementFlags.put(
124            "ADDRESS",
125            new ElemDesc(
126                0
127                    | ElemDesc.BLOCK
128                    | ElemDesc.BLOCKFORM
129                    | ElemDesc.BLOCKFORMFIELDSET));
130        m_elementFlags.put(
131            "DIV",
132            new ElemDesc(
133                0
134                    | ElemDesc.BLOCK
135                    | ElemDesc.BLOCKFORM
136                    | ElemDesc.BLOCKFORMFIELDSET));
137        m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL));
138        m_elementFlags.put(
139            "MAP",
140            new ElemDesc(
141                0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
142        m_elementFlags.put(
143            "AREA",
144            new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
145        m_elementFlags.put(
146            "LINK",
147            new ElemDesc(
148                0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
149        m_elementFlags.put(
150            "IMG",
151            new ElemDesc(
152                0
153                    | ElemDesc.SPECIAL
154                    | ElemDesc.ASPECIAL
155                    | ElemDesc.EMPTY
156                    | ElemDesc.WHITESPACESENSITIVE));
157        m_elementFlags.put(
158            "OBJECT",
159            new ElemDesc(
160                0
161                    | ElemDesc.SPECIAL
162                    | ElemDesc.ASPECIAL
163                    | ElemDesc.HEADMISC
164                    | ElemDesc.WHITESPACESENSITIVE));
165        m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY));
166        m_elementFlags.put(
167            "HR",
168            new ElemDesc(
169                0
170                    | ElemDesc.BLOCK
171                    | ElemDesc.BLOCKFORM
172                    | ElemDesc.BLOCKFORMFIELDSET
173                    | ElemDesc.EMPTY));
174        m_elementFlags.put(
175            "P",
176            new ElemDesc(
177                0
178                    | ElemDesc.BLOCK
179                    | ElemDesc.BLOCKFORM
180                    | ElemDesc.BLOCKFORMFIELDSET));
181        m_elementFlags.put(
182            "H1",
183            new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
184        m_elementFlags.put(
185            "H2",
186            new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
187        m_elementFlags.put(
188            "H3",
189            new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
190        m_elementFlags.put(
191            "H4",
192            new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
193        m_elementFlags.put(
194            "H5",
195            new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
196        m_elementFlags.put(
197            "H6",
198            new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
199        m_elementFlags.put(
200            "PRE",
201            new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
202        m_elementFlags.put(
203            "Q",
204            new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
205        m_elementFlags.put(
206            "BLOCKQUOTE",
207            new ElemDesc(
208                0
209                    | ElemDesc.BLOCK
210                    | ElemDesc.BLOCKFORM
211                    | ElemDesc.BLOCKFORMFIELDSET));
212        m_elementFlags.put("INS", new ElemDesc(0));
213        m_elementFlags.put("DEL", new ElemDesc(0));
214        m_elementFlags.put(
215            "DL",
216            new ElemDesc(
217                0
218                    | ElemDesc.BLOCK
219                    | ElemDesc.BLOCKFORM
220                    | ElemDesc.BLOCKFORMFIELDSET));
221        m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK));
222        m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK));
223        m_elementFlags.put(
224            "OL",
225            new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
226        m_elementFlags.put(
227            "UL",
228            new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
229        m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK));
230        m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK));
231        m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL));
232        m_elementFlags.put(
233            "INPUT",
234            new ElemDesc(
235                0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
236        m_elementFlags.put(
237            "SELECT",
238            new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
239        m_elementFlags.put("OPTGROUP", new ElemDesc(0));
240        m_elementFlags.put("OPTION", new ElemDesc(0));
241        m_elementFlags.put(
242            "TEXTAREA",
243            new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
244        m_elementFlags.put(
245            "FIELDSET",
246            new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
247        m_elementFlags.put("LEGEND", new ElemDesc(0));
248        m_elementFlags.put(
249            "BUTTON",
250            new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
251        m_elementFlags.put(
252            "TABLE",
253            new ElemDesc(
254                0
255                    | ElemDesc.BLOCK
256                    | ElemDesc.BLOCKFORM
257                    | ElemDesc.BLOCKFORMFIELDSET));
258        m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK));
259        m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK));
260        m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK));
261        m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK));
262        m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK));
263        m_elementFlags.put(
264            "COL",
265            new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
266        m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK));
267        m_elementFlags.put("TH", new ElemDesc(0));
268        m_elementFlags.put("TD", new ElemDesc(0));
269        m_elementFlags.put(
270            "HEAD",
271            new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM));
272        m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK));
273        m_elementFlags.put(
274            "BASE",
275            new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
276        m_elementFlags.put(
277            "META",
278            new ElemDesc(
279                0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
280        m_elementFlags.put(
281            "STYLE",
282            new ElemDesc(
283                0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
284        m_elementFlags.put(
285            "SCRIPT",
286            new ElemDesc(
287                0
288                    | ElemDesc.SPECIAL
289                    | ElemDesc.ASPECIAL
290                    | ElemDesc.HEADMISC
291                    | ElemDesc.RAW));
292        m_elementFlags.put(
293            "NOSCRIPT",
294            new ElemDesc(
295                0
296                    | ElemDesc.BLOCK
297                    | ElemDesc.BLOCKFORM
298                    | ElemDesc.BLOCKFORMFIELDSET));
299        m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HTMLELEM));
300
301        // From "John Ky" <hand@syd.speednet.com.au
302        // Transitional Document Type Definition ()
303        // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont
304        m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
305
306        // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE
307        m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE));
308        m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE));
309
310        // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U
311        m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE));
312
313        // From "John Ky" <hand@syd.speednet.com.au
314        m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE));
315
316        // HTML 4.0, section 16.5
317        m_elementFlags.put(
318            "IFRAME",
319            new ElemDesc(
320                0
321                    | ElemDesc.BLOCK
322                    | ElemDesc.BLOCKFORM
323                    | ElemDesc.BLOCKFORMFIELDSET));
324
325        // Netscape 4 extension
326        m_elementFlags.put(
327            "LAYER",
328            new ElemDesc(
329                0
330                    | ElemDesc.BLOCK
331                    | ElemDesc.BLOCKFORM
332                    | ElemDesc.BLOCKFORMFIELDSET));
333        // Netscape 4 extension
334        m_elementFlags.put(
335            "ILAYER",
336            new ElemDesc(
337                0
338                    | ElemDesc.BLOCK
339                    | ElemDesc.BLOCKFORM
340                    | ElemDesc.BLOCKFORMFIELDSET));
341
342        // NOW FOR ATTRIBUTE INFORMATION . . .
343        ElemDesc elemDesc;
344
345
346        // ----------------------------------------------
347        elemDesc = (ElemDesc) m_elementFlags.get("a");
348        elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
349        elemDesc.setAttr("NAME", ElemDesc.ATTRURL);
350
351        // ----------------------------------------------
352        elemDesc = (ElemDesc) m_elementFlags.get("area");
353
354        elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
355        elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY);
356
357        // ----------------------------------------------
358        elemDesc = (ElemDesc) m_elementFlags.get("base");
359
360        elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
361
362        // ----------------------------------------------
363        elemDesc = (ElemDesc) m_elementFlags.get("button");
364        elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
365
366        // ----------------------------------------------
367        elemDesc = (ElemDesc) m_elementFlags.get("blockquote");
368
369        elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
370
371        // ----------------------------------------------
372        elemDesc = (ElemDesc) m_elementFlags.get("del");
373        elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
374
375        // ----------------------------------------------
376        elemDesc = (ElemDesc) m_elementFlags.get("dir");
377        elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
378
379        // ----------------------------------------------
380
381        elemDesc = (ElemDesc) m_elementFlags.get("div");
382        elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension
383        elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
384
385        // ----------------------------------------------
386        elemDesc = (ElemDesc) m_elementFlags.get("dl");
387        elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
388
389        // ----------------------------------------------
390        elemDesc = (ElemDesc) m_elementFlags.get("form");
391        elemDesc.setAttr("ACTION", ElemDesc.ATTRURL);
392
393        // ----------------------------------------------
394        // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
395        elemDesc = (ElemDesc) m_elementFlags.get("frame");
396        elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
397        elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
398        elemDesc.setAttr("NORESIZE",ElemDesc.ATTREMPTY);
399
400        // ----------------------------------------------
401        elemDesc = (ElemDesc) m_elementFlags.get("head");
402        elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL);
403
404        // ----------------------------------------------
405        elemDesc = (ElemDesc) m_elementFlags.get("hr");
406        elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);
407
408        // ----------------------------------------------
409        // HTML 4.0, section 16.5
410        elemDesc = (ElemDesc) m_elementFlags.get("iframe");
411        elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
412        elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
413
414        // ----------------------------------------------
415        // Netscape 4 extension
416        elemDesc = (ElemDesc) m_elementFlags.get("ilayer");
417        elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
418
419        // ----------------------------------------------
420        elemDesc = (ElemDesc) m_elementFlags.get("img");
421        elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
422        elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
423        elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
424        elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
425
426        // ----------------------------------------------
427        elemDesc = (ElemDesc) m_elementFlags.get("input");
428
429        elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
430        elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
431        elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
432        elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
433        elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
434        elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
435
436        // ----------------------------------------------
437        elemDesc = (ElemDesc) m_elementFlags.get("ins");
438        elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
439
440        // ----------------------------------------------
441        // Netscape 4 extension
442        elemDesc = (ElemDesc) m_elementFlags.get("layer");
443        elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
444
445        // ----------------------------------------------
446        elemDesc = (ElemDesc) m_elementFlags.get("link");
447        elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
448
449        // ----------------------------------------------
450        elemDesc = (ElemDesc) m_elementFlags.get("menu");
451        elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
452
453        // ----------------------------------------------
454        elemDesc = (ElemDesc) m_elementFlags.get("object");
455
456        elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL);
457        elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL);
458        elemDesc.setAttr("DATA", ElemDesc.ATTRURL);
459        elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
460        elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
461        elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY);
462
463        // ----------------------------------------------
464        elemDesc = (ElemDesc) m_elementFlags.get("ol");
465        elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
466
467        // ----------------------------------------------
468        elemDesc = (ElemDesc) m_elementFlags.get("optgroup");
469        elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
470
471        // ----------------------------------------------
472        elemDesc = (ElemDesc) m_elementFlags.get("option");
473        elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
474        elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
475
476        // ----------------------------------------------
477        elemDesc = (ElemDesc) m_elementFlags.get("q");
478        elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
479
480        // ----------------------------------------------
481        elemDesc = (ElemDesc) m_elementFlags.get("script");
482        elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
483        elemDesc.setAttr("FOR", ElemDesc.ATTRURL);
484        elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY);
485
486        // ----------------------------------------------
487        elemDesc = (ElemDesc) m_elementFlags.get("select");
488        elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
489        elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);
490
491        // ----------------------------------------------
492        elemDesc = (ElemDesc) m_elementFlags.get("table");
493        elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
494
495        // ----------------------------------------------
496        elemDesc = (ElemDesc) m_elementFlags.get("td");
497        elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
498
499        // ----------------------------------------------
500        elemDesc = (ElemDesc) m_elementFlags.get("textarea");
501        elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
502        elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
503
504        // ----------------------------------------------
505        elemDesc = (ElemDesc) m_elementFlags.get("th");
506        elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
507
508        // ----------------------------------------------
509        // The nowrap attribute of a tr element is both
510        // a Netscape and Internet-Explorer extension
511        elemDesc = (ElemDesc) m_elementFlags.get("tr");
512        elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
513
514        // ----------------------------------------------
515        elemDesc = (ElemDesc) m_elementFlags.get("ul");
516        elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
517    }
518
519    /**
520     * Dummy element for elements not found.
521     */
522    static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK);
523
524    /** True if URLs should be specially escaped with the %xx form. */
525    private boolean m_specialEscapeURLs = true;
526
527    /** True if the META tag should be omitted. */
528    private boolean m_omitMetaTag = false;
529
530    /**
531     * Tells if the formatter should use special URL escaping.
532     *
533     * @param bool True if URLs should be specially escaped with the %xx form.
534     */
535    public void setSpecialEscapeURLs(boolean bool)
536    {
537        m_specialEscapeURLs = bool;
538    }
539
540    /**
541     * Tells if the formatter should omit the META tag.
542     *
543     * @param bool True if the META tag should be omitted.
544     */
545    public void setOmitMetaTag(boolean bool)
546    {
547        m_omitMetaTag = bool;
548    }
549
550    /**
551     * Specifies an output format for this serializer. It the
552     * serializer has already been associated with an output format,
553     * it will switch to the new format. This method should not be
554     * called while the serializer is in the process of serializing
555     * a document.
556     *
557     * This method can be called multiple times before starting
558     * the serialization of a particular result-tree. In principle
559     * all serialization parameters can be changed, with the exception
560     * of method="html" (it must be method="html" otherwise we
561     * shouldn't even have a ToHTMLStream object here!)
562     *
563     * @param format The output format or serialzation parameters
564     * to use.
565     */
566    public void setOutputFormat(Properties format)
567    {
568        /*
569         * If "format" does not contain the property
570         * S_USE_URL_ESCAPING, then don't set this value at all,
571         * just leave as-is rather than explicitly setting it.
572         */
573        String value;
574        value = format.getProperty(OutputPropertiesFactory.S_USE_URL_ESCAPING);
575        if (value != null) {
576            m_specialEscapeURLs =
577                OutputPropertyUtils.getBooleanProperty(
578                    OutputPropertiesFactory.S_USE_URL_ESCAPING,
579                    format);
580        }
581
582        /*
583         * If "format" does not contain the property
584         * S_OMIT_META_TAG, then don't set this value at all,
585         * just leave as-is rather than explicitly setting it.
586         */
587        value = format.getProperty(OutputPropertiesFactory.S_OMIT_META_TAG);
588        if (value != null) {
589           m_omitMetaTag =
590                OutputPropertyUtils.getBooleanProperty(
591                    OutputPropertiesFactory.S_OMIT_META_TAG,
592                    format);
593        }
594
595        super.setOutputFormat(format);
596    }
597
598    /**
599     * Tells if the formatter should use special URL escaping.
600     *
601     * @return True if URLs should be specially escaped with the %xx form.
602     */
603    private final boolean getSpecialEscapeURLs()
604    {
605        return m_specialEscapeURLs;
606    }
607
608    /**
609     * Tells if the formatter should omit the META tag.
610     *
611     * @return True if the META tag should be omitted.
612     */
613    private final boolean getOmitMetaTag()
614    {
615        return m_omitMetaTag;
616    }
617
618    /**
619     * Get a description of the given element.
620     *
621     * @param name non-null name of element, case insensitive.
622     *
623     * @return non-null reference to ElemDesc, which may be m_dummy if no
624     *         element description matches the given name.
625     */
626    public static final ElemDesc getElemDesc(String name)
627    {
628        /* this method used to return m_dummy  when name was null
629         * but now it doesn't check and and requires non-null name.
630         */
631        Object obj = m_elementFlags.get(name);
632        if (null != obj)
633            return (ElemDesc)obj;
634        return m_dummy;
635    }
636
637
638    /**
639     * A Trie that is just a copy of the "static" one.
640     * We need this one to be able to use the faster, but not thread-safe
641     * method Trie.get2(name)
642     */
643    private Trie m_htmlInfo = new Trie(m_elementFlags);
644    /**
645     * Calls to this method could be replaced with calls to
646     * getElemDesc(name), but this one should be faster.
647     */
648    private ElemDesc getElemDesc2(String name)
649    {
650        Object obj = m_htmlInfo.get2(name);
651        if (null != obj)
652            return (ElemDesc)obj;
653        return m_dummy;
654    }
655
656    /**
657     * Default constructor.
658     */
659    public ToHTMLStream()
660    {
661
662        super();
663        // we are just constructing this thing, no output properties
664        // have been used, so we will set the right default for
665        // indenting anyways
666        m_doIndent = true;
667        m_charInfo = m_htmlcharInfo;
668        // initialize namespaces
669        m_prefixMap = new NamespaceMappings();
670
671    }
672
673    /** The name of the current element. */
674//    private String m_currentElementName = null;
675
676    /**
677     * Receive notification of the beginning of a document.
678     *
679     * @throws org.xml.sax.SAXException Any SAX exception, possibly
680     *            wrapping another exception.
681     *
682     * @throws org.xml.sax.SAXException
683     */
684    protected void startDocumentInternal() throws org.xml.sax.SAXException
685    {
686        super.startDocumentInternal();
687
688        m_needToCallStartDocument = false;
689        m_needToOutputDocTypeDecl = true;
690        m_startNewLine = false;
691        setOmitXMLDeclaration(true);
692    }
693
694    /**
695     * This method should only get called once.
696     * If a DOCTYPE declaration needs to get written out, it will
697     * be written out. If it doesn't need to be written out, then
698     * the call to this method has no effect.
699     */
700    private void outputDocTypeDecl(String name) throws SAXException {
701        if (true == m_needToOutputDocTypeDecl)
702        {
703            String doctypeSystem = getDoctypeSystem();
704            String doctypePublic = getDoctypePublic();
705            if ((null != doctypeSystem) || (null != doctypePublic))
706            {
707                final java.io.Writer writer = m_writer;
708                try
709                {
710                writer.write("<!DOCTYPE ");
711                writer.write(name);
712
713                if (null != doctypePublic)
714                {
715                    writer.write(" PUBLIC \"");
716                    writer.write(doctypePublic);
717                    writer.write('"');
718                }
719
720                if (null != doctypeSystem)
721                {
722                    if (null == doctypePublic)
723                        writer.write(" SYSTEM \"");
724                    else
725                        writer.write(" \"");
726
727                    writer.write(doctypeSystem);
728                    writer.write('"');
729                }
730
731                writer.write('>');
732                outputLineSep();
733                }
734                catch(IOException e)
735                {
736                    throw new SAXException(e);
737                }
738            }
739        }
740
741        m_needToOutputDocTypeDecl = false;
742    }
743
744    /**
745     * Receive notification of the end of a document.
746     *
747     * @throws org.xml.sax.SAXException Any SAX exception, possibly
748     *            wrapping another exception.
749     *
750     * @throws org.xml.sax.SAXException
751     */
752    public final void endDocument() throws org.xml.sax.SAXException
753    {
754
755        flushPending();
756        if (m_doIndent && !m_isprevtext)
757        {
758            try
759            {
760            outputLineSep();
761            }
762            catch(IOException e)
763            {
764                throw new SAXException(e);
765            }
766        }
767
768        flushWriter();
769        if (m_tracer != null)
770            super.fireEndDoc();
771    }
772
773    /**
774     *  Receive notification of the beginning of an element.
775     *
776     *
777     *  @param namespaceURI
778     *  @param localName
779     *  @param name The element type name.
780     *  @param atts The attributes attached to the element, if any.
781     *  @throws org.xml.sax.SAXException Any SAX exception, possibly
782     *             wrapping another exception.
783     *  @see #endElement
784     *  @see org.xml.sax.AttributeList
785     */
786    public void startElement(
787        String namespaceURI,
788        String localName,
789        String name,
790        Attributes atts)
791        throws org.xml.sax.SAXException
792    {
793
794        ElemContext elemContext = m_elemContext;
795
796        // clean up any pending things first
797        if (elemContext.m_startTagOpen)
798        {
799            closeStartTag();
800            elemContext.m_startTagOpen = false;
801        }
802        else if (m_cdataTagOpen)
803        {
804            closeCDATA();
805            m_cdataTagOpen = false;
806        }
807        else if (m_needToCallStartDocument)
808        {
809            startDocumentInternal();
810            m_needToCallStartDocument = false;
811        }
812
813        if (m_needToOutputDocTypeDecl) {
814            String n = name;
815            if (n == null || n.length() == 0) {
816                // If the lexical QName is not given
817                // use the localName in the DOCTYPE
818                n = localName;
819            }
820            outputDocTypeDecl(n);
821        }
822
823
824        // if this element has a namespace then treat it like XML
825        if (null != namespaceURI && namespaceURI.length() > 0)
826        {
827            super.startElement(namespaceURI, localName, name, atts);
828
829            return;
830        }
831
832        try
833        {
834            // getElemDesc2(name) is faster than getElemDesc(name)
835            ElemDesc elemDesc = getElemDesc2(name);
836            int elemFlags = elemDesc.getFlags();
837
838            // deal with indentation issues first
839            if (m_doIndent)
840            {
841
842                boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
843                if (m_ispreserve)
844                    m_ispreserve = false;
845                else if (
846                    (null != elemContext.m_elementName)
847                    && (!m_inBlockElem
848                        || isBlockElement) /* && !isWhiteSpaceSensitive */
849                    )
850                {
851                    m_startNewLine = true;
852
853                    indent();
854
855                }
856                m_inBlockElem = !isBlockElement;
857            }
858
859            // save any attributes for later processing
860            if (atts != null)
861                addAttributes(atts);
862
863            m_isprevtext = false;
864            final java.io.Writer writer = m_writer;
865            writer.write('<');
866            writer.write(name);
867
868
869
870            if (m_tracer != null)
871                firePseudoAttributes();
872
873            if ((elemFlags & ElemDesc.EMPTY) != 0)
874            {
875                // an optimization for elements which are expected
876                // to be empty.
877                m_elemContext = elemContext.push();
878                /* XSLTC sometimes calls namespaceAfterStartElement()
879                 * so we need to remember the name
880                 */
881                m_elemContext.m_elementName = name;
882                m_elemContext.m_elementDesc = elemDesc;
883                return;
884            }
885            else
886            {
887                elemContext = elemContext.push(namespaceURI,localName,name);
888                m_elemContext = elemContext;
889                elemContext.m_elementDesc = elemDesc;
890                elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;
891            }
892
893
894            if ((elemFlags & ElemDesc.HEADELEM) != 0)
895            {
896                // This is the <HEAD> element, do some special processing
897                closeStartTag();
898                elemContext.m_startTagOpen = false;
899                if (!m_omitMetaTag)
900                {
901                    if (m_doIndent)
902                        indent();
903                    writer.write(
904                        "<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
905                    String encoding = getEncoding();
906                    String encode = Encodings.getMimeEncoding(encoding);
907                    writer.write(encode);
908                    writer.write("\">");
909                }
910            }
911        }
912        catch (IOException e)
913        {
914            throw new SAXException(e);
915        }
916    }
917
918    /**
919     *  Receive notification of the end of an element.
920     *
921     *
922     *  @param namespaceURI
923     *  @param localName
924     *  @param name The element type name
925     *  @throws org.xml.sax.SAXException Any SAX exception, possibly
926     *             wrapping another exception.
927     */
928    public final void endElement(
929        final String namespaceURI,
930        final String localName,
931        final String name)
932        throws org.xml.sax.SAXException
933    {
934        // deal with any pending issues
935        if (m_cdataTagOpen)
936            closeCDATA();
937
938        // if the element has a namespace, treat it like XML, not HTML
939        if (null != namespaceURI && namespaceURI.length() > 0)
940        {
941            super.endElement(namespaceURI, localName, name);
942
943            return;
944        }
945
946        try
947        {
948
949            ElemContext elemContext = m_elemContext;
950            final ElemDesc elemDesc = elemContext.m_elementDesc;
951            final int elemFlags = elemDesc.getFlags();
952            final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;
953
954            // deal with any indentation issues
955            if (m_doIndent)
956            {
957                final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
958                boolean shouldIndent = false;
959
960                if (m_ispreserve)
961                {
962                    m_ispreserve = false;
963                }
964                else if (m_doIndent && (!m_inBlockElem || isBlockElement))
965                {
966                    m_startNewLine = true;
967                    shouldIndent = true;
968                }
969                if (!elemContext.m_startTagOpen && shouldIndent)
970                    indent(elemContext.m_currentElemDepth - 1);
971                m_inBlockElem = !isBlockElement;
972            }
973
974            final java.io.Writer writer = m_writer;
975            if (!elemContext.m_startTagOpen)
976            {
977                writer.write("</");
978                writer.write(name);
979                writer.write('>');
980            }
981            else
982            {
983                // the start-tag open when this method was called,
984                // so we need to process it now.
985
986                if (m_tracer != null)
987                    super.fireStartElem(name);
988
989                // the starting tag was still open when we received this endElement() call
990                // so we need to process any gathered attributes NOW, before they go away.
991                int nAttrs = m_attributes.getLength();
992                if (nAttrs > 0)
993                {
994                    processAttributes(m_writer, nAttrs);
995                    // clear attributes object for re-use with next element
996                    m_attributes.clear();
997                }
998                if (!elemEmpty)
999                {
1000                    // As per Dave/Paul recommendation 12/06/2000
1001                    // if (shouldIndent)
1002                    // writer.write('>');
1003                    //  indent(m_currentIndent);
1004
1005                    writer.write("></");
1006                    writer.write(name);
1007                    writer.write('>');
1008                }
1009                else
1010                {
1011                    writer.write('>');
1012                }
1013            }
1014
1015            // clean up because the element has ended
1016            if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0)
1017                m_ispreserve = true;
1018            m_isprevtext = false;
1019
1020            // fire off the end element event
1021            if (m_tracer != null)
1022                super.fireEndElem(name);
1023
1024            // OPTIMIZE-EMPTY
1025            if (elemEmpty)
1026            {
1027                // a quick exit if the HTML element had no children.
1028                // This block of code can be removed if the corresponding block of code
1029                // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
1030                m_elemContext = elemContext.m_prev;
1031                return;
1032            }
1033
1034            // some more clean because the element has ended.
1035            if (!elemContext.m_startTagOpen)
1036            {
1037                if (m_doIndent && !m_preserves.isEmpty())
1038                    m_preserves.pop();
1039            }
1040            m_elemContext = elemContext.m_prev;
1041//            m_isRawStack.pop();
1042        }
1043        catch (IOException e)
1044        {
1045            throw new SAXException(e);
1046        }
1047    }
1048
1049    /**
1050     * Process an attribute.
1051     * @param   writer The writer to write the processed output to.
1052     * @param   name   The name of the attribute.
1053     * @param   value   The value of the attribute.
1054     * @param   elemDesc The description of the HTML element
1055     *           that has this attribute.
1056     *
1057     * @throws org.xml.sax.SAXException
1058     */
1059    protected void processAttribute(
1060        java.io.Writer writer,
1061        String name,
1062        String value,
1063        ElemDesc elemDesc)
1064        throws IOException
1065    {
1066        writer.write(' ');
1067
1068        if (   ((value.length() == 0) || value.equalsIgnoreCase(name))
1069            && elemDesc != null
1070            && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
1071        {
1072            writer.write(name);
1073        }
1074        else
1075        {
1076            // %REVIEW% %OPT%
1077            // Two calls to single-char write may NOT
1078            // be more efficient than one to string-write...
1079            writer.write(name);
1080            writer.write("=\"");
1081            if (   elemDesc != null
1082                && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
1083                writeAttrURI(writer, value, m_specialEscapeURLs);
1084            else
1085                writeAttrString(writer, value, this.getEncoding());
1086            writer.write('"');
1087
1088        }
1089    }
1090
1091    /**
1092     * Tell if a character is an ASCII digit.
1093     */
1094    private boolean isASCIIDigit(char c)
1095    {
1096        return (c >= '0' && c <= '9');
1097    }
1098
1099    /**
1100     * Make an integer into an HH hex value.
1101     * Does no checking on the size of the input, since this
1102     * is only meant to be used locally by writeAttrURI.
1103     *
1104     * @param i must be a value less than 255.
1105     *
1106     * @return should be a two character string.
1107     */
1108    private static String makeHHString(int i)
1109    {
1110        String s = Integer.toHexString(i).toUpperCase();
1111        if (s.length() == 1)
1112        {
1113            s = "0" + s;
1114        }
1115        return s;
1116    }
1117
1118    /**
1119    * Dmitri Ilyin: Makes sure if the String is HH encoded sign.
1120    * @param str must be 2 characters long
1121    *
1122    * @return true or false
1123    */
1124    private boolean isHHSign(String str)
1125    {
1126        boolean sign = true;
1127        try
1128        {
1129            char r = (char) Integer.parseInt(str, 16);
1130        }
1131        catch (NumberFormatException e)
1132        {
1133            sign = false;
1134        }
1135        return sign;
1136    }
1137
1138    /**
1139     * Write the specified <var>string</var> after substituting non ASCII characters,
1140     * with <CODE>%HH</CODE>, where HH is the hex of the byte value.
1141     *
1142     * @param   string      String to convert to XML format.
1143     * @param doURLEscaping True if we should try to encode as
1144     *                      per http://www.ietf.org/rfc/rfc2396.txt.
1145     *
1146     * @throws org.xml.sax.SAXException if a bad surrogate pair is detected.
1147     */
1148    public void writeAttrURI(
1149        final java.io.Writer writer, String string, boolean doURLEscaping)
1150        throws IOException
1151    {
1152        // http://www.ietf.org/rfc/rfc2396.txt says:
1153        // A URI is always in an "escaped" form, since escaping or unescaping a
1154        // completed URI might change its semantics.  Normally, the only time
1155        // escape encodings can safely be made is when the URI is being created
1156        // from its component parts; each component may have its own set of
1157        // characters that are reserved, so only the mechanism responsible for
1158        // generating or interpreting that component can determine whether or
1159        // not escaping a character will change its semantics. Likewise, a URI
1160        // must be separated into its components before the escaped characters
1161        // within those components can be safely decoded.
1162        //
1163        // ...So we do our best to do limited escaping of the URL, without
1164        // causing damage.  If the URL is already properly escaped, in theory, this
1165        // function should not change the string value.
1166
1167        final int end = string.length();
1168        if (end > m_attrBuff.length)
1169        {
1170           m_attrBuff = new char[end*2 + 1];
1171        }
1172        string.getChars(0,end, m_attrBuff, 0);
1173        final char[] chars = m_attrBuff;
1174
1175        int cleanStart = 0;
1176        int cleanLength = 0;
1177
1178
1179        char ch = 0;
1180        for (int i = 0; i < end; i++)
1181        {
1182            ch = chars[i];
1183
1184            if ((ch < 32) || (ch > 126))
1185            {
1186                if (cleanLength > 0)
1187                {
1188                    writer.write(chars, cleanStart, cleanLength);
1189                    cleanLength = 0;
1190                }
1191                if (doURLEscaping)
1192                {
1193                    // Encode UTF16 to UTF8.
1194                    // Reference is Unicode, A Primer, by Tony Graham.
1195                    // Page 92.
1196
1197                    // Note that Kay doesn't escape 0x20...
1198                    //  if(ch == 0x20) // Not sure about this... -sb
1199                    //  {
1200                    //    writer.write(ch);
1201                    //  }
1202                    //  else
1203                    if (ch <= 0x7F)
1204                    {
1205                        writer.write('%');
1206                        writer.write(makeHHString(ch));
1207                    }
1208                    else if (ch <= 0x7FF)
1209                    {
1210                        // Clear low 6 bits before rotate, put high 4 bits in low byte,
1211                        // and set two high bits.
1212                        int high = (ch >> 6) | 0xC0;
1213                        int low = (ch & 0x3F) | 0x80;
1214                        // First 6 bits, + high bit
1215                        writer.write('%');
1216                        writer.write(makeHHString(high));
1217                        writer.write('%');
1218                        writer.write(makeHHString(low));
1219                    }
1220                    else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate
1221                    {
1222                        // I'm sure this can be done in 3 instructions, but I choose
1223                        // to try and do it exactly like it is done in the book, at least
1224                        // until we are sure this is totally clean.  I don't think performance
1225                        // is a big issue with this particular function, though I could be
1226                        // wrong.  Also, the stuff below clearly does more masking than
1227                        // it needs to do.
1228
1229                        // Clear high 6 bits.
1230                        int highSurrogate = ((int) ch) & 0x03FF;
1231
1232                        // Middle 4 bits (wwww) + 1
1233                        // "Note that the value of wwww from the high surrogate bit pattern
1234                        // is incremented to make the uuuuu bit pattern in the scalar value
1235                        // so the surrogate pair don't address the BMP."
1236                        int wwww = ((highSurrogate & 0x03C0) >> 6);
1237                        int uuuuu = wwww + 1;
1238
1239                        // next 4 bits
1240                        int zzzz = (highSurrogate & 0x003C) >> 2;
1241
1242                        // low 2 bits
1243                        int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
1244
1245                        // Get low surrogate character.
1246                        ch = chars[++i];
1247
1248                        // Clear high 6 bits.
1249                        int lowSurrogate = ((int) ch) & 0x03FF;
1250
1251                        // put the middle 4 bits into the bottom of yyyyyy (byte 3)
1252                        yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);
1253
1254                        // bottom 6 bits.
1255                        int xxxxxx = (lowSurrogate & 0x003F);
1256
1257                        int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
1258                        int byte2 =
1259                            0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
1260                        int byte3 = 0x80 | yyyyyy;
1261                        int byte4 = 0x80 | xxxxxx;
1262
1263                        writer.write('%');
1264                        writer.write(makeHHString(byte1));
1265                        writer.write('%');
1266                        writer.write(makeHHString(byte2));
1267                        writer.write('%');
1268                        writer.write(makeHHString(byte3));
1269                        writer.write('%');
1270                        writer.write(makeHHString(byte4));
1271                    }
1272                    else
1273                    {
1274                        int high = (ch >> 12) | 0xE0; // top 4 bits
1275                        int middle = ((ch & 0x0FC0) >> 6) | 0x80;
1276                        // middle 6 bits
1277                        int low = (ch & 0x3F) | 0x80;
1278                        // First 6 bits, + high bit
1279                        writer.write('%');
1280                        writer.write(makeHHString(high));
1281                        writer.write('%');
1282                        writer.write(makeHHString(middle));
1283                        writer.write('%');
1284                        writer.write(makeHHString(low));
1285                    }
1286
1287                }
1288                else if (escapingNotNeeded(ch))
1289                {
1290                    writer.write(ch);
1291                }
1292                else
1293                {
1294                    writer.write("&#");
1295                    writer.write(Integer.toString(ch));
1296                    writer.write(';');
1297                }
1298                // In this character range we have first written out any previously accumulated
1299                // "clean" characters, then processed the current more complicated character,
1300                // which may have incremented "i".
1301                // We now we reset the next possible clean character.
1302                cleanStart = i + 1;
1303            }
1304            // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
1305            // not allowing quotes in the URI proper syntax, nor in the fragment
1306            // identifier, we believe that it's OK to double escape quotes.
1307            else if (ch == '"')
1308            {
1309                // If the character is a '%' number number, try to avoid double-escaping.
1310                // There is a question if this is legal behavior.
1311
1312                // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
1313                // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
1314
1315                //        if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
1316
1317                // We are no longer escaping '%'
1318
1319                if (cleanLength > 0)
1320                {
1321                    writer.write(chars, cleanStart, cleanLength);
1322                    cleanLength = 0;
1323                }
1324
1325
1326                // Mike Kay encodes this as &#34;, so he may know something I don't?
1327                if (doURLEscaping)
1328                    writer.write("%22");
1329                else
1330                    writer.write("&quot;"); // we have to escape this, I guess.
1331
1332                // We have written out any clean characters, then the escaped '%' and now we
1333                // We now we reset the next possible clean character.
1334                cleanStart = i + 1;
1335            }
1336            else if (ch == '&')
1337            {
1338                // HTML 4.01 reads, "Authors should use "&amp;" (ASCII decimal 38)
1339                // instead of "&" to avoid confusion with the beginning of a character
1340                // reference (entity reference open delimiter).
1341                if (cleanLength > 0)
1342                {
1343                    writer.write(chars, cleanStart, cleanLength);
1344                    cleanLength = 0;
1345                }
1346                writer.write("&amp;");
1347                cleanStart = i + 1;
1348            }
1349            else
1350            {
1351                // no processing for this character, just count how
1352                // many characters in a row that we have that need no processing
1353                cleanLength++;
1354            }
1355        }
1356
1357        // are there any clean characters at the end of the array
1358        // that we haven't processed yet?
1359        if (cleanLength > 1)
1360        {
1361            // if the whole string can be written out as-is do so
1362            // otherwise write out the clean chars at the end of the
1363            // array
1364            if (cleanStart == 0)
1365                writer.write(string);
1366            else
1367                writer.write(chars, cleanStart, cleanLength);
1368        }
1369        else if (cleanLength == 1)
1370        {
1371            // a little optimization for 1 clean character
1372            // (we could have let the previous if(...) handle them all)
1373            writer.write(ch);
1374        }
1375    }
1376
1377    /**
1378     * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,
1379     * and UTF-16 surrogates for character references <CODE>&amp;#xnn</CODE>.
1380     *
1381     * @param   string      String to convert to XML format.
1382     * @param   encoding    CURRENTLY NOT IMPLEMENTED.
1383     *
1384     * @throws org.xml.sax.SAXException
1385     */
1386    public void writeAttrString(
1387        final java.io.Writer writer, String string, String encoding)
1388        throws IOException
1389    {
1390        final int end = string.length();
1391        if (end > m_attrBuff.length)
1392        {
1393            m_attrBuff = new char[end * 2 + 1];
1394        }
1395        string.getChars(0, end, m_attrBuff, 0);
1396        final char[] chars = m_attrBuff;
1397
1398
1399
1400        int cleanStart = 0;
1401        int cleanLength = 0;
1402
1403        char ch = 0;
1404        for (int i = 0; i < end; i++)
1405        {
1406            ch = chars[i];
1407
1408            // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
1409            // System.out.println("ch: "+(int)ch);
1410            // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
1411            // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
1412            if (escapingNotNeeded(ch) && (!m_charInfo.shouldMapAttrChar(ch)))
1413            {
1414                cleanLength++;
1415            }
1416            else if ('<' == ch || '>' == ch)
1417            {
1418                cleanLength++; // no escaping in this case, as specified in 15.2
1419            }
1420            else if (
1421                ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
1422            {
1423                cleanLength++; // no escaping in this case, as specified in 15.2
1424            }
1425            else
1426            {
1427                if (cleanLength > 0)
1428                {
1429                    writer.write(chars,cleanStart,cleanLength);
1430                    cleanLength = 0;
1431                }
1432                int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);
1433
1434                if (i != pos)
1435                {
1436                    i = pos - 1;
1437                }
1438                else
1439                {
1440                    if (Encodings.isHighUTF16Surrogate(ch))
1441                    {
1442
1443                            writeUTF16Surrogate(ch, chars, i, end);
1444                            i++; // two input characters processed
1445                                 // this increments by one and the for()
1446                                 // loop itself increments by another one.
1447                    }
1448
1449                    // The next is kind of a hack to keep from escaping in the case
1450                    // of Shift_JIS and the like.
1451
1452                    /*
1453                    else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
1454                    && (ch != 160))
1455                    {
1456                    writer.write(ch);  // no escaping in this case
1457                    }
1458                    else
1459                    */
1460                    String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
1461                    if (null != outputStringForChar)
1462                    {
1463                        writer.write(outputStringForChar);
1464                    }
1465                    else if (escapingNotNeeded(ch))
1466                    {
1467                        writer.write(ch); // no escaping in this case
1468                    }
1469                    else
1470                    {
1471                        writer.write("&#");
1472                        writer.write(Integer.toString(ch));
1473                        writer.write(';');
1474                    }
1475                }
1476                cleanStart = i + 1;
1477            }
1478        } // end of for()
1479
1480        // are there any clean characters at the end of the array
1481        // that we haven't processed yet?
1482        if (cleanLength > 1)
1483        {
1484            // if the whole string can be written out as-is do so
1485            // otherwise write out the clean chars at the end of the
1486            // array
1487            if (cleanStart == 0)
1488                writer.write(string);
1489            else
1490                writer.write(chars, cleanStart, cleanLength);
1491        }
1492        else if (cleanLength == 1)
1493        {
1494            // a little optimization for 1 clean character
1495            // (we could have let the previous if(...) handle them all)
1496            writer.write(ch);
1497        }
1498    }
1499
1500
1501
1502    /**
1503     * Receive notification of character data.
1504     *
1505     * <p>The Parser will call this method to report each chunk of
1506     * character data.  SAX parsers may return all contiguous character
1507     * data in a single chunk, or they may split it into several
1508     * chunks; however, all of the characters in any single event
1509     * must come from the same external entity, so that the Locator
1510     * provides useful information.</p>
1511     *
1512     * <p>The application must not attempt to read from the array
1513     * outside of the specified range.</p>
1514     *
1515     * <p>Note that some parsers will report whitespace using the
1516     * ignorableWhitespace() method rather than this one (validating
1517     * parsers must do so).</p>
1518     *
1519     * @param chars The characters from the XML document.
1520     * @param start The start position in the array.
1521     * @param length The number of characters to read from the array.
1522     * @throws org.xml.sax.SAXException Any SAX exception, possibly
1523     *            wrapping another exception.
1524     * @see #ignorableWhitespace
1525     * @see org.xml.sax.Locator
1526     *
1527     * @throws org.xml.sax.SAXException
1528     */
1529    public final void characters(char chars[], int start, int length)
1530        throws org.xml.sax.SAXException
1531    {
1532
1533        if (m_elemContext.m_isRaw)
1534        {
1535            try
1536            {
1537                // Clean up some pending issues.
1538                if (m_elemContext.m_startTagOpen)
1539                {
1540                    closeStartTag();
1541                    m_elemContext.m_startTagOpen = false;
1542                }
1543
1544                m_ispreserve = true;
1545
1546                writeNormalizedChars(chars, start, length, false, m_lineSepUse);
1547
1548                // time to generate characters event
1549                if (m_tracer != null)
1550                    super.fireCharEvent(chars, start, length);
1551
1552                return;
1553            }
1554            catch (IOException ioe)
1555            {
1556                throw new org.xml.sax.SAXException(
1557                    Utils.messages.createMessage(MsgKey.ER_OIERROR,null),ioe);
1558            }
1559        }
1560        else
1561        {
1562            super.characters(chars, start, length);
1563        }
1564    }
1565
1566    /**
1567     *  Receive notification of cdata.
1568     *
1569     *  <p>The Parser will call this method to report each chunk of
1570     *  character data.  SAX parsers may return all contiguous character
1571     *  data in a single chunk, or they may split it into several
1572     *  chunks; however, all of the characters in any single event
1573     *  must come from the same external entity, so that the Locator
1574     *  provides useful information.</p>
1575     *
1576     *  <p>The application must not attempt to read from the array
1577     *  outside of the specified range.</p>
1578     *
1579     *  <p>Note that some parsers will report whitespace using the
1580     *  ignorableWhitespace() method rather than this one (validating
1581     *  parsers must do so).</p>
1582     *
1583     *  @param ch The characters from the XML document.
1584     *  @param start The start position in the array.
1585     *  @param length The number of characters to read from the array.
1586     *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1587     *             wrapping another exception.
1588     *  @see #ignorableWhitespace
1589     *  @see org.xml.sax.Locator
1590     *
1591     * @throws org.xml.sax.SAXException
1592     */
1593    public final void cdata(char ch[], int start, int length)
1594        throws org.xml.sax.SAXException
1595    {
1596
1597        if ((null != m_elemContext.m_elementName)
1598            && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT")
1599                || m_elemContext.m_elementName.equalsIgnoreCase("STYLE")))
1600        {
1601            try
1602            {
1603                if (m_elemContext.m_startTagOpen)
1604                {
1605                    closeStartTag();
1606                    m_elemContext.m_startTagOpen = false;
1607                }
1608
1609                m_ispreserve = true;
1610
1611                if (shouldIndent())
1612                    indent();
1613
1614                // writer.write(ch, start, length);
1615                writeNormalizedChars(ch, start, length, true, m_lineSepUse);
1616            }
1617            catch (IOException ioe)
1618            {
1619                throw new org.xml.sax.SAXException(
1620                    Utils.messages.createMessage(
1621                        MsgKey.ER_OIERROR,
1622                        null),
1623                    ioe);
1624                //"IO error", ioe);
1625            }
1626        }
1627        else
1628        {
1629            super.cdata(ch, start, length);
1630        }
1631    }
1632
1633    /**
1634     *  Receive notification of a processing instruction.
1635     *
1636     *  @param target The processing instruction target.
1637     *  @param data The processing instruction data, or null if
1638     *         none was supplied.
1639     *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1640     *             wrapping another exception.
1641     *
1642     * @throws org.xml.sax.SAXException
1643     */
1644    public void processingInstruction(String target, String data)
1645        throws org.xml.sax.SAXException
1646    {
1647
1648        // Process any pending starDocument and startElement first.
1649        flushPending();
1650
1651        // Use a fairly nasty hack to tell if the next node is supposed to be
1652        // unescaped text.
1653        if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING))
1654        {
1655            startNonEscaping();
1656        }
1657        else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING))
1658        {
1659            endNonEscaping();
1660        }
1661        else
1662        {
1663            try
1664            {
1665                // clean up any pending things first
1666                if (m_elemContext.m_startTagOpen)
1667                {
1668                    closeStartTag();
1669                    m_elemContext.m_startTagOpen = false;
1670                }
1671                else if (m_cdataTagOpen)
1672                {
1673                    closeCDATA();
1674                }
1675                else if (m_needToCallStartDocument)
1676                {
1677                    startDocumentInternal();
1678                }
1679
1680
1681            /*
1682             * Perhaps processing instructions can be written out in HTML before
1683             * the DOCTYPE, in which case this could be emitted with the
1684             * startElement call, that knows the name of the document element
1685             * doing it right.
1686             */
1687            if (true == m_needToOutputDocTypeDecl)
1688                outputDocTypeDecl("html"); // best guess for the upcoming element
1689
1690
1691            if (shouldIndent())
1692                indent();
1693
1694            final java.io.Writer writer = m_writer;
1695            //writer.write("<?" + target);
1696            writer.write("<?");
1697            writer.write(target);
1698
1699            if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0)))
1700                writer.write(' ');
1701
1702            //writer.write(data + ">"); // different from XML
1703            writer.write(data); // different from XML
1704            writer.write('>'); // different from XML
1705
1706            // Always output a newline char if not inside of an
1707            // element. The whitespace is not significant in that
1708            // case.
1709            if (m_elemContext.m_currentElemDepth <= 0)
1710                outputLineSep();
1711
1712            m_startNewLine = true;
1713            }
1714            catch(IOException e)
1715            {
1716                throw new SAXException(e);
1717            }
1718        }
1719
1720        // now generate the PI event
1721        if (m_tracer != null)
1722            super.fireEscapingEvent(target, data);
1723     }
1724
1725    /**
1726     * Receive notivication of a entityReference.
1727     *
1728     * @param name non-null reference to entity name string.
1729     *
1730     * @throws org.xml.sax.SAXException
1731     */
1732    public final void entityReference(String name)
1733        throws org.xml.sax.SAXException
1734    {
1735        try
1736        {
1737
1738        final java.io.Writer writer = m_writer;
1739        writer.write('&');
1740        writer.write(name);
1741        writer.write(';');
1742
1743        } catch(IOException e)
1744        {
1745            throw new SAXException(e);
1746        }
1747    }
1748    /**
1749     * @see ExtendedContentHandler#endElement(String)
1750     */
1751    public final void endElement(String elemName) throws SAXException
1752    {
1753        endElement(null, null, elemName);
1754    }
1755
1756    /**
1757     * Process the attributes, which means to write out the currently
1758     * collected attributes to the writer. The attributes are not
1759     * cleared by this method
1760     *
1761     * @param writer the writer to write processed attributes to.
1762     * @param nAttrs the number of attributes in m_attributes
1763     * to be processed
1764     *
1765     * @throws org.xml.sax.SAXException
1766     */
1767    public void processAttributes(java.io.Writer writer, int nAttrs)
1768        throws IOException,SAXException
1769    {
1770            /*
1771             * process the collected attributes
1772             */
1773            for (int i = 0; i < nAttrs; i++)
1774            {
1775                processAttribute(
1776                    writer,
1777                    m_attributes.getQName(i),
1778                    m_attributes.getValue(i),
1779                    m_elemContext.m_elementDesc);
1780            }
1781    }
1782
1783    /**
1784     * For the enclosing elements starting tag write out out any attributes
1785     * followed by ">". At this point we also mark if this element is
1786     * a cdata-section-element.
1787     *
1788     *@throws org.xml.sax.SAXException
1789     */
1790    protected void closeStartTag() throws SAXException
1791    {
1792            try
1793            {
1794
1795            // finish processing attributes, time to fire off the start element event
1796            if (m_tracer != null)
1797                super.fireStartElem(m_elemContext.m_elementName);
1798
1799            int nAttrs = m_attributes.getLength();
1800            if (nAttrs>0)
1801            {
1802                processAttributes(m_writer, nAttrs);
1803                // clear attributes object for re-use with next element
1804                m_attributes.clear();
1805            }
1806
1807            m_writer.write('>');
1808
1809            /* At this point we have the prefix mappings now, so
1810             * lets determine if the current element is specified in the cdata-
1811             * section-elements list.
1812             */
1813            if (m_CdataElems != null) // if there are any cdata sections
1814                m_elemContext.m_isCdataSection = isCdataSection();
1815            if (m_doIndent)
1816            {
1817                m_isprevtext = false;
1818                m_preserves.push(m_ispreserve);
1819            }
1820
1821            }
1822            catch(IOException e)
1823            {
1824                throw new SAXException(e);
1825            }
1826    }
1827
1828
1829
1830        /**
1831         * This method is used when a prefix/uri namespace mapping
1832         * is indicated after the element was started with a
1833         * startElement() and before and endElement().
1834         * startPrefixMapping(prefix,uri) would be used before the
1835         * startElement() call.
1836         * @param uri the URI of the namespace
1837         * @param prefix the prefix associated with the given URI.
1838         *
1839         * @see ExtendedContentHandler#namespaceAfterStartElement(String, String)
1840         */
1841        public void namespaceAfterStartElement(String prefix, String uri)
1842            throws SAXException
1843        {
1844            // hack for XSLTC with finding URI for default namespace
1845            if (m_elemContext.m_elementURI == null)
1846            {
1847                String prefix1 = getPrefixPart(m_elemContext.m_elementName);
1848                if (prefix1 == null && EMPTYSTRING.equals(prefix))
1849                {
1850                    // the elements URI is not known yet, and it
1851                    // doesn't have a prefix, and we are currently
1852                    // setting the uri for prefix "", so we have
1853                    // the uri for the element... lets remember it
1854                    m_elemContext.m_elementURI = uri;
1855                }
1856            }
1857            startPrefixMapping(prefix,uri,false);
1858        }
1859
    /**
     * Report the start of DTD declarations. Sets the m_inDTD flag (which
     * comment() checks in order to skip comments while it is set) and then
     * delegates to the superclass implementation.
     *
     * @param name passed through unchanged to super.startDTD()
     * @param publicId passed through unchanged to super.startDTD()
     * @param systemId passed through unchanged to super.startDTD()
     * @throws SAXException propagated from super.startDTD()
     * @see #endDTD
     */
    public void startDTD(String name, String publicId, String systemId)
        throws SAXException
    {
        // must be set before delegating; endDTD() clears it again
        m_inDTD = true;
        super.startDTD(name, publicId, systemId);
    }
1866
1867    /**
1868     * Report the end of DTD declarations.
1869     * @throws org.xml.sax.SAXException The application may raise an exception.
1870     * @see #startDTD
1871     */
1872    public void endDTD() throws org.xml.sax.SAXException
1873    {
1874        m_inDTD = false;
1875        /* for ToHTMLStream the DOCTYPE is entirely output in the
1876         * startDocumentInternal() method, so don't do anything here
1877         */
1878    }
1879    /**
1880     * This method does nothing.
1881     */
1882    public void attributeDecl(
1883        String eName,
1884        String aName,
1885        String type,
1886        String valueDefault,
1887        String value)
1888        throws SAXException
1889    {
1890        // The internal DTD subset is not serialized by the ToHTMLStream serializer
1891    }
1892
1893    /**
1894     * This method does nothing.
1895     */
1896    public void elementDecl(String name, String model) throws SAXException
1897    {
1898        // The internal DTD subset is not serialized by the ToHTMLStream serializer
1899    }
1900    /**
1901     * This method does nothing.
1902     */
1903    public void internalEntityDecl(String name, String value)
1904        throws SAXException
1905    {
1906        // The internal DTD subset is not serialized by the ToHTMLStream serializer
1907    }
1908    /**
1909     * This method does nothing.
1910     */
1911    public void externalEntityDecl(
1912        String name,
1913        String publicId,
1914        String systemId)
1915        throws SAXException
1916    {
1917        // The internal DTD subset is not serialized by the ToHTMLStream serializer
1918    }
1919
1920    /**
1921     * This method is used to add an attribute to the currently open element.
1922     * The caller has guaranted that this attribute is unique, which means that it
1923     * not been seen before and will not be seen again.
1924     *
1925     * @param name the qualified name of the attribute
1926     * @param value the value of the attribute which can contain only
1927     * ASCII printable characters characters in the range 32 to 127 inclusive.
1928     * @param flags the bit values of this integer give optimization information.
1929     */
1930    public void addUniqueAttribute(String name, String value, int flags)
1931        throws SAXException
1932    {
1933        try
1934        {
1935            final java.io.Writer writer = m_writer;
1936            if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt)
1937            {
1938                // "flags" has indicated that the characters
1939                // '>'  '<'   '&'  and '"' are not in the value and
1940                // m_htmlcharInfo has recorded that there are no other
1941                // entities in the range 0 to 127 so we write out the
1942                // value directly
1943                writer.write(' ');
1944                writer.write(name);
1945                writer.write("=\"");
1946                writer.write(value);
1947                writer.write('"');
1948            }
1949            else if (
1950                (flags & HTML_ATTREMPTY) > 0
1951                    && (value.length() == 0 || value.equalsIgnoreCase(name)))
1952            {
1953                writer.write(' ');
1954                writer.write(name);
1955            }
1956            else
1957            {
1958                writer.write(' ');
1959                writer.write(name);
1960                writer.write("=\"");
1961                if ((flags & HTML_ATTRURL) > 0)
1962                {
1963                    writeAttrURI(writer, value, m_specialEscapeURLs);
1964                }
1965                else
1966                {
1967                    writeAttrString(writer, value, this.getEncoding());
1968                }
1969                writer.write('"');
1970            }
1971        } catch (IOException e) {
1972            throw new SAXException(e);
1973        }
1974    }
1975
1976    public void comment(char ch[], int start, int length)
1977            throws SAXException
1978    {
1979        // The internal DTD subset is not serialized by the ToHTMLStream serializer
1980        if (m_inDTD)
1981            return;
1982
1983        // Clean up some pending issues, just in case
1984        // this call is coming right after a startElement()
1985        // or we are in the middle of writing out CDATA
1986        // or if a startDocument() call was not received
1987        if (m_elemContext.m_startTagOpen)
1988        {
1989            closeStartTag();
1990            m_elemContext.m_startTagOpen = false;
1991        }
1992        else if (m_cdataTagOpen)
1993        {
1994            closeCDATA();
1995        }
1996        else if (m_needToCallStartDocument)
1997        {
1998            startDocumentInternal();
1999        }
2000
2001        /*
2002         * Perhaps comments can be written out in HTML before the DOCTYPE.
2003         * In this case we might delete this call to writeOutDOCTYPE, and
2004         * it would be handled within the startElement() call.
2005         */
2006        if (m_needToOutputDocTypeDecl)
2007            outputDocTypeDecl("html"); // best guess for the upcoming element
2008
2009        super.comment(ch, start, length);
2010    }
2011
2012    public boolean reset()
2013    {
2014        boolean ret = super.reset();
2015        if (!ret)
2016            return false;
2017        resetToHTMLStream();
2018        return true;
2019    }
2020
2021    private void resetToHTMLStream()
2022    {
2023        // m_htmlcharInfo remains unchanged
2024        // m_htmlInfo = null;  // Don't reset
2025        m_inBlockElem = false;
2026        m_inDTD = false;
2027        m_omitMetaTag = false;
2028        m_specialEscapeURLs = true;
2029    }
2030
2031    static class Trie
2032    {
2033        /**
2034         * A digital search trie for 7-bit ASCII text
2035         * The API is a subset of java.util.Hashtable
2036         * The key must be a 7-bit ASCII string
2037         * The value may be any Java Object
2038         * One can get an object stored in a trie from its key,
2039         * but the search is either case sensitive or case
2040         * insensitive to the characters in the key, and this
2041         * choice of sensitivity or insensitivity is made when
2042         * the Trie is created, before any objects are put in it.
2043         *
2044         * This class is a copy of the one in org.apache.xml.utils.
2045         * It exists to cut the serializers dependancy on that package.
2046         *
2047         * @xsl.usage internal
2048         */
2049
2050        /** Size of the m_nextChar array.  */
2051        public static final int ALPHA_SIZE = 128;
2052
2053        /** The root node of the tree.    */
2054        final Node m_Root;
2055
2056        /** helper buffer to convert Strings to char arrays */
2057        private char[] m_charBuffer = new char[0];
2058
2059        /** true if the search for an object is lower case only with the key */
2060        private final boolean m_lowerCaseOnly;
2061
2062        /**
2063         * Construct the trie that has a case insensitive search.
2064         */
2065        public Trie()
2066        {
2067            m_Root = new Node();
2068            m_lowerCaseOnly = false;
2069        }
2070
2071        /**
2072         * Construct the trie given the desired case sensitivity with the key.
2073         * @param lowerCaseOnly true if the search keys are to be loser case only,
2074         * not case insensitive.
2075         */
2076        public Trie(boolean lowerCaseOnly)
2077        {
2078            m_Root = new Node();
2079            m_lowerCaseOnly = lowerCaseOnly;
2080        }
2081
2082        /**
2083         * Put an object into the trie for lookup.
2084         *
2085         * @param key must be a 7-bit ASCII string
2086         * @param value any java object.
2087         *
2088         * @return The old object that matched key, or null.
2089         */
2090        public Object put(String key, Object value)
2091        {
2092
2093            final int len = key.length();
2094            if (len > m_charBuffer.length)
2095            {
2096                // make the biggest buffer ever needed in get(String)
2097                m_charBuffer = new char[len];
2098            }
2099
2100            Node node = m_Root;
2101
2102            for (int i = 0; i < len; i++)
2103            {
2104                Node nextNode =
2105                    node.m_nextChar[Character.toLowerCase(key.charAt(i))];
2106
2107                if (nextNode != null)
2108                {
2109                    node = nextNode;
2110                }
2111                else
2112                {
2113                    for (; i < len; i++)
2114                    {
2115                        Node newNode = new Node();
2116                        if (m_lowerCaseOnly)
2117                        {
2118                            // put this value into the tree only with a lower case key
2119                            node.m_nextChar[Character.toLowerCase(
2120                                key.charAt(i))] =
2121                                newNode;
2122                        }
2123                        else
2124                        {
2125                            // put this value into the tree with a case insensitive key
2126                            node.m_nextChar[Character.toUpperCase(
2127                                key.charAt(i))] =
2128                                newNode;
2129                            node.m_nextChar[Character.toLowerCase(
2130                                key.charAt(i))] =
2131                                newNode;
2132                        }
2133                        node = newNode;
2134                    }
2135                    break;
2136                }
2137            }
2138
2139            Object ret = node.m_Value;
2140
2141            node.m_Value = value;
2142
2143            return ret;
2144        }
2145
2146        /**
2147         * Get an object that matches the key.
2148         *
2149         * @param key must be a 7-bit ASCII string
2150         *
2151         * @return The object that matches the key, or null.
2152         */
2153        public Object get(final String key)
2154        {
2155
2156            final int len = key.length();
2157
2158            /* If the name is too long, we won't find it, this also keeps us
2159             * from overflowing m_charBuffer
2160             */
2161            if (m_charBuffer.length < len)
2162                return null;
2163
2164            Node node = m_Root;
2165            switch (len) // optimize the look up based on the number of chars
2166            {
2167                // case 0 looks silly, but the generated bytecode runs
2168                // faster for lookup of elements of length 2 with this in
2169                // and a fair bit faster.  Don't know why.
2170                case 0 :
2171                    {
2172                        return null;
2173                    }
2174
2175                case 1 :
2176                    {
2177                        final char ch = key.charAt(0);
2178                        if (ch < ALPHA_SIZE)
2179                        {
2180                            node = node.m_nextChar[ch];
2181                            if (node != null)
2182                                return node.m_Value;
2183                        }
2184                        return null;
2185                    }
2186                    //                comment out case 2 because the default is faster
2187                    //                case 2 :
2188                    //                    {
2189                    //                        final char ch0 = key.charAt(0);
2190                    //                        final char ch1 = key.charAt(1);
2191                    //                        if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE)
2192                    //                        {
2193                    //                            node = node.m_nextChar[ch0];
2194                    //                            if (node != null)
2195                    //                            {
2196                    //
2197                    //                                if (ch1 < ALPHA_SIZE)
2198                    //                                {
2199                    //                                    node = node.m_nextChar[ch1];
2200                    //                                    if (node != null)
2201                    //                                        return node.m_Value;
2202                    //                                }
2203                    //                            }
2204                    //                        }
2205                    //                        return null;
2206                    //                   }
2207                default :
2208                    {
2209                        for (int i = 0; i < len; i++)
2210                        {
2211                            // A thread-safe way to loop over the characters
2212                            final char ch = key.charAt(i);
2213                            if (ALPHA_SIZE <= ch)
2214                            {
2215                                // the key is not 7-bit ASCII so we won't find it here
2216                                return null;
2217                            }
2218
2219                            node = node.m_nextChar[ch];
2220                            if (node == null)
2221                                return null;
2222                        }
2223
2224                        return node.m_Value;
2225                    }
2226            }
2227        }
2228
2229        /**
2230         * The node representation for the trie.
2231         * @xsl.usage internal
2232         */
2233        private class Node
2234        {
2235
2236            /**
2237             * Constructor, creates a Node[ALPHA_SIZE].
2238             */
2239            Node()
2240            {
2241                m_nextChar = new Node[ALPHA_SIZE];
2242                m_Value = null;
2243            }
2244
2245            /** The next nodes.   */
2246            final Node m_nextChar[];
2247
2248            /** The value.   */
2249            Object m_Value;
2250        }
2251        /**
2252         * Construct the trie from another Trie.
2253         * Both the existing Trie and this new one share the same table for
2254         * lookup, and it is assumed that the table is fully populated and
2255         * not changing anymore.
2256         *
2257         * @param existingTrie the Trie that this one is a copy of.
2258         */
2259        public Trie(Trie existingTrie)
2260        {
2261            // copy some fields from the existing Trie into this one.
2262            m_Root = existingTrie.m_Root;
2263            m_lowerCaseOnly = existingTrie.m_lowerCaseOnly;
2264
2265            // get a buffer just big enough to hold the longest key in the table.
2266            int max = existingTrie.getLongestKeyLength();
2267            m_charBuffer = new char[max];
2268        }
2269
2270        /**
2271         * Get an object that matches the key.
2272         * This method is faster than get(), but is not thread-safe.
2273         *
2274         * @param key must be a 7-bit ASCII string
2275         *
2276         * @return The object that matches the key, or null.
2277         */
2278        public Object get2(final String key)
2279        {
2280
2281            final int len = key.length();
2282
2283            /* If the name is too long, we won't find it, this also keeps us
2284             * from overflowing m_charBuffer
2285             */
2286            if (m_charBuffer.length < len)
2287                return null;
2288
2289            Node node = m_Root;
2290            switch (len) // optimize the look up based on the number of chars
2291            {
2292                // case 0 looks silly, but the generated bytecode runs
2293                // faster for lookup of elements of length 2 with this in
2294                // and a fair bit faster.  Don't know why.
2295                case 0 :
2296                    {
2297                        return null;
2298                    }
2299
2300                case 1 :
2301                    {
2302                        final char ch = key.charAt(0);
2303                        if (ch < ALPHA_SIZE)
2304                        {
2305                            node = node.m_nextChar[ch];
2306                            if (node != null)
2307                                return node.m_Value;
2308                        }
2309                        return null;
2310                    }
2311                default :
2312                    {
2313                        /* Copy string into array. This is not thread-safe because
2314                         * it modifies the contents of m_charBuffer. If multiple
2315                         * threads were to use this Trie they all would be
2316                         * using this same array (not good). So this
2317                         * method is not thread-safe, but it is faster because
2318                         * converting to a char[] and looping over elements of
2319                         * the array is faster than a String's charAt(i).
2320                         */
2321                        key.getChars(0, len, m_charBuffer, 0);
2322
2323                        for (int i = 0; i < len; i++)
2324                        {
2325                            final char ch = m_charBuffer[i];
2326                            if (ALPHA_SIZE <= ch)
2327                            {
2328                                // the key is not 7-bit ASCII so we won't find it here
2329                                return null;
2330                            }
2331
2332                            node = node.m_nextChar[ch];
2333                            if (node == null)
2334                                return null;
2335                        }
2336
2337                        return node.m_Value;
2338                    }
2339            }
2340        }
2341
2342        /**
2343         * Get the length of the longest key used in the table.
2344         */
2345        public int getLongestKeyLength()
2346        {
2347            return m_charBuffer.length;
2348        }
2349    }
2350}
2351