/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * $Id: ToTextStream.java 468654 2006-10-28 07:09:23Z minchau $
 */
21package org.apache.xml.serializer;
22
23import java.io.IOException;
24
25import org.apache.xml.serializer.utils.MsgKey;
26import org.apache.xml.serializer.utils.Utils;
27import org.xml.sax.Attributes;
28import org.xml.sax.SAXException;
29
30/**
31 * This class is not a public API.
32 * It is only public because it is used in other packages.
33 * This class converts SAX or SAX-like calls to a
34 * serialized document for xsl:output method of "text".
35 * @xsl.usage internal
36 */
37public class ToTextStream extends ToStream
38{
39
40
41  /**
42   * Default constructor.
43   */
44  public ToTextStream()
45  {
46    super();
47  }
48
49
50
51  /**
52   * Receive notification of the beginning of a document.
53   *
54   * <p>The SAX parser will invoke this method only once, before any
55   * other methods in this interface or in DTDHandler (except for
56   * setDocumentLocator).</p>
57   *
58   * @throws org.xml.sax.SAXException Any SAX exception, possibly
59   *            wrapping another exception.
60   *
61   * @throws org.xml.sax.SAXException
62   */
63  protected void startDocumentInternal() throws org.xml.sax.SAXException
64  {
65    super.startDocumentInternal();
66
67    m_needToCallStartDocument = false;
68
69    // No action for the moment.
70  }
71
72  /**
73   * Receive notification of the end of a document.
74   *
75   * <p>The SAX parser will invoke this method only once, and it will
76   * be the last method invoked during the parse.  The parser shall
77   * not invoke this method until it has either abandoned parsing
78   * (because of an unrecoverable error) or reached the end of
79   * input.</p>
80   *
81   * @throws org.xml.sax.SAXException Any SAX exception, possibly
82   *            wrapping another exception.
83   *
84   * @throws org.xml.sax.SAXException
85   */
86  public void endDocument() throws org.xml.sax.SAXException
87  {
88    flushPending();
89    flushWriter();
90    if (m_tracer != null)
91        super.fireEndDoc();
92  }
93
94  /**
95   * Receive notification of the beginning of an element.
96   *
97   * <p>The Parser will invoke this method at the beginning of every
98   * element in the XML document; there will be a corresponding
99   * endElement() event for every startElement() event (even when the
100   * element is empty). All of the element's content will be
101   * reported, in order, before the corresponding endElement()
102   * event.</p>
103   *
104   * <p>If the element name has a namespace prefix, the prefix will
105   * still be attached.  Note that the attribute list provided will
106   * contain only attributes with explicit values (specified or
107   * defaulted): #IMPLIED attributes will be omitted.</p>
108   *
109   *
110   * @param namespaceURI The Namespace URI, or the empty string if the
111   *        element has no Namespace URI or if Namespace
112   *        processing is not being performed.
113   * @param localName The local name (without prefix), or the
114   *        empty string if Namespace processing is not being
115   *        performed.
116   * @param name The qualified name (with prefix), or the
117   *        empty string if qualified names are not available.
118   * @param atts The attributes attached to the element, if any.
119   * @throws org.xml.sax.SAXException Any SAX exception, possibly
120   *            wrapping another exception.
121   * @see #endElement
122   * @see org.xml.sax.AttributeList
123   *
124   * @throws org.xml.sax.SAXException
125   */
126  public void startElement(
127          String namespaceURI, String localName, String name, Attributes atts)
128            throws org.xml.sax.SAXException
129  {
130    // time to fire off startElement event
131    if (m_tracer != null) {
132        super.fireStartElem(name);
133        this.firePseudoAttributes();
134    }
135    return;
136  }
137
138  /**
139   * Receive notification of the end of an element.
140   *
141   * <p>The SAX parser will invoke this method at the end of every
142   * element in the XML document; there will be a corresponding
143   * startElement() event for every endElement() event (even when the
144   * element is empty).</p>
145   *
146   * <p>If the element name has a namespace prefix, the prefix will
147   * still be attached to the name.</p>
148   *
149   *
150   * @param namespaceURI The Namespace URI, or the empty string if the
151   *        element has no Namespace URI or if Namespace
152   *        processing is not being performed.
153   * @param localName The local name (without prefix), or the
154   *        empty string if Namespace processing is not being
155   *        performed.
156   * @param name The qualified name (with prefix), or the
157   *        empty string if qualified names are not available.
158   * @throws org.xml.sax.SAXException Any SAX exception, possibly
159   *            wrapping another exception.
160   *
161   * @throws org.xml.sax.SAXException
162   */
163  public void endElement(String namespaceURI, String localName, String name)
164          throws org.xml.sax.SAXException
165  {
166        if (m_tracer != null)
167            super.fireEndElem(name);
168  }
169
170  /**
171   * Receive notification of character data.
172   *
173   * <p>The Parser will call this method to report each chunk of
174   * character data.  SAX parsers may return all contiguous character
175   * data in a single chunk, or they may split it into several
176   * chunks; however, all of the characters in any single event
177   * must come from the same external entity, so that the Locator
178   * provides useful information.</p>
179   *
180   * <p>The application must not attempt to read from the array
181   * outside of the specified range.</p>
182   *
183   * <p>Note that some parsers will report whitespace using the
184   * ignorableWhitespace() method rather than this one (validating
185   * parsers must do so).</p>
186   *
187   * @param ch The characters from the XML document.
188   * @param start The start position in the array.
189   * @param length The number of characters to read from the array.
190   * @throws org.xml.sax.SAXException Any SAX exception, possibly
191   *            wrapping another exception.
192   * @see #ignorableWhitespace
193   * @see org.xml.sax.Locator
194   */
195  public void characters(char ch[], int start, int length)
196          throws org.xml.sax.SAXException
197  {
198
199    flushPending();
200
201    try
202    {
203        if (inTemporaryOutputState()) {
204            /* leave characters un-processed as we are
205             * creating temporary output, the output generated by
206             * this serializer will be input to a final serializer
207             * later on and it will do the processing in final
208             * output state (not temporary output state).
209             *
210             * A "temporary" ToTextStream serializer is used to
211             * evaluate attribute value templates (for example),
212             * and the result of evaluating such a thing
213             * is fed into a final serializer later on.
214             */
215            m_writer.write(ch, start, length);
216        }
217        else {
218            // In final output state we do process the characters!
219            writeNormalizedChars(ch, start, length, m_lineSepUse);
220        }
221
222        if (m_tracer != null)
223            super.fireCharEvent(ch, start, length);
224    }
225    catch(IOException ioe)
226    {
227      throw new SAXException(ioe);
228    }
229  }
230
231  /**
232   * If available, when the disable-output-escaping attribute is used,
233   * output raw text without escaping.
234   *
235   * @param ch The characters from the XML document.
236   * @param start The start position in the array.
237   * @param length The number of characters to read from the array.
238   *
239   * @throws org.xml.sax.SAXException Any SAX exception, possibly
240   *            wrapping another exception.
241   */
242  public void charactersRaw(char ch[], int start, int length)
243          throws org.xml.sax.SAXException
244  {
245
246    try
247    {
248      writeNormalizedChars(ch, start, length, m_lineSepUse);
249    }
250    catch(IOException ioe)
251    {
252      throw new SAXException(ioe);
253    }
254  }
255
256    /**
257     * Normalize the characters, but don't escape.  Different from
258     * SerializerToXML#writeNormalizedChars because it does not attempt to do
259     * XML escaping at all.
260     *
261     * @param ch The characters from the XML document.
262     * @param start The start position in the array.
263     * @param length The number of characters to read from the array.
264     * @param useLineSep true if the operating systems
265     * end-of-line separator should be output rather than a new-line character.
266     *
267     * @throws IOException
268     * @throws org.xml.sax.SAXException
269     */
270    void writeNormalizedChars(
271        final char ch[],
272            final int start,
273            final int length,
274            final boolean useLineSep)
275            throws IOException, org.xml.sax.SAXException
276    {
277        final String encoding = getEncoding();
278        final java.io.Writer writer = m_writer;
279        final int end = start + length;
280
281        /* copy a few "constants" before the loop for performance */
282        final char S_LINEFEED = CharInfo.S_LINEFEED;
283
284        // This for() loop always increments i by one at the end
285        // of the loop.  Additional increments of i adjust for when
286        // two input characters (a high/low UTF16 surrogate pair)
287        // are processed.
288        for (int i = start; i < end; i++) {
289            final char c = ch[i];
290
291            if (S_LINEFEED == c && useLineSep) {
292                writer.write(m_lineSep, 0, m_lineSepLen);
293                // one input char processed
294            } else if (m_encodingInfo.isInEncoding(c)) {
295                writer.write(c);
296                // one input char processed
297            } else if (Encodings.isHighUTF16Surrogate(c)) {
298                final int codePoint = writeUTF16Surrogate(c, ch, i, end);
299                if (codePoint != 0) {
300                    // I think we can just emit the message,
301                    // not crash and burn.
302                    final String integralValue = Integer.toString(codePoint);
303                    final String msg = Utils.messages.createMessage(
304                        MsgKey.ER_ILLEGAL_CHARACTER,
305                        new Object[] { integralValue, encoding });
306
307                    //Older behavior was to throw the message,
308                    //but newer gentler behavior is to write a message to System.err
309                    //throw new SAXException(msg);
310                    System.err.println(msg);
311
312                }
313                i++; // two input chars processed
314            } else {
315                // Don't know what to do with this char, it is
316                // not in the encoding and not a high char in
317                // a surrogate pair, so write out as an entity ref
318                if (encoding != null) {
319                    /* The output encoding is known,
320                     * so somthing is wrong.
321                     */
322
323                    // not in the encoding, so write out a character reference
324                    writer.write('&');
325                    writer.write('#');
326                    writer.write(Integer.toString(c));
327                    writer.write(';');
328
329                    // I think we can just emit the message,
330                    // not crash and burn.
331                    final String integralValue = Integer.toString(c);
332                    final String msg = Utils.messages.createMessage(
333                        MsgKey.ER_ILLEGAL_CHARACTER,
334                        new Object[] { integralValue, encoding });
335
336                    //Older behavior was to throw the message,
337                    //but newer gentler behavior is to write a message to System.err
338                    //throw new SAXException(msg);
339                    System.err.println(msg);
340                } else {
341                    /* The output encoding is not known,
342                     * so just write it out as-is.
343                     */
344                    writer.write(c);
345                }
346
347                // one input char was processed
348            }
349        }
350    }
351
352  /**
353   * Receive notification of cdata.
354   *
355   * <p>The Parser will call this method to report each chunk of
356   * character data.  SAX parsers may return all contiguous character
357   * data in a single chunk, or they may split it into several
358   * chunks; however, all of the characters in any single event
359   * must come from the same external entity, so that the Locator
360   * provides useful information.</p>
361   *
362   * <p>The application must not attempt to read from the array
363   * outside of the specified range.</p>
364   *
365   * <p>Note that some parsers will report whitespace using the
366   * ignorableWhitespace() method rather than this one (validating
367   * parsers must do so).</p>
368   *
369   * @param ch The characters from the XML document.
370   * @param start The start position in the array.
371   * @param length The number of characters to read from the array.
372   * @throws org.xml.sax.SAXException Any SAX exception, possibly
373   *            wrapping another exception.
374   * @see #ignorableWhitespace
375   * @see org.xml.sax.Locator
376   */
377  public void cdata(char ch[], int start, int length)
378          throws org.xml.sax.SAXException
379  {
380    try
381    {
382        writeNormalizedChars(ch, start, length, m_lineSepUse);
383        if (m_tracer != null)
384            super.fireCDATAEvent(ch, start, length);
385    }
386    catch(IOException ioe)
387    {
388      throw new SAXException(ioe);
389    }
390  }
391
392  /**
393   * Receive notification of ignorable whitespace in element content.
394   *
395   * <p>Validating Parsers must use this method to report each chunk
396   * of ignorable whitespace (see the W3C XML 1.0 recommendation,
397   * section 2.10): non-validating parsers may also use this method
398   * if they are capable of parsing and using content models.</p>
399   *
400   * <p>SAX parsers may return all contiguous whitespace in a single
401   * chunk, or they may split it into several chunks; however, all of
402   * the characters in any single event must come from the same
403   * external entity, so that the Locator provides useful
404   * information.</p>
405   *
406   * <p>The application must not attempt to read from the array
407   * outside of the specified range.</p>
408   *
409   * @param ch The characters from the XML document.
410   * @param start The start position in the array.
411   * @param length The number of characters to read from the array.
412   * @throws org.xml.sax.SAXException Any SAX exception, possibly
413   *            wrapping another exception.
414   * @see #characters
415   *
416   * @throws org.xml.sax.SAXException
417   */
418  public void ignorableWhitespace(char ch[], int start, int length)
419          throws org.xml.sax.SAXException
420  {
421
422    try
423    {
424      writeNormalizedChars(ch, start, length, m_lineSepUse);
425    }
426    catch(IOException ioe)
427    {
428      throw new SAXException(ioe);
429    }
430  }
431
432  /**
433   * Receive notification of a processing instruction.
434   *
435   * <p>The Parser will invoke this method once for each processing
436   * instruction found: note that processing instructions may occur
437   * before or after the main document element.</p>
438   *
439   * <p>A SAX parser should never report an XML declaration (XML 1.0,
440   * section 2.8) or a text declaration (XML 1.0, section 4.3.1)
441   * using this method.</p>
442   *
443   * @param target The processing instruction target.
444   * @param data The processing instruction data, or null if
445   *        none was supplied.
446   * @throws org.xml.sax.SAXException Any SAX exception, possibly
447   *            wrapping another exception.
448   *
449   * @throws org.xml.sax.SAXException
450   */
451  public void processingInstruction(String target, String data)
452          throws org.xml.sax.SAXException
453  {
454    // flush anything pending first
455    flushPending();
456
457    if (m_tracer != null)
458        super.fireEscapingEvent(target, data);
459  }
460
461  /**
462   * Called when a Comment is to be constructed.
463   * Note that Xalan will normally invoke the other version of this method.
464   * %REVIEW% In fact, is this one ever needed, or was it a mistake?
465   *
466   * @param   data  The comment data.
467   * @throws org.xml.sax.SAXException Any SAX exception, possibly
468   *            wrapping another exception.
469   */
470  public void comment(String data) throws org.xml.sax.SAXException
471  {
472      final int length = data.length();
473      if (length > m_charsBuff.length)
474      {
475          m_charsBuff = new char[length*2 + 1];
476      }
477      data.getChars(0, length, m_charsBuff, 0);
478      comment(m_charsBuff, 0, length);
479  }
480
481  /**
482   * Report an XML comment anywhere in the document.
483   *
484   * This callback will be used for comments inside or outside the
485   * document element, including comments in the external DTD
486   * subset (if read).
487   *
488   * @param ch An array holding the characters in the comment.
489   * @param start The starting position in the array.
490   * @param length The number of characters to use from the array.
491   * @throws org.xml.sax.SAXException The application may raise an exception.
492   */
493  public void comment(char ch[], int start, int length)
494          throws org.xml.sax.SAXException
495  {
496
497    flushPending();
498    if (m_tracer != null)
499        super.fireCommentEvent(ch, start, length);
500  }
501
502  /**
503   * Receive notivication of a entityReference.
504   *
505   * @param name non-null reference to the name of the entity.
506   *
507   * @throws org.xml.sax.SAXException
508   */
509  public void entityReference(String name) throws org.xml.sax.SAXException
510  {
511        if (m_tracer != null)
512            super.fireEntityReference(name);
513  }
514
515    /**
516     * @see ExtendedContentHandler#addAttribute(String, String, String, String, String)
517     */
518    public void addAttribute(
519        String uri,
520        String localName,
521        String rawName,
522        String type,
523        String value,
524        boolean XSLAttribute)
525    {
526        // do nothing, just forget all about the attribute
527    }
528
529    /**
530     * @see org.xml.sax.ext.LexicalHandler#endCDATA()
531     */
532    public void endCDATA() throws SAXException
533    {
534        // do nothing
535    }
536
537    /**
538     * @see ExtendedContentHandler#endElement(String)
539     */
540    public void endElement(String elemName) throws SAXException
541    {
542        if (m_tracer != null)
543            super.fireEndElem(elemName);
544    }
545
546    /**
547     * From XSLTC
548     */
549    public void startElement(
550    String elementNamespaceURI,
551    String elementLocalName,
552    String elementName)
553    throws SAXException
554    {
555        if (m_needToCallStartDocument)
556            startDocumentInternal();
557        // time to fire off startlement event.
558        if (m_tracer != null) {
559            super.fireStartElem(elementName);
560            this.firePseudoAttributes();
561        }
562
563        return;
564    }
565
566
567    /**
568     * From XSLTC
569     */
570    public void characters(String characters)
571    throws SAXException
572    {
573        final int length = characters.length();
574        if (length > m_charsBuff.length)
575        {
576            m_charsBuff = new char[length*2 + 1];
577        }
578        characters.getChars(0, length, m_charsBuff, 0);
579        characters(m_charsBuff, 0, length);
580    }
581
582
583    /**
584     * From XSLTC
585     */
586    public void addAttribute(String name, String value)
587    {
588        // do nothing, forget about the attribute
589    }
590
591    /**
592     * Add a unique attribute
593     */
594    public void addUniqueAttribute(String qName, String value, int flags)
595        throws SAXException
596    {
597        // do nothing, forget about the attribute
598    }
599
600    public boolean startPrefixMapping(
601        String prefix,
602        String uri,
603        boolean shouldFlush)
604        throws SAXException
605    {
606        // no namespace support for HTML
607        return false;
608    }
609
610
611    public void startPrefixMapping(String prefix, String uri)
612        throws org.xml.sax.SAXException
613    {
614        // no namespace support for HTML
615    }
616
617
618    public void namespaceAfterStartElement(
619        final String prefix,
620        final String uri)
621        throws SAXException
622    {
623        // no namespace support for HTML
624    }
625
626    public void flushPending() throws org.xml.sax.SAXException
627    {
628            if (m_needToCallStartDocument)
629            {
630                startDocumentInternal();
631                m_needToCallStartDocument = false;
632            }
633    }
634}
635