1/*
2 * Copyright (C) 2008-2009 Marc Blank
3 * Licensed to The Android Open Source Project.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package com.android.exchange.adapter;
19
20import android.content.Context;
21
22import com.android.exchange.Eas;
23import com.android.exchange.EasException;
24import com.android.exchange.service.EasService;
25import com.android.exchange.utility.FileLogger;
26import com.android.mail.utils.LogUtils;
27import com.google.common.annotations.VisibleForTesting;
28
29import java.io.ByteArrayOutputStream;
30import java.io.FileNotFoundException;
31import java.io.FileOutputStream;
32import java.io.IOException;
33import java.io.InputStream;
34import java.util.ArrayDeque;
35import java.util.ArrayList;
36import java.util.Arrays;
37import java.util.Deque;
38
39/**
40 * Extremely fast and lightweight WBXML parser, implementing only the subset of WBXML that
41 * EAS uses (as defined in the EAS specification).
42 *
43 * Supports:
44 *      WBXML tokens to encode XML tags
45 *      WBXML code pages to support multiple XML namespaces
46 *      Inline strings
47 *      Opaque data
48 *
49 * Does not support: (throws EasParserException)
50 *      String tables
51 *      Entities
52 *      Processing instructions
53 *      Attribute encoding
54 *
55 */
56public abstract class Parser {
    // Compile-time switch for logVerbose(); while false, verbose messages are dropped
    private static final boolean LOG_VERBOSE = false;

    private static final String LOG_TAG = Eas.LOG_TAG;

    // Token types stored in "type" and returned by getNext(), together with the
    // START_DOCUMENT, END_DOCUMENT and END markers that nextTag() accepts and returns
    public static final int START_DOCUMENT = 0;
    public static final int END_DOCUMENT = 1;
    // Shares its value with END_DOCUMENT; getNext() reports it when the stream is exhausted
    private static final int DONE = 1;
    private static final int START = 2;
    public static final int END = 3;
    private static final int TEXT = 4;
    private static final int OPAQUE = 5;
    // Not referenced anywhere in this class
    private static final int NOT_ENDED = Integer.MIN_VALUE;
    // Value returned by InputStream.read() at end of stream
    private static final int EOF_BYTE = -1;
71
    // Set to true by captureOn(); while true, read() records every value it reads
    private boolean capture = false;

    // Values recorded by read() while capture is on; allocated by captureOn()
    private ArrayList<Integer> captureArray;

    // The input stream for this parser
    private InputStream in;

    // The stack of names of tags being processed; used when debug = true
    // NOTE(review): nothing in this class reads or writes this array -- confirm it is dead
    private String[] nameArray = new String[32];
81
82    public class Tag {
83        private final int mPage;
84        private final int mIndex;
85        // Whether the tag is associated with content (a value)
86        public final boolean mNoContent;
87        private final String mName;
88
89        public Tag(final int page, final int id) {
90            mPage = page;
91            // The tag is in the low 6 bits
92            mIndex = id & Tags.PAGE_MASK;
93            // If the high bit is set, there is content (a value) to be read
94            mNoContent = (id & Wbxml.WITH_CONTENT) == 0;
95            if (Tags.isGlobalTag(mIndex)) {
96                mName = "unsupported-WBXML";
97            } else if (!Tags.isValidTag(mPage, mIndex)) {
98                mName = "unknown";
99            } else {
100                mName = Tags.getTagName(mPage, mIndex);
101            }
102        }
103
104        public int getTagNum() {
105            if (Tags.isGlobalTag(mIndex)) {
106                return mIndex;
107            }
108            return (mPage << Tags.PAGE_SHIFT) | mIndex;
109        }
110
111        @Override
112        public String toString() {
113            return mName;
114        }
115    }
116
    // The stack of tags being processed; the most recently opened tag is first
    private final Deque<Tag> startTagArray = new ArrayDeque<Tag>();

    // The tag most recently opened by push() or closed by pop()
    private Tag startTag;

    // The type of the last token read (eg, TEXT, OPAQUE, END, etc).
    private int type;

    // The current page. As of EAS 14.1, this is a value 0-24.
    private int page;

    // The current tag. The low order 6 bits contain the tag index and the
    // higher order bits the page number. The format matches that used for
    // the tag enums defined in Tags.java.
    public int tag;

    // True when the current tag has NO content (no value); copied from Tag.mNoContent by
    // push() and cleared by getNext() once the matching END has been synthesized
    public boolean noContent;

    // The value read, as a String; reset to null at the start of every getNext()
    private String text;

    // The value read, as bytes; reset to null at the start of every getNext()
    private byte[] bytes;
141
142    // TODO: Define a new parse exception type rather than lumping these in as IOExceptions.
143
144    /**
145     * Generated when the parser comes to EOF prematurely during parsing (i.e. in error)
146     */
147    public class EofException extends IOException {
148        private static final long serialVersionUID = 1L;
149    }
150
151    /**
152     * An EmptyStreamException is an EofException that occurs reading the first byte in the parser's
153     * input stream; in other words, the stream had no content.
154     */
155    public class EmptyStreamException extends EofException {
156        private static final long serialVersionUID = 1L;
157    }
158
159    public class EodException extends IOException {
160        private static final long serialVersionUID = 1L;
161    }
162
163    public class EasParserException extends IOException {
164        private static final long serialVersionUID = 1L;
165
166        EasParserException() {
167            super("WBXML format error");
168        }
169
170        EasParserException(final String reason) {
171            super(reason);
172        }
173    }
174
    /**
     * Base implementation: reads nothing from the stream and always returns false.
     * NOTE(review): presumably concrete parsers override this with the real parsing loop --
     * confirm against the subclasses.
     *
     * @return false in this base implementation
     * @throws IOException declared for overriding implementations; never thrown here
     * @throws EasException declared for overriding implementations; never thrown here
     */
    public boolean parse() throws IOException, EasException {
        return false;
    }
178
    /**
     * Creates a parser over the given stream and, when the stream is non-null, immediately
     * consumes the WBXML header via {@link #setInput(InputStream, boolean)}.
     *
     * @param in the stream to parse
     * @throws IOException if the header cannot be read; an EmptyStreamException when the stream
     *         has no content, an EasParserException when a string table is declared
     */
    public Parser(final InputStream in) throws IOException {
        setInput(in, true);
    }
182
    /**
     * Constructor for use when switching parsers within a input stream. The new parser shares
     * the existing parser's stream and does not read a WBXML header from it; only the stream
     * reference is taken over (the other parser's page and tag stack are not copied).
     *
     * @param parser an existing, initialized parser
     * @throws IOException
     */
    public Parser(final Parser parser) throws IOException {
        setInput(parser.in, false);
    }
191
    /**
     * @return the input stream this parser is currently reading from (may be null if a null
     *         stream was supplied)
     */
    protected InputStream getInput() {
        return in;
    }
195
196    /**
197     * Turns on data capture; this is used to create test streams that represent "live" data and
198     * can be used against the various parsers.
199     */
200    public void captureOn() {
201        capture = true;
202        captureArray = new ArrayList<Integer>();
203    }
204
205    /**
206     * Turns off data capture; writes the captured data to a specified file.
207     */
208    public void captureOff(final Context context, final String file) {
209        try {
210            final FileOutputStream out = context.openFileOutput(file,
211                    Context.MODE_WORLD_WRITEABLE);
212            out.write(captureArray.toString().getBytes());
213            out.close();
214        } catch (FileNotFoundException e) {
215            // This is debug code; exceptions aren't interesting.
216        } catch (IOException e) {
217            // This is debug code; exceptions aren't interesting.
218        }
219    }
220
221    /**
222     * Return the value of the current tag, as a byte array. Throws EasParserException
223     * if neither opaque nor text data is present. Never returns null--returns
224     * an empty byte[] array for empty data.
225     *
226     * @return the byte array value of the current tag
227     * @throws IOException
228     */
229    public byte[] getValueBytes() throws IOException {
230        final String name = startTag.toString();
231
232        getNext();
233        // This means there was no value given, just <Foo/>; we'll return empty array
234        if (type == END) {
235            log("No value for tag: " + name);
236            return new byte[0];
237        } else if (type != OPAQUE && type != TEXT) {
238            throw new EasParserException("Expected OPAQUE or TEXT data for tag " + name);
239        }
240
241        // Save the value
242        final byte[] val = type == OPAQUE ? bytes : text.getBytes("UTF-8");
243        // Read the next token; it had better be the end of the current tag
244        getNext();
245        // If not, throw an exception
246        if (type != END) {
247            throw new EasParserException("No END found for tag " + name);
248        }
249        return val;
250    }
251
252    /**
253     * Return the value of the current tag, as a String. Throws EasParserException
254     * for non-text data. Never returns null--returns an empty string if no data.
255     *
256     * @return the String value of the current tag
257     * @throws IOException
258     */
259    public String getValue() throws IOException {
260        final String name = startTag.toString();
261
262        getNext();
263        // This means there was no value given, just <Foo/>; we'll return empty string for now
264        if (type == END) {
265            log("No value for tag: " + name);
266            return "";
267        } else if (type != TEXT) {
268            throw new EasParserException("Expected TEXT data for tag " + name);
269        }
270
271        // Save the value
272        final String val = text;
273        // Read the next token; it had better be the end of the current tag
274        getNext();
275        // If not, throw an exception
276        if (type != END) {
277            throw new EasParserException("No END found for tag " + name);
278        }
279        return val;
280    }
281
282    /**
283     * Return the value of the current tag, as an integer. Throws EasParserException
284     * for non text data, and text data that doesn't parse as an integer. Returns
285     * 0 for empty data.
286     *
287     * @return the integer value of the current tag
288     * @throws IOException
289     */
290    public int getValueInt() throws IOException {
291        final String val = getValue();
292        if (val.length() == 0) {
293            return 0;
294        }
295
296        int num;
297        try {
298            num = Integer.parseInt(val);
299        } catch (NumberFormatException e) {
300            throw new EasParserException("Tag " + startTag + ": " + e.getMessage());
301        }
302        return num;
303    }
304
305    /**
306     * Return the next tag found in the stream; special tags END and END_DOCUMENT are used to
307     * mark the end of the current tag and end of document.  If we hit end of document without
308     * looking for it, generate an EodException.  The tag returned consists of the page number
309     * shifted PAGE_SHIFT bits OR'd with the tag retrieved from the stream.  Thus, all tags returned
310     * are unique.
311     *
312     * @param endingTag the tag that would represent the end of the tag we're processing
313     * @return the next tag found
314     * @throws IOException
315     */
316    public int nextTag(final int endingTag) throws IOException {
317        while (getNext() != DONE) {
318            // If we're a start, set tag to include the page and return it
319            if (type == START) {
320                tag = startTag.getTagNum();
321                return tag;
322            // If we're at the ending tag we're looking for, return the END signal
323            } else if (type == END && startTag.getTagNum() == endingTag) {
324                return END;
325            }
326        }
327        // We're at end of document here.  If we're looking for it, return END_DOCUMENT
328        if (endingTag == START_DOCUMENT) {
329            return END_DOCUMENT;
330        }
331        // Otherwise, we've prematurely hit end of document, so exception out
332        // EodException is a subclass of IOException; this will be treated as an IO error by
333        // EasService
334        throw new EodException();
335    }
336
337    /**
338     * Skip anything found in the stream until the end of the current tag is reached.  This can be
339     * used to ignore stretches of xml that aren't needed by the parser.
340     *
341     * @throws IOException
342     */
343    public void skipTag() throws IOException {
344        final int thisTag = startTag.getTagNum();
345        // Just loop until we hit the end of the current tag
346        while (getNext() != DONE) {
347            if (type == END && startTag.getTagNum() == thisTag) {
348                return;
349            }
350        }
351
352        // If we're at end of document, that's bad
353        throw new EofException();
354    }
355
356    /**
357     * Initializes the parser with an input stream; reads the first 4 bytes (which are always the
358     * same in EAS, and then sets the tag table to point to page 0 (by definition, the starting
359     * page).
360     *
361     * @param in the InputStream associated with this parser
362     * @throws IOException
363     */
364    public void setInput(final InputStream in, final boolean initialize) throws IOException {
365        this.in = in;
366        if ((in != null) && initialize) {
367            // If we fail on the very first byte, report an empty stream
368            try {
369                final int version = readByte(); // version
370            } catch (EofException e) {
371                throw new EmptyStreamException();
372            }
373            readInt();  // public identifier
374            readInt();  // 106 (UTF-8)
375            final int stringTableLength = readInt();  // string table length
376            if (stringTableLength != 0) {
377                throw new EasParserException("WBXML string table unsupported");
378            }
379        }
380    }
381
382    @VisibleForTesting
383    void resetInput(final InputStream in) {
384        this.in = in;
385        try {
386            // Read leading zero
387            read();
388        } catch (IOException e) {
389        }
390    }
391
392    void log(final String str) {
393        if (!EasService.getProtocolLogging()) {
394            return;
395        }
396        final String logStr;
397        int cr = str.indexOf('\n');
398        if (cr > 0) {
399            logStr = str.substring(0, cr);
400        } else {
401            logStr = str;
402        }
403        final char [] charArray = new char[startTagArray.size() * 2];
404        Arrays.fill(charArray, ' ');
405        final String indent = new String(charArray);
406        LogUtils.d(LOG_TAG, "%s", indent + logStr);
407        if (EasService.getFileLogging()) {
408            FileLogger.log(LOG_TAG, logStr);
409        }
410    }
411
412    void logVerbose(final String str) {
413        if (LOG_VERBOSE) {
414            log(str);
415        }
416    }
417
    /**
     * Makes the given page-qualified tag the current open tag, as if it had just been read from
     * the stream. The current page is switched to the page encoded in the upper bits of
     * {@code id} before the tag is pushed, so the new Tag is created under that page.
     *
     * @param id a tag number in the format returned by {@link Tag#getTagNum()} (page shifted
     *        left by PAGE_SHIFT, OR'd with the tag index and any flag bits)
     */
    protected void pushTag(final int id) {
        // Must happen first: push() builds the Tag from the current page
        page = id >>> Tags.PAGE_SHIFT;
        push(id);
    }
422
423    protected void pop() {
424        // Retrieve the now-current startTag from our stack
425        startTag = startTagArray.removeFirst();
426        log("</" + startTag + '>');
427    }
428
429    private void push(final int id) {
430        startTag = new Tag(page, id);
431        noContent = startTag.mNoContent;
432        log("<" + startTag + (noContent ? '/' : "") + '>');
433        // Save the startTag to our stack
434        startTagArray.addFirst(startTag);
435    }
436
437    /**
438     * Return the next piece of data from the stream.  The return value indicates the type of data
439     * that has been retrieved - START (start of tag), END (end of tag), DONE (end of stream), or
440     * TEXT (the value of a tag)
441     *
442     * @return the type of data retrieved
443     * @throws IOException
444     */
445    private final int getNext() throws IOException {
446        bytes = null;
447        text = null;
448
449        if (noContent) {
450            startTagArray.removeFirst();
451            type = END;
452            noContent = false;
453            return type;
454        }
455
456        int id = read();
457        while (id == Wbxml.SWITCH_PAGE) {
458            // Get the new page number
459            page = readByte();
460            // Retrieve the current tag table
461            if (!Tags.isValidPage(page)) {
462                // Unknown code page. These seem to happen mostly because of
463                // invalid data from the server so throw an exception here.
464                throw new EasParserException("Unknown code page " + page);
465            }
466            logVerbose("Page: " + page);
467            id = read();
468        }
469
470        switch (id) {
471            case EOF_BYTE:
472                // End of document
473                type = DONE;
474                break;
475
476            case Wbxml.END:
477                type = END;
478                pop();
479                break;
480
481            case Wbxml.STR_I:
482                // Inline string
483                type = TEXT;
484                text = readInlineString();
485                log(startTag + ": " + text);
486                break;
487
488            case Wbxml.OPAQUE:
489                // Integer length + opaque data
490                type = OPAQUE;
491                final int length = readInt();
492                bytes = new byte[length];
493                for (int i = 0; i < length; i++) {
494                    bytes[i] = (byte)readByte();
495                }
496                log(startTag + ": (opaque:" + length + ") ");
497                break;
498
499            default:
500                if (Tags.isGlobalTag(id & Tags.PAGE_MASK)) {
501                    throw new EasParserException(String.format(
502                                    "Unhandled WBXML global token 0x%02X", id));
503                }
504                if ((id & Wbxml.WITH_ATTRIBUTES) != 0) {
505                    throw new EasParserException(String.format(
506                                    "Attributes unsupported, tag 0x%02X", id));
507                }
508                type = START;
509                push(id);
510        }
511
512        // Return the type of data we're dealing with
513        return type;
514    }
515
516    /**
517     * Read an int from the input stream, and capture it if necessary for debugging.  Seems a small
518     * price to pay...
519     *
520     * @return the int read
521     * @throws IOException
522     */
523    private int read() throws IOException {
524        int i;
525        i = in.read();
526        if (capture) {
527            captureArray.add(i);
528        }
529        logVerbose("Byte: " + i);
530        return i;
531    }
532
533    private int readByte() throws IOException {
534        int i = read();
535        if (i == EOF_BYTE) {
536            throw new EofException();
537        }
538        return i;
539    }
540
541    /**
542     * Throws EasParserException if detects integer encoded with more than 5
543     * bytes. A uint_32 needs 5 bytes to fully encode 32 bits so if the high
544     * bit is set for more than 4 bytes, something is wrong with the data
545     * stream.
546     */
547    private int readInt() throws IOException {
548        int result = 0;
549        int i;
550        int numBytes = 0;
551
552        do {
553            if (++numBytes > 5) {
554                throw new EasParserException("Invalid integer encoding, too many bytes");
555            }
556            i = readByte();
557            result = (result << 7) | (i & 0x7f);
558        } while ((i & 0x80) != 0);
559
560        return result;
561    }
562
563    /**
564     * Read an inline string from the stream
565     *
566     * @return the String as parsed from the stream
567     * @throws IOException
568     */
569    private String readInlineString() throws IOException {
570        final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(256);
571        while (true) {
572            final int i = read();
573            if (i == 0) {
574                break;
575            } else if (i == EOF_BYTE) {
576                throw new EofException();
577            }
578            outputStream.write(i);
579        }
580        outputStream.flush();
581        final String res = outputStream.toString("UTF-8");
582        outputStream.close();
583        return res;
584    }
585}
586