1/*
2 * Copyright (C) 2008-2009 Marc Blank
3 * Licensed to The Android Open Source Project.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package com.android.exchange.adapter;
19
20import android.content.Context;
21
22import com.android.exchange.Eas;
23import com.android.exchange.EasException;
24import com.android.exchange.utility.FileLogger;
25import com.android.mail.utils.LogUtils;
26import com.google.common.annotations.VisibleForTesting;
27
28import java.io.ByteArrayOutputStream;
29import java.io.FileNotFoundException;
30import java.io.FileOutputStream;
31import java.io.IOException;
32import java.io.InputStream;
33import java.util.ArrayList;
34
35/**
36 * Extremely fast and lightweight WBXML parser, implementing only the subset of WBXML that
37 * EAS uses (as defined in the EAS specification)
38 *
39 */
40public abstract class Parser {
41    private static final boolean LOG_VERBOSE = false;
42
43    private static final String LOG_TAG = Eas.LOG_TAG;
44
45    // The following constants are Wbxml standard
46    public static final int START_DOCUMENT = 0;
47    public static final int DONE = 1;
48    public static final int START = 2;
49    public static final int END = 3;
50    public static final int TEXT = 4;
51    public static final int END_DOCUMENT = 3;
52    private static final int NOT_FETCHED = Integer.MIN_VALUE;
53    private static final int NOT_ENDED = Integer.MIN_VALUE;
54    private static final int EOF_BYTE = -1;
55
56    // Where tags start in a page
57    private static final int TAG_BASE = 5;
58
59    private boolean logging = false;
60    private boolean capture = false;
61
62    private ArrayList<Integer> captureArray;
63
64    // The input stream for this parser
65    private InputStream in;
66
67    // The current tag depth
68    private int depth;
69
70    // The upcoming (saved) id from the stream
71    private int nextId = NOT_FETCHED;
72
73    // The current tag table (i.e. the tag table for the current page)
74    private String[] tagTable;
75
76    // An array of tag tables, as defined in EasTags
77    static private String[][] tagTables = new String[Tags.pages.length + 1][];
78
79    // The stack of names of tags being processed; used when debug = true
80    private String[] nameArray = new String[32];
81
82    // The stack of tags being processed
83    private int[] startTagArray = new int[32];
84
85    // The following vars are available to all to avoid method calls that represent the state of
86    // the parser at any given time
87    public int endTag = NOT_ENDED;
88
89    public int startTag;
90
91    // The type of the last token read
92    public int type;
93
94    // The current page
95    public int page;
96
97    // The current tag
98    public int tag;
99
100    // The name of the current tag
101    public String name;
102
103    // Whether the current tag is associated with content (a value)
104    public boolean noContent;
105
106    // The value read, as a String.  Only one of text or num will be valid, depending on whether the
107    // value was requested as a String or an int (to avoid wasted effort in parsing)
108    public String text;
109
110    // The value read, as an int
111    public int num;
112
113    // The value read, as bytes
114    public byte[] bytes;
115
116    // TODO: Define a new parse exception type rather than lumping these in as IOExceptions.
117
118    /**
119     * Generated when the parser comes to EOF prematurely during parsing (i.e. in error)
120     */
121    public class EofException extends IOException {
122        private static final long serialVersionUID = 1L;
123    }
124
125    /**
126     * An EmptyStreamException is an EofException that occurs reading the first byte in the parser's
127     * input stream; in other words, the stream had no content.
128     */
129    public class EmptyStreamException extends EofException {
130        private static final long serialVersionUID = 1L;
131    }
132
133    public class EodException extends IOException {
134        private static final long serialVersionUID = 1L;
135    }
136
137    public class EasParserException extends IOException {
138        private static final long serialVersionUID = 1L;
139
140        EasParserException() {
141            super("WBXML format error");
142        }
143
144        EasParserException(String reason) {
145            super(reason);
146        }
147    }
148
149    public boolean parse() throws IOException, EasException {
150        return false;
151    }
152
153    /**
154     * Initialize the tag tables; they are constant
155     *
156     */
157    {
158        String[][] pages = Tags.pages;
159        for (int i = 0; i < pages.length; i++) {
160            String[] page = pages[i];
161            if (page.length > 0) {
162                tagTables[i] = page;
163            }
164        }
165    }
166
167    public Parser(InputStream in) throws IOException {
168        setInput(in, true);
169        logging = Eas.PARSER_LOG;
170    }
171
172    /**
173     * Constructor for use when switching parsers within a input stream
174     * @param parser an existing, initialized parser
175     * @throws IOException
176     */
177    public Parser(Parser parser) throws IOException {
178        setInput(parser.in, false);
179        logging = Eas.PARSER_LOG;
180    }
181
182    /**
183     * Set the debug state of the parser.  When debugging is on, every token is logged (LogUtils.v)
184     * to the console.
185     *
186     * @param val the desired state for debug output
187     */
188    public void setDebug(boolean val) {
189        logging = val;
190    }
191
192    protected InputStream getInput() {
193        return in;
194    }
195
196    /**
197     * Turns on data capture; this is used to create test streams that represent "live" data and
198     * can be used against the various parsers.
199     */
200    public void captureOn() {
201        capture = true;
202        captureArray = new ArrayList<Integer>();
203    }
204
205    /**
206     * Turns off data capture; writes the captured data to a specified file.
207     */
208    public void captureOff(Context context, String file) {
209        try {
210            FileOutputStream out = context.openFileOutput(file, Context.MODE_WORLD_WRITEABLE);
211            out.write(captureArray.toString().getBytes());
212            out.close();
213        } catch (FileNotFoundException e) {
214            // This is debug code; exceptions aren't interesting.
215        } catch (IOException e) {
216            // This is debug code; exceptions aren't interesting.
217        }
218    }
219
220    /**
221     * Return the value of the current tag, as a byte array.  Note that the result of this call
222     * is indeterminate, and possibly null, if the value of the tag is not a byte array
223     *
224     * @return the byte array value of the current tag
225     * @throws IOException
226     */
227    public byte[] getValueBytes() throws IOException {
228        getValue();
229        return bytes;
230    }
231
232    /**
233     * Return the value of the current tag, as a String.  Note that the result of this call is
234     * indeterminate, and possibly null, if the value of the tag is not an immediate string
235     *
236     * @return the String value of the current tag
237     * @throws IOException
238     */
239    public String getValue() throws IOException {
240        // The false argument tells getNext to return the value as a String
241        getNext(false);
242        // This means there was no value given, just <Foo/>; we'll return empty string for now
243        if (type == END) {
244            if (logging) {
245                log("No value for tag: " + tagTable[startTag - TAG_BASE]);
246            }
247            return "";
248        }
249        // Save the value
250        String val = text;
251        // Read the next token; it had better be the end of the current tag
252        getNext(false);
253        // If not, throw an exception
254        if (type != END) {
255            throw new IOException("No END found!");
256        }
257        return val;
258    }
259
260    /**
261     * Return the value of the current tag, as an integer.  Note that the value of this call is
262     * indeterminate if the value of this tag is not an immediate string parsed as an integer
263     *
264     * @return the integer value of the current tag
265     * @throws IOException
266     */
267   public int getValueInt() throws IOException {
268        // The true argument to getNext indicates the desire for an integer return value
269        getNext(true);
270        if (type == END) {
271            return 0;
272        }
273        // Save the value
274        int val = num;
275        // Read the next token; it had better be the end of the current tag
276        getNext(false);
277        // If not, throw an exception
278        if (type != END) {
279            throw new IOException("No END found!");
280        }
281        return val;
282    }
283
284    /**
285     * Return the next tag found in the stream; special tags END and END_DOCUMENT are used to
286     * mark the end of the current tag and end of document.  If we hit end of document without
287     * looking for it, generate an EodException.  The tag returned consists of the page number
288     * shifted PAGE_SHIFT bits OR'd with the tag retrieved from the stream.  Thus, all tags returned
289     * are unique.
290     *
291     * @param endingTag the tag that would represent the end of the tag we're processing
292     * @return the next tag found
293     * @throws IOException
294     */
295    public int nextTag(int endingTag) throws IOException {
296        // Lose the page information
297        endTag = endingTag &= Tags.PAGE_MASK;
298        while (getNext(false) != DONE) {
299            // If we're a start, set tag to include the page and return it
300            if (type == START) {
301                tag = page | startTag;
302                return tag;
303            // If we're at the ending tag we're looking for, return the END signal
304            } else if (type == END && startTag == endTag) {
305                return END;
306            }
307        }
308        // We're at end of document here.  If we're looking for it, return END_DOCUMENT
309        if (endTag == START_DOCUMENT) {
310            return END_DOCUMENT;
311        }
312        // Otherwise, we've prematurely hit end of document, so exception out
313        // EodException is a subclass of IOException; this will be treated as an IO error by
314        // ExchangeService
315        throw new EodException();
316    }
317
318    /**
319     * Skip anything found in the stream until the end of the current tag is reached.  This can be
320     * used to ignore stretches of xml that aren't needed by the parser.
321     *
322     * @throws IOException
323     */
324    public void skipTag() throws IOException {
325        int thisTag = startTag;
326        // Just loop until we hit the end of the current tag
327        while (getNext(false) != DONE) {
328            if (type == END && startTag == thisTag) {
329                return;
330            }
331        }
332
333        // If we're at end of document, that's bad
334        throw new EofException();
335    }
336
337    /**
338     * Retrieve the next token from the input stream
339     *
340     * @return the token found
341     * @throws IOException
342     */
343    public int nextToken() throws IOException {
344        getNext(false);
345        return type;
346    }
347
348    /**
349     * Initializes the parser with an input stream; reads the first 4 bytes (which are always the
350     * same in EAS, and then sets the tag table to point to page 0 (by definition, the starting
351     * page).
352     *
353     * @param in the InputStream associated with this parser
354     * @throws IOException
355     */
356    public void setInput(InputStream in, boolean initialize) throws IOException {
357        this.in = in;
358        if ((in != null) && initialize) {
359            // If we fail on the very first byte, report an empty stream
360            try {
361                readByte(); // version
362            } catch (EofException e) {
363                throw new EmptyStreamException();
364            }
365            readInt();  // ?
366            readInt();  // 106 (UTF-8)
367            readInt();  // string table length
368        }
369        tagTable = tagTables[0];
370    }
371
372    @VisibleForTesting
373    void resetInput(InputStream in) {
374        this.in = in;
375        try {
376            // Read leading zero
377            read();
378        } catch (IOException e) {
379        }
380    }
381
382    void log(String str) {
383        int cr = str.indexOf('\n');
384        if (cr > 0) {
385            str = str.substring(0, cr);
386        }
387        LogUtils.v(LOG_TAG, str);
388        if (Eas.FILE_LOG) {
389            FileLogger.log(LOG_TAG, str);
390        }
391    }
392
393    protected void pushTag(int id) {
394        page = id >> Tags.PAGE_SHIFT;
395        tagTable = tagTables[page];
396        push(id);
397    }
398
399    private void pop() {
400        if (logging) {
401            name = nameArray[depth];
402            log("</" + name + '>');
403        }
404        // Retrieve the now-current startTag from our stack
405        startTag = endTag = startTagArray[depth];
406        depth--;
407    }
408
409    private void push(int id) {
410        // The tag is in the low 6 bits
411        startTag = id & 0x3F;
412        // If the high bit is set, there is content (a value) to be read
413        noContent = (id & 0x40) == 0;
414        depth++;
415        if (logging) {
416            name = tagTable[startTag - TAG_BASE];
417            nameArray[depth] = name;
418            log("<" + name + (noContent ? '/' : "") + '>');
419        }
420        // Save the startTag to our stack
421        startTagArray[depth] = startTag;
422    }
423
424    /**
425     * Return the next piece of data from the stream.  The return value indicates the type of data
426     * that has been retrieved - START (start of tag), END (end of tag), DONE (end of stream), or
427     * TEXT (the value of a tag)
428     *
429     * @param asInt whether a TEXT value should be parsed as a String or an int.
430     * @return the type of data retrieved
431     * @throws IOException
432     */
433    private final int getNext(boolean asInt) throws IOException {
434        if (noContent) {
435            nameArray[depth--] = null;
436            type = END;
437            noContent = false;
438            return type;
439        }
440
441        text = null;
442        name = null;
443
444        int id = nextId ();
445        while (id == Wbxml.SWITCH_PAGE) {
446            nextId = NOT_FETCHED;
447            // Get the new page number
448            int pg = readByte();
449            // Save the shifted page to add into the startTag in nextTag
450            page = pg << Tags.PAGE_SHIFT;
451            if (LOG_VERBOSE) {
452                log("Page: " + page);
453            }
454            // Retrieve the current tag table
455            tagTable = tagTables[pg];
456            id = nextId();
457        }
458        nextId = NOT_FETCHED;
459
460        switch (id) {
461            case EOF_BYTE:
462                // End of document
463                type = DONE;
464                break;
465
466            case Wbxml.END:
467                type = END;
468                pop();
469                break;
470
471            case Wbxml.STR_I:
472                // Inline string
473                type = TEXT;
474                if (asInt) {
475                    num = readInlineInt();
476                } else {
477                    text = readInlineString();
478                }
479                if (logging) {
480                    name = tagTable[startTag - TAG_BASE];
481                    log(name + ": " + (asInt ? Integer.toString(num) : text));
482                }
483                break;
484
485            case Wbxml.OPAQUE:
486                // Integer length + opaque data
487                int length = readInt();
488                bytes = new byte[length];
489                for (int i = 0; i < length; i++) {
490                    bytes[i] = (byte)readByte();
491                }
492                if (logging) {
493                    name = tagTable[startTag - TAG_BASE];
494                    log(name + ": (opaque:" + length + ") ");
495                }
496                break;
497
498            default:
499                type = START;
500                push(id);
501        }
502
503        // Return the type of data we're dealing with
504        return type;
505    }
506
507    /**
508     * Read an int from the input stream, and capture it if necessary for debugging.  Seems a small
509     * price to pay...
510     *
511     * @return the int read
512     * @throws IOException
513     */
514    private int read() throws IOException {
515        int i;
516        i = in.read();
517        if (capture) {
518            captureArray.add(i);
519        }
520        if (LOG_VERBOSE) {
521            log("Byte: " + i);
522        }
523        return i;
524    }
525
526    private int nextId() throws IOException {
527        if (nextId == NOT_FETCHED) {
528            nextId = read();
529        }
530        return nextId;
531    }
532
533    private int readByte() throws IOException {
534        int i = read();
535        if (i == EOF_BYTE) {
536            throw new EofException();
537        }
538        return i;
539    }
540
541    /**
542     * Read an integer from the stream; this is called when the parser knows that what follows is
543     * an inline string representing an integer (e.g. the Read tag in Email has a value known to
544     * be either "0" or "1")
545     *
546     * @return the integer as parsed from the stream
547     * @throws IOException
548     */
549    private int readInlineInt() throws IOException {
550        int result = 0;
551
552        while (true) {
553            int i = readByte();
554            // Inline strings are always terminated with a zero byte
555            if (i == 0) {
556                return result;
557            }
558            if (i >= '0' && i <= '9') {
559                result = (result * 10) + (i - '0');
560            } else {
561                throw new IOException("Non integer");
562            }
563        }
564    }
565
566    private int readInt() throws IOException {
567        int result = 0;
568        int i;
569
570        do {
571            i = readByte();
572            result = (result << 7) | (i & 0x7f);
573        } while ((i & 0x80) != 0);
574
575        return result;
576    }
577
578    /**
579     * Read an inline string from the stream
580     *
581     * @return the String as parsed from the stream
582     * @throws IOException
583     */
584    private String readInlineString() throws IOException {
585        ByteArrayOutputStream outputStream = new ByteArrayOutputStream(256);
586        while (true) {
587            int i = read();
588            if (i == 0) {
589                break;
590            } else if (i == EOF_BYTE) {
591                throw new EofException();
592            }
593            outputStream.write(i);
594        }
595        outputStream.flush();
596        String res = outputStream.toString("UTF-8");
597        outputStream.close();
598        return res;
599    }
600}
601