1/*
2 * Copyright (C) 2008-2009 Marc Blank
3 * Licensed to The Android Open Source Project.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package com.android.exchange.adapter;
19
20import android.content.Context;
21import android.util.Log;
22
23import com.android.exchange.Eas;
24import com.android.exchange.EasException;
25import com.android.exchange.utility.FileLogger;
26import com.google.common.annotations.VisibleForTesting;
27
28import java.io.ByteArrayOutputStream;
29import java.io.FileNotFoundException;
30import java.io.FileOutputStream;
31import java.io.IOException;
32import java.io.InputStream;
33import java.util.ArrayList;
34
35/**
36 * Extremely fast and lightweight WBXML parser, implementing only the subset of WBXML that
37 * EAS uses (as defined in the EAS specification)
38 *
39 */
40public abstract class Parser {
41    private static final boolean LOG_VERBOSE = false;
42
43    // The following constants are Wbxml standard
44    public static final int START_DOCUMENT = 0;
45    public static final int DONE = 1;
46    public static final int START = 2;
47    public static final int END = 3;
48    public static final int TEXT = 4;
49    public static final int END_DOCUMENT = 3;
50    private static final int NOT_FETCHED = Integer.MIN_VALUE;
51    private static final int NOT_ENDED = Integer.MIN_VALUE;
52    private static final int EOF_BYTE = -1;
53    private boolean logging = false;
54    private boolean capture = false;
55    private String logTag = "EAS Parser";
56
57    // Where tags start in a page
58    private static final int TAG_BASE = 5;
59
60    private ArrayList<Integer> captureArray;
61
62    // The input stream for this parser
63    private InputStream in;
64
65    // The current tag depth
66    private int depth;
67
68    // The upcoming (saved) id from the stream
69    private int nextId = NOT_FETCHED;
70
71    // The current tag table (i.e. the tag table for the current page)
72    private String[] tagTable;
73
74    // An array of tag tables, as defined in EasTags
75    static private String[][] tagTables = new String[Tags.pages.length + 1][];
76
77    // The stack of names of tags being processed; used when debug = true
78    private String[] nameArray = new String[32];
79
80    // The stack of tags being processed
81    private int[] startTagArray = new int[32];
82
83    // The following vars are available to all to avoid method calls that represent the state of
84    // the parser at any given time
85    public int endTag = NOT_ENDED;
86
87    public int startTag;
88
89    // The type of the last token read
90    public int type;
91
92    // The current page
93    public int page;
94
95    // The current tag
96    public int tag;
97
98    // The name of the current tag
99    public String name;
100
101    // Whether the current tag is associated with content (a value)
102    public boolean noContent;
103
104    // The value read, as a String.  Only one of text or num will be valid, depending on whether the
105    // value was requested as a String or an int (to avoid wasted effort in parsing)
106    public String text;
107
108    // The value read, as an int
109    public int num;
110
111    // The value read, as bytes
112    public byte[] bytes;
113
114    /**
115     * Generated when the parser comes to EOF prematurely during parsing (i.e. in error)
116     */
117    public class EofException extends IOException {
118        private static final long serialVersionUID = 1L;
119    }
120
121    /**
122     * An EmptyStreamException is an EofException that occurs reading the first byte in the parser's
123     * input stream; in other words, the stream had no content.
124     */
125    public class EmptyStreamException extends EofException {
126        private static final long serialVersionUID = 1L;
127    }
128
129    public class EodException extends IOException {
130        private static final long serialVersionUID = 1L;
131    }
132
133    public class EasParserException extends IOException {
134        private static final long serialVersionUID = 1L;
135
136        EasParserException() {
137            super("WBXML format error");
138        }
139
140        EasParserException(String reason) {
141            super(reason);
142        }
143    }
144
145    public boolean parse() throws IOException, EasException {
146        return false;
147    }
148
149    /**
150     * Initialize the tag tables; they are constant
151     *
152     */
153    {
154        String[][] pages = Tags.pages;
155        for (int i = 0; i < pages.length; i++) {
156            String[] page = pages[i];
157            if (page.length > 0) {
158                tagTables[i] = page;
159            }
160        }
161    }
162
163    public Parser(InputStream in) throws IOException {
164        setInput(in, true);
165        logging = Eas.PARSER_LOG;
166    }
167
168    /**
169     * Constructor for use when switching parsers within a input stream
170     * @param parser an existing, initialized parser
171     * @throws IOException
172     */
173    public Parser(Parser parser) throws IOException {
174        setInput(parser.in, false);
175        logging = Eas.PARSER_LOG;
176    }
177
178    /**
179     * Set the debug state of the parser.  When debugging is on, every token is logged (Log.v) to
180     * the console.
181     *
182     * @param val the desired state for debug output
183     */
184    public void setDebug(boolean val) {
185        logging = val;
186    }
187
188    protected InputStream getInput() {
189        return in;
190    }
191
192    /**
193     * Set the tag used for logging.  When debugging is on, every token is logged (Log.v) to
194     * the console.
195     *
196     * @param val the logging tag
197     */
198    public void setLoggingTag(String val) {
199        logTag = val;
200    }
201
202    /**
203     * Turns on data capture; this is used to create test streams that represent "live" data and
204     * can be used against the various parsers.
205     */
206    public void captureOn() {
207        capture = true;
208        captureArray = new ArrayList<Integer>();
209    }
210
211    /**
212     * Turns off data capture; writes the captured data to a specified file.
213     */
214    public void captureOff(Context context, String file) {
215        try {
216            FileOutputStream out = context.openFileOutput(file, Context.MODE_WORLD_WRITEABLE);
217            out.write(captureArray.toString().getBytes());
218            out.close();
219        } catch (FileNotFoundException e) {
220            // This is debug code; exceptions aren't interesting.
221        } catch (IOException e) {
222            // This is debug code; exceptions aren't interesting.
223        }
224    }
225
226    /**
227     * Return the value of the current tag, as a byte array.  Note that the result of this call
228     * is indeterminate, and possibly null, if the value of the tag is not a byte array
229     *
230     * @return the byte array value of the current tag
231     * @throws IOException
232     */
233    public byte[] getValueBytes() throws IOException {
234        getValue();
235        return bytes;
236    }
237
238    /**
239     * Return the value of the current tag, as a String.  Note that the result of this call is
240     * indeterminate, and possibly null, if the value of the tag is not an immediate string
241     *
242     * @return the String value of the current tag
243     * @throws IOException
244     */
245    public String getValue() throws IOException {
246        // The false argument tells getNext to return the value as a String
247        getNext(false);
248        // This means there was no value given, just <Foo/>; we'll return empty string for now
249        if (type == END) {
250            if (logging) {
251                log("No value for tag: " + tagTable[startTag - TAG_BASE]);
252            }
253            return "";
254        }
255        // Save the value
256        String val = text;
257        // Read the next token; it had better be the end of the current tag
258        getNext(false);
259        // If not, throw an exception
260        if (type != END) {
261            throw new IOException("No END found!");
262        }
263        return val;
264    }
265
266    /**
267     * Return the value of the current tag, as an integer.  Note that the value of this call is
268     * indeterminate if the value of this tag is not an immediate string parsed as an integer
269     *
270     * @return the integer value of the current tag
271     * @throws IOException
272     */
273   public int getValueInt() throws IOException {
274        // The true argument to getNext indicates the desire for an integer return value
275        getNext(true);
276        if (type == END) {
277            return 0;
278        }
279        // Save the value
280        int val = num;
281        // Read the next token; it had better be the end of the current tag
282        getNext(false);
283        // If not, throw an exception
284        if (type != END) {
285            throw new IOException("No END found!");
286        }
287        return val;
288    }
289
290    /**
291     * Return the next tag found in the stream; special tags END and END_DOCUMENT are used to
292     * mark the end of the current tag and end of document.  If we hit end of document without
293     * looking for it, generate an EodException.  The tag returned consists of the page number
294     * shifted PAGE_SHIFT bits OR'd with the tag retrieved from the stream.  Thus, all tags returned
295     * are unique.
296     *
297     * @param endingTag the tag that would represent the end of the tag we're processing
298     * @return the next tag found
299     * @throws IOException
300     */
301    public int nextTag(int endingTag) throws IOException {
302        // Lose the page information
303        endTag = endingTag &= Tags.PAGE_MASK;
304        while (getNext(false) != DONE) {
305            // If we're a start, set tag to include the page and return it
306            if (type == START) {
307                tag = page | startTag;
308                return tag;
309            // If we're at the ending tag we're looking for, return the END signal
310            } else if (type == END && startTag == endTag) {
311                return END;
312            }
313        }
314        // We're at end of document here.  If we're looking for it, return END_DOCUMENT
315        if (endTag == START_DOCUMENT) {
316            return END_DOCUMENT;
317        }
318        // Otherwise, we've prematurely hit end of document, so exception out
319        // EodException is a subclass of IOException; this will be treated as an IO error by
320        // ExchangeService
321        throw new EodException();
322    }
323
324    /**
325     * Skip anything found in the stream until the end of the current tag is reached.  This can be
326     * used to ignore stretches of xml that aren't needed by the parser.
327     *
328     * @throws IOException
329     */
330    public void skipTag() throws IOException {
331        int thisTag = startTag;
332        // Just loop until we hit the end of the current tag
333        while (getNext(false) != DONE) {
334            if (type == END && startTag == thisTag) {
335                return;
336            }
337        }
338
339        // If we're at end of document, that's bad
340        throw new EofException();
341    }
342
343    /**
344     * Retrieve the next token from the input stream
345     *
346     * @return the token found
347     * @throws IOException
348     */
349    public int nextToken() throws IOException {
350        getNext(false);
351        return type;
352    }
353
354    /**
355     * Initializes the parser with an input stream; reads the first 4 bytes (which are always the
356     * same in EAS, and then sets the tag table to point to page 0 (by definition, the starting
357     * page).
358     *
359     * @param in the InputStream associated with this parser
360     * @throws IOException
361     */
362    public void setInput(InputStream in, boolean initialize) throws IOException {
363        this.in = in;
364        if ((in != null) && initialize) {
365            // If we fail on the very first byte, report an empty stream
366            try {
367                readByte(); // version
368            } catch (EofException e) {
369                throw new EmptyStreamException();
370            }
371            readInt();  // ?
372            readInt();  // 106 (UTF-8)
373            readInt();  // string table length
374        }
375        tagTable = tagTables[0];
376    }
377
378    @VisibleForTesting
379    void resetInput(InputStream in) {
380        this.in = in;
381        try {
382            // Read leading zero
383            read();
384        } catch (IOException e) {
385        }
386    }
387
388    void log(String str) {
389        int cr = str.indexOf('\n');
390        if (cr > 0) {
391            str = str.substring(0, cr);
392        }
393        Log.v(logTag, str);
394        if (Eas.FILE_LOG) {
395            FileLogger.log(logTag, str);
396        }
397    }
398
399    protected void pushTag(int id) {
400        page = id >> Tags.PAGE_SHIFT;
401        tagTable = tagTables[page];
402        push(id);
403    }
404
405    private void pop() {
406        if (logging) {
407            name = nameArray[depth];
408            log("</" + name + '>');
409        }
410        // Retrieve the now-current startTag from our stack
411        startTag = endTag = startTagArray[depth];
412        depth--;
413    }
414
415    private void push(int id) {
416        // The tag is in the low 6 bits
417        startTag = id & 0x3F;
418        // If the high bit is set, there is content (a value) to be read
419        noContent = (id & 0x40) == 0;
420        depth++;
421        if (logging) {
422            name = tagTable[startTag - TAG_BASE];
423            nameArray[depth] = name;
424            log("<" + name + (noContent ? '/' : "") + '>');
425        }
426        // Save the startTag to our stack
427        startTagArray[depth] = startTag;
428    }
429
430    /**
431     * Return the next piece of data from the stream.  The return value indicates the type of data
432     * that has been retrieved - START (start of tag), END (end of tag), DONE (end of stream), or
433     * TEXT (the value of a tag)
434     *
435     * @param asInt whether a TEXT value should be parsed as a String or an int.
436     * @return the type of data retrieved
437     * @throws IOException
438     */
439    private final int getNext(boolean asInt) throws IOException {
440        if (noContent) {
441            nameArray[depth--] = null;
442            type = END;
443            noContent = false;
444            return type;
445        }
446
447        text = null;
448        name = null;
449
450        int id = nextId ();
451        while (id == Wbxml.SWITCH_PAGE) {
452            nextId = NOT_FETCHED;
453            // Get the new page number
454            int pg = readByte();
455            // Save the shifted page to add into the startTag in nextTag
456            page = pg << Tags.PAGE_SHIFT;
457            if (LOG_VERBOSE) {
458                log("Page: " + page);
459            }
460            // Retrieve the current tag table
461            tagTable = tagTables[pg];
462            id = nextId();
463        }
464        nextId = NOT_FETCHED;
465
466        switch (id) {
467            case EOF_BYTE:
468                // End of document
469                type = DONE;
470                break;
471
472            case Wbxml.END:
473                type = END;
474                pop();
475                break;
476
477            case Wbxml.STR_I:
478                // Inline string
479                type = TEXT;
480                if (asInt) {
481                    num = readInlineInt();
482                } else {
483                    text = readInlineString();
484                }
485                if (logging) {
486                    name = tagTable[startTag - TAG_BASE];
487                    log(name + ": " + (asInt ? Integer.toString(num) : text));
488                }
489                break;
490
491            case Wbxml.OPAQUE:
492                // Integer length + opaque data
493                int length = readInt();
494                bytes = new byte[length];
495                for (int i = 0; i < length; i++) {
496                    bytes[i] = (byte)readByte();
497                }
498                if (logging) {
499                    name = tagTable[startTag - TAG_BASE];
500                    log(name + ": (opaque:" + length + ") ");
501                }
502                break;
503
504            default:
505                type = START;
506                push(id);
507        }
508
509        // Return the type of data we're dealing with
510        return type;
511    }
512
513    /**
514     * Read an int from the input stream, and capture it if necessary for debugging.  Seems a small
515     * price to pay...
516     *
517     * @return the int read
518     * @throws IOException
519     */
520    private int read() throws IOException {
521        int i;
522        i = in.read();
523        if (capture) {
524            captureArray.add(i);
525        }
526        if (LOG_VERBOSE) {
527            log("Byte: " + i);
528        }
529        return i;
530    }
531
532    private int nextId() throws IOException {
533        if (nextId == NOT_FETCHED) {
534            nextId = read();
535        }
536        return nextId;
537    }
538
539    private int readByte() throws IOException {
540        int i = read();
541        if (i == EOF_BYTE) {
542            throw new EofException();
543        }
544        return i;
545    }
546
547    /**
548     * Read an integer from the stream; this is called when the parser knows that what follows is
549     * an inline string representing an integer (e.g. the Read tag in Email has a value known to
550     * be either "0" or "1")
551     *
552     * @return the integer as parsed from the stream
553     * @throws IOException
554     */
555    private int readInlineInt() throws IOException {
556        int result = 0;
557
558        while (true) {
559            int i = readByte();
560            // Inline strings are always terminated with a zero byte
561            if (i == 0) {
562                return result;
563            }
564            if (i >= '0' && i <= '9') {
565                result = (result * 10) + (i - '0');
566            } else {
567                throw new IOException("Non integer");
568            }
569        }
570    }
571
572    private int readInt() throws IOException {
573        int result = 0;
574        int i;
575
576        do {
577            i = readByte();
578            result = (result << 7) | (i & 0x7f);
579        } while ((i & 0x80) != 0);
580
581        return result;
582    }
583
584    /**
585     * Read an inline string from the stream
586     *
587     * @return the String as parsed from the stream
588     * @throws IOException
589     */
590    private String readInlineString() throws IOException {
591        ByteArrayOutputStream outputStream = new ByteArrayOutputStream(256);
592        while (true) {
593            int i = read();
594            if (i == 0) {
595                break;
596            } else if (i == EOF_BYTE) {
597                throw new EofException();
598            }
599            outputStream.write(i);
600        }
601        outputStream.flush();
602        String res = outputStream.toString("UTF-8");
603        outputStream.close();
604        return res;
605    }
606}
607