/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _UTILS_TOKENIZER_H
#define _UTILS_TOKENIZER_H

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <utils/Errors.h>
#include <utils/FileMap.h>
#include <utils/String8.h>

25647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brownnamespace android {
26647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
27647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown/**
28647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown * A simple tokenizer for loading and parsing ASCII text files line by line.
29647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown */
30647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brownclass Tokenizer {
312c1627dc49994f83a636efd1970825b519bd93cbJeff Brown    Tokenizer(const String8& filename, FileMap* fileMap, char* buffer,
322c1627dc49994f83a636efd1970825b519bd93cbJeff Brown            bool ownBuffer, size_t length);
33647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
34647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brownpublic:
35647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    ~Tokenizer();
36647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
37647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    /**
38647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Opens a file and maps it into memory.
39647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     *
40647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Returns NO_ERROR and a tokenizer for the file, if successful.
41647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Otherwise returns an error and sets outTokenizer to NULL.
42647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     */
43647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    static status_t open(const String8& filename, Tokenizer** outTokenizer);
44647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
45647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    /**
462c1627dc49994f83a636efd1970825b519bd93cbJeff Brown     * Prepares to tokenize the contents of a string.
472c1627dc49994f83a636efd1970825b519bd93cbJeff Brown     *
482c1627dc49994f83a636efd1970825b519bd93cbJeff Brown     * Returns NO_ERROR and a tokenizer for the string, if successful.
492c1627dc49994f83a636efd1970825b519bd93cbJeff Brown     * Otherwise returns an error and sets outTokenizer to NULL.
502c1627dc49994f83a636efd1970825b519bd93cbJeff Brown     */
512c1627dc49994f83a636efd1970825b519bd93cbJeff Brown    static status_t fromContents(const String8& filename,
522c1627dc49994f83a636efd1970825b519bd93cbJeff Brown            const char* contents, Tokenizer** outTokenizer);
532c1627dc49994f83a636efd1970825b519bd93cbJeff Brown
542c1627dc49994f83a636efd1970825b519bd93cbJeff Brown    /**
55647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Returns true if at the end of the file.
56647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     */
57647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    inline bool isEof() const { return mCurrent == getEnd(); }
58647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
59647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    /**
60647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Returns true if at the end of the line or end of the file.
61647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     */
62647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    inline bool isEol() const { return isEof() || *mCurrent == '\n'; }
63647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
64647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    /**
65647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Gets the name of the file.
66647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     */
67647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    inline String8 getFilename() const { return mFilename; }
68647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
69647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    /**
70647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Gets a 1-based line number index for the current position.
71647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     */
72647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    inline int32_t getLineNumber() const { return mLineNumber; }
73647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
74647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    /**
75647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Formats a location string consisting of the filename and current line number.
76647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Returns a string like "MyFile.txt:33".
77647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     */
78647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    String8 getLocation() const;
79647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
80647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    /**
81647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Gets the character at the current position.
82647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Returns null at end of file.
83647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     */
84647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    inline char peekChar() const { return isEof() ? '\0' : *mCurrent; }
85647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
86647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    /**
87647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Gets the remainder of the current line as a string, excluding the newline character.
88647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     */
89647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    String8 peekRemainderOfLine() const;
90647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
91647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    /**
92647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Gets the character at the current position and advances past it.
93647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Returns null at end of file.
94647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     */
95647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    inline char nextChar() { return isEof() ? '\0' : *(mCurrent++); }
96647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
97647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    /**
98647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Gets the next token on this line stopping at the specified delimiters
99647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * or the end of the line whichever comes first and advances past it.
100647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Also stops at embedded nulls.
101647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Returns the token or an empty string if the current character is a delimiter
102647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * or is at the end of the line.
103647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     */
104647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    String8 nextToken(const char* delimiters);
105647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
106647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    /**
107647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Advances to the next line.
108647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Does nothing if already at the end of the file.
109647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     */
110647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    void nextLine();
111647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
112647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    /**
113647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Skips over the specified delimiters in the line.
114647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     * Also skips embedded nulls.
115647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown     */
116647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    void skipDelimiters(const char* delimiters);
117647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
118647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brownprivate:
119647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    Tokenizer(const Tokenizer& other); // not copyable
120647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
121647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    String8 mFilename;
122d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown    FileMap* mFileMap;
1231d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown    char* mBuffer;
1242c1627dc49994f83a636efd1970825b519bd93cbJeff Brown    bool mOwnBuffer;
125647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    size_t mLength;
126647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
127647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    const char* mCurrent;
128647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    int32_t mLineNumber;
129647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
130647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    inline const char* getEnd() const { return mBuffer + mLength; }
131647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
132647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown};
133647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
134647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown} // namespace android

#endif // _UTILS_TOKENIZER_H