Tokenizer.cpp revision eb0953307ce75cec031aedbf21abff08e5a737e5
/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
17647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#define LOG_TAG "Tokenizer"
18647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
19647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <stdlib.h>
20647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <unistd.h>
21647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <fcntl.h>
22647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <errno.h>
23647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <sys/types.h>
24647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <sys/stat.h>
25647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <utils/Log.h>
26647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <utils/Tokenizer.h>
27647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
// Enables debug output for the tokenizer.
29647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#define DEBUG_TOKENIZER 0
30647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
31647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
32647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brownnamespace android {
33647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
// Returns true when `ch` occurs in the NUL-terminated string `delimiters`.
//
// A null `delimiters` pointer is treated as "no delimiters" and yields false
// instead of being handed to strchr().
//
// Note: strchr() also matches the string's terminating NUL, so ch == '\0' is
// reported as a delimiter for any non-null `delimiters`.
static inline bool isDelimiter(char ch, const char* delimiters) {
    if (delimiters == NULL) {
        return false;
    }
    return strchr(delimiters, ch) != NULL;
}
37647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
381d618d63c1bb99728b5b0afe320f5a6afa95436cJeff BrownTokenizer::Tokenizer(const String8& filename, FileMap* fileMap, char* buffer, size_t length) :
391d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown        mFilename(filename), mFileMap(fileMap),
401d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown        mBuffer(buffer), mLength(length), mCurrent(buffer), mLineNumber(1) {
41647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown}
42647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
43647925ddf053989b641b4c5c8a51efd55c931f22Jeff BrownTokenizer::~Tokenizer() {
44d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown    if (mFileMap) {
45d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown        mFileMap->release();
461d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown    } else {
471d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown        delete[] mBuffer;
48d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown    }
49647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown}
50647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
51647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brownstatus_t Tokenizer::open(const String8& filename, Tokenizer** outTokenizer) {
52647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    *outTokenizer = NULL;
53647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
54647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    int result = NO_ERROR;
55647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    int fd = ::open(filename.string(), O_RDONLY);
56647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    if (fd < 0) {
57647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        result = -errno;
58647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        LOGE("Error opening file '%s', %s.", filename.string(), strerror(errno));
59647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    } else {
60d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown        struct stat stat;
61d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown        if (fstat(fd, &stat)) {
62647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown            result = -errno;
63647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown            LOGE("Error getting size of file '%s', %s.", filename.string(), strerror(errno));
64647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        } else {
65647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown            size_t length = size_t(stat.st_size);
661d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown
67d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown            FileMap* fileMap = new FileMap();
681d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown            char* buffer;
691d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown            if (fileMap->create(NULL, fd, 0, length, true)) {
70d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown                fileMap->advise(FileMap::SEQUENTIAL);
711d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                buffer = static_cast<char*>(fileMap->getDataPtr());
721d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown            } else {
731d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                fileMap->release();
741d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                fileMap = NULL;
75647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
761d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                // Fall back to reading into a buffer since we can't mmap files in sysfs.
771d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                // The length we obtained from stat is wrong too (it will always be 4096)
781d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                // so we must trust that read will read the entire file.
791d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                buffer = new char[length];
801d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                ssize_t nrd = read(fd, buffer, length);
811d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                if (nrd < 0) {
821d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                    result = -errno;
831d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                    LOGE("Error reading file '%s', %s.", filename.string(), strerror(errno));
841d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                    delete[] buffer;
851d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                    buffer = NULL;
861d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                } else {
871d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                    length = size_t(nrd);
88647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown                }
89647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown            }
901d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown
911d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown            if (!result) {
921d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown                *outTokenizer = new Tokenizer(filename, fileMap, buffer, length);
93d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown            }
94647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        }
95647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        close(fd);
96647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    }
97647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    return result;
98647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown}
99647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
100647925ddf053989b641b4c5c8a51efd55c931f22Jeff BrownString8 Tokenizer::getLocation() const {
101647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    String8 result;
102647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    result.appendFormat("%s:%d", mFilename.string(), mLineNumber);
103647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    return result;
104647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown}
105647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
106647925ddf053989b641b4c5c8a51efd55c931f22Jeff BrownString8 Tokenizer::peekRemainderOfLine() const {
107647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    const char* end = getEnd();
108647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    const char* eol = mCurrent;
109647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    while (eol != end) {
110647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        char ch = *eol;
111647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        if (ch == '\n') {
112647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown            break;
113647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        }
114647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        eol += 1;
115647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    }
116647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    return String8(mCurrent, eol - mCurrent);
117647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown}
118647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
119647925ddf053989b641b4c5c8a51efd55c931f22Jeff BrownString8 Tokenizer::nextToken(const char* delimiters) {
120647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#if DEBUG_TOKENIZER
121eb0953307ce75cec031aedbf21abff08e5a737e5Steve Block    ALOGD("nextToken");
122647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#endif
123647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    const char* end = getEnd();
124647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    const char* tokenStart = mCurrent;
125647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    while (mCurrent != end) {
126647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        char ch = *mCurrent;
127647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        if (ch == '\n' || isDelimiter(ch, delimiters)) {
128647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown            break;
129647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        }
130647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        mCurrent += 1;
131647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    }
132647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    return String8(tokenStart, mCurrent - tokenStart);
133647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown}
134647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
135647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brownvoid Tokenizer::nextLine() {
136647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#if DEBUG_TOKENIZER
137eb0953307ce75cec031aedbf21abff08e5a737e5Steve Block    ALOGD("nextLine");
138647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#endif
139647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    const char* end = getEnd();
140647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    while (mCurrent != end) {
141647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        char ch = *(mCurrent++);
142647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        if (ch == '\n') {
143647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown            mLineNumber += 1;
144647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown            break;
145647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        }
146647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    }
147647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown}
148647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
149647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brownvoid Tokenizer::skipDelimiters(const char* delimiters) {
150647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#if DEBUG_TOKENIZER
151eb0953307ce75cec031aedbf21abff08e5a737e5Steve Block    ALOGD("skipDelimiters");
152647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#endif
153647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    const char* end = getEnd();
154647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    while (mCurrent != end) {
155647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        char ch = *mCurrent;
156647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        if (ch == '\n' || !isDelimiter(ch, delimiters)) {
157647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown            break;
158647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        }
159647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown        mCurrent += 1;
160647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown    }
161647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown}
162647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown
163647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown} // namespace android
164