Tokenizer.cpp revision 9ee93d18edb42d55441b636aa7e001260f1b758d
1a3477c862a5debcac7dfb076749059406ec59512Jeff Brown/*
2a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * Copyright (C) 2010 The Android Open Source Project
3a3477c862a5debcac7dfb076749059406ec59512Jeff Brown *
4a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * Licensed under the Apache License, Version 2.0 (the "License");
5a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * you may not use this file except in compliance with the License.
6a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * You may obtain a copy of the License at
7a3477c862a5debcac7dfb076749059406ec59512Jeff Brown *
8a3477c862a5debcac7dfb076749059406ec59512Jeff Brown *      http://www.apache.org/licenses/LICENSE-2.0
9a3477c862a5debcac7dfb076749059406ec59512Jeff Brown *
10a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * Unless required by applicable law or agreed to in writing, software
11a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * distributed under the License is distributed on an "AS IS" BASIS,
12a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * See the License for the specific language governing permissions and
14a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * limitations under the License.
15a3477c862a5debcac7dfb076749059406ec59512Jeff Brown */
16a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
17a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#define LOG_TAG "Tokenizer"
18a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
19a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <stdlib.h>
20a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <unistd.h>
21a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <fcntl.h>
22a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <errno.h>
23a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <sys/types.h>
24a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <sys/stat.h>
25a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <utils/Log.h>
26a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <utils/Tokenizer.h>
27a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
28a3477c862a5debcac7dfb076749059406ec59512Jeff Brown// Enables debug output for the tokenizer.
29a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#define DEBUG_TOKENIZER 0
30a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
31a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
32a3477c862a5debcac7dfb076749059406ec59512Jeff Brownnamespace android {
33a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
// Reports whether |ch| appears in the NUL-terminated character set
// |delimiters|.
//
// strchr() considers the terminating NUL to be part of the string, so a
// '\0' character is always reported as a delimiter.
static inline bool isDelimiter(char ch, const char* delimiters) {
    const char* match = strchr(delimiters, ch);
    return match != NULL;
}
37a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
38a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
399ee93d18edb42d55441b636aa7e001260f1b758dJeff BrownTokenizer::Tokenizer(const String8& filename, FileMap* fileMap,
409ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown        const char* buffer, size_t length) :
419ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown        mFilename(filename), mFileMap(fileMap), mBuffer(buffer), mLength(length),
42a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        mCurrent(buffer), mLineNumber(1) {
43a3477c862a5debcac7dfb076749059406ec59512Jeff Brown}
44a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
45a3477c862a5debcac7dfb076749059406ec59512Jeff BrownTokenizer::~Tokenizer() {
469ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown    if (mFileMap) {
479ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown        mFileMap->release();
489ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown    }
49a3477c862a5debcac7dfb076749059406ec59512Jeff Brown}
50a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
51a3477c862a5debcac7dfb076749059406ec59512Jeff Brownstatus_t Tokenizer::open(const String8& filename, Tokenizer** outTokenizer) {
52a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    *outTokenizer = NULL;
53a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
54a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    int result = NO_ERROR;
55a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    int fd = ::open(filename.string(), O_RDONLY);
56a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    if (fd < 0) {
57a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        result = -errno;
58a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        LOGE("Error opening file '%s', %s.", filename.string(), strerror(errno));
59a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    } else {
609ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown        struct stat stat;
619ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown        if (fstat(fd, &stat)) {
62a3477c862a5debcac7dfb076749059406ec59512Jeff Brown            result = -errno;
63a3477c862a5debcac7dfb076749059406ec59512Jeff Brown            LOGE("Error getting size of file '%s', %s.", filename.string(), strerror(errno));
64a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        } else {
65a3477c862a5debcac7dfb076749059406ec59512Jeff Brown            size_t length = size_t(stat.st_size);
669ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown            FileMap* fileMap = new FileMap();
679ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown            if (!fileMap->create(NULL, fd, 0, length, true)) {
689ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown                result = NO_MEMORY;
69a3477c862a5debcac7dfb076749059406ec59512Jeff Brown                LOGE("Error mapping file '%s', %s.", filename.string(), strerror(errno));
70a3477c862a5debcac7dfb076749059406ec59512Jeff Brown            } else {
719ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown                fileMap->advise(FileMap::SEQUENTIAL);
72a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
739ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown                *outTokenizer = new Tokenizer(filename, fileMap,
749ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown                        static_cast<const char*>(fileMap->getDataPtr()), length);
75a3477c862a5debcac7dfb076749059406ec59512Jeff Brown                if (!*outTokenizer) {
76a3477c862a5debcac7dfb076749059406ec59512Jeff Brown                    result = NO_MEMORY;
77a3477c862a5debcac7dfb076749059406ec59512Jeff Brown                    LOGE("Error allocating tokenizer for file=%s.", filename.string());
78a3477c862a5debcac7dfb076749059406ec59512Jeff Brown                }
79a3477c862a5debcac7dfb076749059406ec59512Jeff Brown            }
809ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown            if (result) {
819ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown                fileMap->release();
829ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown            }
83a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        }
84a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        close(fd);
85a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    }
86a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    return result;
87a3477c862a5debcac7dfb076749059406ec59512Jeff Brown}
88a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
89a3477c862a5debcac7dfb076749059406ec59512Jeff BrownString8 Tokenizer::getLocation() const {
90a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    String8 result;
91a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    result.appendFormat("%s:%d", mFilename.string(), mLineNumber);
92a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    return result;
93a3477c862a5debcac7dfb076749059406ec59512Jeff Brown}
94a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
95a3477c862a5debcac7dfb076749059406ec59512Jeff BrownString8 Tokenizer::peekRemainderOfLine() const {
96a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    const char* end = getEnd();
97a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    const char* eol = mCurrent;
98a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    while (eol != end) {
99a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        char ch = *eol;
100a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        if (ch == '\n') {
101a3477c862a5debcac7dfb076749059406ec59512Jeff Brown            break;
102a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        }
103a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        eol += 1;
104a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    }
105a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    return String8(mCurrent, eol - mCurrent);
106a3477c862a5debcac7dfb076749059406ec59512Jeff Brown}
107a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
108a3477c862a5debcac7dfb076749059406ec59512Jeff BrownString8 Tokenizer::nextToken(const char* delimiters) {
109a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#if DEBUG_TOKENIZER
110a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    LOGD("nextToken");
111a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#endif
112a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    const char* end = getEnd();
113a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    const char* tokenStart = mCurrent;
114a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    while (mCurrent != end) {
115a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        char ch = *mCurrent;
116a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        if (ch == '\n' || isDelimiter(ch, delimiters)) {
117a3477c862a5debcac7dfb076749059406ec59512Jeff Brown            break;
118a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        }
119a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        mCurrent += 1;
120a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    }
121a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    return String8(tokenStart, mCurrent - tokenStart);
122a3477c862a5debcac7dfb076749059406ec59512Jeff Brown}
123a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
124a3477c862a5debcac7dfb076749059406ec59512Jeff Brownvoid Tokenizer::nextLine() {
125a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#if DEBUG_TOKENIZER
126a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    LOGD("nextLine");
127a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#endif
128a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    const char* end = getEnd();
129a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    while (mCurrent != end) {
130a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        char ch = *(mCurrent++);
131a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        if (ch == '\n') {
132a3477c862a5debcac7dfb076749059406ec59512Jeff Brown            mLineNumber += 1;
133a3477c862a5debcac7dfb076749059406ec59512Jeff Brown            break;
134a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        }
135a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    }
136a3477c862a5debcac7dfb076749059406ec59512Jeff Brown}
137a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
138a3477c862a5debcac7dfb076749059406ec59512Jeff Brownvoid Tokenizer::skipDelimiters(const char* delimiters) {
139a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#if DEBUG_TOKENIZER
140a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    LOGD("skipDelimiters");
141a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#endif
142a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    const char* end = getEnd();
143a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    while (mCurrent != end) {
144a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        char ch = *mCurrent;
145a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        if (ch == '\n' || !isDelimiter(ch, delimiters)) {
146a3477c862a5debcac7dfb076749059406ec59512Jeff Brown            break;
147a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        }
148a3477c862a5debcac7dfb076749059406ec59512Jeff Brown        mCurrent += 1;
149a3477c862a5debcac7dfb076749059406ec59512Jeff Brown    }
150a3477c862a5debcac7dfb076749059406ec59512Jeff Brown}
151a3477c862a5debcac7dfb076749059406ec59512Jeff Brown
152a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} // namespace android
153