// Tokenizer.cpp revision eb0953307ce75cec031aedbf21abff08e5a737e5
1647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown/* 2647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown * Copyright (C) 2010 The Android Open Source Project 3647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown * 4647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown * Licensed under the Apache License, Version 2.0 (the "License"); 5647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown * you may not use this file except in compliance with the License. 6647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown * You may obtain a copy of the License at 7647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown * 8647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown * http://www.apache.org/licenses/LICENSE-2.0 9647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown * 10647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown * Unless required by applicable law or agreed to in writing, software 11647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown * distributed under the License is distributed on an "AS IS" BASIS, 12647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown * See the License for the specific language governing permissions and 14647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown * limitations under the License. 
15647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown */ 16647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 17647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#define LOG_TAG "Tokenizer" 18647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 19647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <stdlib.h> 20647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <unistd.h> 21647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <fcntl.h> 22647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <errno.h> 23647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <sys/types.h> 24647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <sys/stat.h> 25647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <utils/Log.h> 26647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#include <utils/Tokenizer.h> 27647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 28647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown// Enables debug output for the tokenizer. 29647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#define DEBUG_TOKENIZER 0 30647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 31647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 32647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brownnamespace android { 33647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 34647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brownstatic inline bool isDelimiter(char ch, const char* delimiters) { 35647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown return strchr(delimiters, ch) != NULL; 36647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown} 37647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 381d618d63c1bb99728b5b0afe320f5a6afa95436cJeff BrownTokenizer::Tokenizer(const String8& filename, FileMap* fileMap, char* buffer, size_t length) : 391d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown mFilename(filename), mFileMap(fileMap), 401d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown mBuffer(buffer), mLength(length), mCurrent(buffer), mLineNumber(1) { 
41647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown} 42647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 43647925ddf053989b641b4c5c8a51efd55c931f22Jeff BrownTokenizer::~Tokenizer() { 44d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown if (mFileMap) { 45d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown mFileMap->release(); 461d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown } else { 471d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown delete[] mBuffer; 48d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown } 49647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown} 50647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 51647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brownstatus_t Tokenizer::open(const String8& filename, Tokenizer** outTokenizer) { 52647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown *outTokenizer = NULL; 53647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 54647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown int result = NO_ERROR; 55647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown int fd = ::open(filename.string(), O_RDONLY); 56647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown if (fd < 0) { 57647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown result = -errno; 58647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown LOGE("Error opening file '%s', %s.", filename.string(), strerror(errno)); 59647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown } else { 60d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown struct stat stat; 61d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown if (fstat(fd, &stat)) { 62647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown result = -errno; 63647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown LOGE("Error getting size of file '%s', %s.", filename.string(), strerror(errno)); 64647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown } else { 65647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown size_t length = size_t(stat.st_size); 661d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown 67d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown 
FileMap* fileMap = new FileMap(); 681d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown char* buffer; 691d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown if (fileMap->create(NULL, fd, 0, length, true)) { 70d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown fileMap->advise(FileMap::SEQUENTIAL); 711d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown buffer = static_cast<char*>(fileMap->getDataPtr()); 721d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown } else { 731d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown fileMap->release(); 741d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown fileMap = NULL; 75647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 761d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown // Fall back to reading into a buffer since we can't mmap files in sysfs. 771d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown // The length we obtained from stat is wrong too (it will always be 4096) 781d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown // so we must trust that read will read the entire file. 
791d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown buffer = new char[length]; 801d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown ssize_t nrd = read(fd, buffer, length); 811d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown if (nrd < 0) { 821d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown result = -errno; 831d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown LOGE("Error reading file '%s', %s.", filename.string(), strerror(errno)); 841d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown delete[] buffer; 851d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown buffer = NULL; 861d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown } else { 871d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown length = size_t(nrd); 88647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown } 89647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown } 901d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown 911d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown if (!result) { 921d618d63c1bb99728b5b0afe320f5a6afa95436cJeff Brown *outTokenizer = new Tokenizer(filename, fileMap, buffer, length); 93d36ec3afdac0ca4158a786b96599ee8bdf64b043Jeff Brown } 94647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown } 95647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown close(fd); 96647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown } 97647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown return result; 98647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown} 99647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 100647925ddf053989b641b4c5c8a51efd55c931f22Jeff BrownString8 Tokenizer::getLocation() const { 101647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown String8 result; 102647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown result.appendFormat("%s:%d", mFilename.string(), mLineNumber); 103647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown return result; 104647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown} 105647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 106647925ddf053989b641b4c5c8a51efd55c931f22Jeff BrownString8 
Tokenizer::peekRemainderOfLine() const { 107647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown const char* end = getEnd(); 108647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown const char* eol = mCurrent; 109647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown while (eol != end) { 110647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown char ch = *eol; 111647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown if (ch == '\n') { 112647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown break; 113647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown } 114647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown eol += 1; 115647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown } 116647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown return String8(mCurrent, eol - mCurrent); 117647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown} 118647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 119647925ddf053989b641b4c5c8a51efd55c931f22Jeff BrownString8 Tokenizer::nextToken(const char* delimiters) { 120647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#if DEBUG_TOKENIZER 121eb0953307ce75cec031aedbf21abff08e5a737e5Steve Block ALOGD("nextToken"); 122647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#endif 123647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown const char* end = getEnd(); 124647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown const char* tokenStart = mCurrent; 125647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown while (mCurrent != end) { 126647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown char ch = *mCurrent; 127647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown if (ch == '\n' || isDelimiter(ch, delimiters)) { 128647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown break; 129647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown } 130647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown mCurrent += 1; 131647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown } 132647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown return String8(tokenStart, mCurrent - tokenStart); 
133647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown} 134647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 135647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brownvoid Tokenizer::nextLine() { 136647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#if DEBUG_TOKENIZER 137eb0953307ce75cec031aedbf21abff08e5a737e5Steve Block ALOGD("nextLine"); 138647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#endif 139647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown const char* end = getEnd(); 140647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown while (mCurrent != end) { 141647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown char ch = *(mCurrent++); 142647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown if (ch == '\n') { 143647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown mLineNumber += 1; 144647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown break; 145647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown } 146647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown } 147647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown} 148647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 149647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brownvoid Tokenizer::skipDelimiters(const char* delimiters) { 150647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#if DEBUG_TOKENIZER 151eb0953307ce75cec031aedbf21abff08e5a737e5Steve Block ALOGD("skipDelimiters"); 152647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown#endif 153647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown const char* end = getEnd(); 154647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown while (mCurrent != end) { 155647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown char ch = *mCurrent; 156647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown if (ch == '\n' || !isDelimiter(ch, delimiters)) { 157647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown break; 158647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown } 159647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown mCurrent += 1; 160647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown } 
161647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown} 162647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown 163647925ddf053989b641b4c5c8a51efd55c931f22Jeff Brown} // namespace android 164