// Tokenizer.cpp revision db360642ed7a48eb3b3607a791bbe449cc6529bb
1a3477c862a5debcac7dfb076749059406ec59512Jeff Brown/* 2a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * Copyright (C) 2010 The Android Open Source Project 3a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * 4a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * Licensed under the Apache License, Version 2.0 (the "License"); 5a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * you may not use this file except in compliance with the License. 6a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * You may obtain a copy of the License at 7a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * 8a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * http://www.apache.org/licenses/LICENSE-2.0 9a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * 10a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * Unless required by applicable law or agreed to in writing, software 11a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * distributed under the License is distributed on an "AS IS" BASIS, 12a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * See the License for the specific language governing permissions and 14a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * limitations under the License. 
15a3477c862a5debcac7dfb076749059406ec59512Jeff Brown */ 16a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 17a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#define LOG_TAG "Tokenizer" 18a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 19a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <stdlib.h> 20a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <unistd.h> 21a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <fcntl.h> 22a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <errno.h> 23a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <sys/types.h> 24a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <sys/stat.h> 25a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <utils/Log.h> 26a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <utils/Tokenizer.h> 27a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 28a3477c862a5debcac7dfb076749059406ec59512Jeff Brown// Enables debug output for the tokenizer. 29a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#define DEBUG_TOKENIZER 0 30a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 31a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 32a3477c862a5debcac7dfb076749059406ec59512Jeff Brownnamespace android { 33a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 34a3477c862a5debcac7dfb076749059406ec59512Jeff Brownstatic inline bool isDelimiter(char ch, const char* delimiters) { 35a3477c862a5debcac7dfb076749059406ec59512Jeff Brown return strchr(delimiters, ch) != NULL; 36a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 37a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 38db360642ed7a48eb3b3607a791bbe449cc6529bbJeff BrownTokenizer::Tokenizer(const String8& filename, FileMap* fileMap, char* buffer, size_t length) : 39db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown mFilename(filename), mFileMap(fileMap), 40db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown mBuffer(buffer), mLength(length), mCurrent(buffer), mLineNumber(1) { 
41a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 42a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 43a3477c862a5debcac7dfb076749059406ec59512Jeff BrownTokenizer::~Tokenizer() { 449ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown if (mFileMap) { 459ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown mFileMap->release(); 46db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown } else { 47db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown delete[] mBuffer; 489ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown } 49a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 50a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 51a3477c862a5debcac7dfb076749059406ec59512Jeff Brownstatus_t Tokenizer::open(const String8& filename, Tokenizer** outTokenizer) { 52a3477c862a5debcac7dfb076749059406ec59512Jeff Brown *outTokenizer = NULL; 53a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 54a3477c862a5debcac7dfb076749059406ec59512Jeff Brown int result = NO_ERROR; 55a3477c862a5debcac7dfb076749059406ec59512Jeff Brown int fd = ::open(filename.string(), O_RDONLY); 56a3477c862a5debcac7dfb076749059406ec59512Jeff Brown if (fd < 0) { 57a3477c862a5debcac7dfb076749059406ec59512Jeff Brown result = -errno; 58a3477c862a5debcac7dfb076749059406ec59512Jeff Brown LOGE("Error opening file '%s', %s.", filename.string(), strerror(errno)); 59a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } else { 609ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown struct stat stat; 619ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown if (fstat(fd, &stat)) { 62a3477c862a5debcac7dfb076749059406ec59512Jeff Brown result = -errno; 63a3477c862a5debcac7dfb076749059406ec59512Jeff Brown LOGE("Error getting size of file '%s', %s.", filename.string(), strerror(errno)); 64a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } else { 65a3477c862a5debcac7dfb076749059406ec59512Jeff Brown size_t length = size_t(stat.st_size); 66db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown 679ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown 
FileMap* fileMap = new FileMap(); 68db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown char* buffer; 69db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown if (fileMap->create(NULL, fd, 0, length, true)) { 709ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown fileMap->advise(FileMap::SEQUENTIAL); 71db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown buffer = static_cast<char*>(fileMap->getDataPtr()); 72db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown } else { 73db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown fileMap->release(); 74db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown fileMap = NULL; 75a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 76db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown // Fall back to reading into a buffer since we can't mmap files in sysfs. 77db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown // The length we obtained from stat is wrong too (it will always be 4096) 78db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown // so we must trust that read will read the entire file. 
79db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown buffer = new char[length]; 80db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown ssize_t nrd = read(fd, buffer, length); 81db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown if (nrd < 0) { 82db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown result = -errno; 83db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown LOGE("Error reading file '%s', %s.", filename.string(), strerror(errno)); 84db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown delete[] buffer; 85db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown buffer = NULL; 86db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown } else { 87db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown length = size_t(nrd); 88a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 89a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 90db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown 91db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown if (!result) { 92db360642ed7a48eb3b3607a791bbe449cc6529bbJeff Brown *outTokenizer = new Tokenizer(filename, fileMap, buffer, length); 939ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown } 94a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 95a3477c862a5debcac7dfb076749059406ec59512Jeff Brown close(fd); 96a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 97a3477c862a5debcac7dfb076749059406ec59512Jeff Brown return result; 98a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 99a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 100a3477c862a5debcac7dfb076749059406ec59512Jeff BrownString8 Tokenizer::getLocation() const { 101a3477c862a5debcac7dfb076749059406ec59512Jeff Brown String8 result; 102a3477c862a5debcac7dfb076749059406ec59512Jeff Brown result.appendFormat("%s:%d", mFilename.string(), mLineNumber); 103a3477c862a5debcac7dfb076749059406ec59512Jeff Brown return result; 104a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 105a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 106a3477c862a5debcac7dfb076749059406ec59512Jeff BrownString8 
Tokenizer::peekRemainderOfLine() const { 107a3477c862a5debcac7dfb076749059406ec59512Jeff Brown const char* end = getEnd(); 108a3477c862a5debcac7dfb076749059406ec59512Jeff Brown const char* eol = mCurrent; 109a3477c862a5debcac7dfb076749059406ec59512Jeff Brown while (eol != end) { 110a3477c862a5debcac7dfb076749059406ec59512Jeff Brown char ch = *eol; 111a3477c862a5debcac7dfb076749059406ec59512Jeff Brown if (ch == '\n') { 112a3477c862a5debcac7dfb076749059406ec59512Jeff Brown break; 113a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 114a3477c862a5debcac7dfb076749059406ec59512Jeff Brown eol += 1; 115a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 116a3477c862a5debcac7dfb076749059406ec59512Jeff Brown return String8(mCurrent, eol - mCurrent); 117a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 118a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 119a3477c862a5debcac7dfb076749059406ec59512Jeff BrownString8 Tokenizer::nextToken(const char* delimiters) { 120a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#if DEBUG_TOKENIZER 121a3477c862a5debcac7dfb076749059406ec59512Jeff Brown LOGD("nextToken"); 122a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#endif 123a3477c862a5debcac7dfb076749059406ec59512Jeff Brown const char* end = getEnd(); 124a3477c862a5debcac7dfb076749059406ec59512Jeff Brown const char* tokenStart = mCurrent; 125a3477c862a5debcac7dfb076749059406ec59512Jeff Brown while (mCurrent != end) { 126a3477c862a5debcac7dfb076749059406ec59512Jeff Brown char ch = *mCurrent; 127a3477c862a5debcac7dfb076749059406ec59512Jeff Brown if (ch == '\n' || isDelimiter(ch, delimiters)) { 128a3477c862a5debcac7dfb076749059406ec59512Jeff Brown break; 129a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 130a3477c862a5debcac7dfb076749059406ec59512Jeff Brown mCurrent += 1; 131a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 132a3477c862a5debcac7dfb076749059406ec59512Jeff Brown return String8(tokenStart, mCurrent - tokenStart); 
133a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 134a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 135a3477c862a5debcac7dfb076749059406ec59512Jeff Brownvoid Tokenizer::nextLine() { 136a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#if DEBUG_TOKENIZER 137a3477c862a5debcac7dfb076749059406ec59512Jeff Brown LOGD("nextLine"); 138a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#endif 139a3477c862a5debcac7dfb076749059406ec59512Jeff Brown const char* end = getEnd(); 140a3477c862a5debcac7dfb076749059406ec59512Jeff Brown while (mCurrent != end) { 141a3477c862a5debcac7dfb076749059406ec59512Jeff Brown char ch = *(mCurrent++); 142a3477c862a5debcac7dfb076749059406ec59512Jeff Brown if (ch == '\n') { 143a3477c862a5debcac7dfb076749059406ec59512Jeff Brown mLineNumber += 1; 144a3477c862a5debcac7dfb076749059406ec59512Jeff Brown break; 145a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 146a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 147a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 148a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 149a3477c862a5debcac7dfb076749059406ec59512Jeff Brownvoid Tokenizer::skipDelimiters(const char* delimiters) { 150a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#if DEBUG_TOKENIZER 151a3477c862a5debcac7dfb076749059406ec59512Jeff Brown LOGD("skipDelimiters"); 152a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#endif 153a3477c862a5debcac7dfb076749059406ec59512Jeff Brown const char* end = getEnd(); 154a3477c862a5debcac7dfb076749059406ec59512Jeff Brown while (mCurrent != end) { 155a3477c862a5debcac7dfb076749059406ec59512Jeff Brown char ch = *mCurrent; 156a3477c862a5debcac7dfb076749059406ec59512Jeff Brown if (ch == '\n' || !isDelimiter(ch, delimiters)) { 157a3477c862a5debcac7dfb076749059406ec59512Jeff Brown break; 158a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 159a3477c862a5debcac7dfb076749059406ec59512Jeff Brown mCurrent += 1; 160a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 
161a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 162a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 163a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} // namespace android 164