// Tokenizer.cpp revision 9ee93d18edb42d55441b636aa7e001260f1b758d
1a3477c862a5debcac7dfb076749059406ec59512Jeff Brown/* 2a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * Copyright (C) 2010 The Android Open Source Project 3a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * 4a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * Licensed under the Apache License, Version 2.0 (the "License"); 5a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * you may not use this file except in compliance with the License. 6a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * You may obtain a copy of the License at 7a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * 8a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * http://www.apache.org/licenses/LICENSE-2.0 9a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * 10a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * Unless required by applicable law or agreed to in writing, software 11a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * distributed under the License is distributed on an "AS IS" BASIS, 12a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * See the License for the specific language governing permissions and 14a3477c862a5debcac7dfb076749059406ec59512Jeff Brown * limitations under the License. 
15a3477c862a5debcac7dfb076749059406ec59512Jeff Brown */ 16a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 17a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#define LOG_TAG "Tokenizer" 18a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 19a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <stdlib.h> 20a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <unistd.h> 21a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <fcntl.h> 22a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <errno.h> 23a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <sys/types.h> 24a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <sys/stat.h> 25a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <utils/Log.h> 26a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#include <utils/Tokenizer.h> 27a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 28a3477c862a5debcac7dfb076749059406ec59512Jeff Brown// Enables debug output for the tokenizer. 29a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#define DEBUG_TOKENIZER 0 30a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 31a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 32a3477c862a5debcac7dfb076749059406ec59512Jeff Brownnamespace android { 33a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 34a3477c862a5debcac7dfb076749059406ec59512Jeff Brownstatic inline bool isDelimiter(char ch, const char* delimiters) { 35a3477c862a5debcac7dfb076749059406ec59512Jeff Brown return strchr(delimiters, ch) != NULL; 36a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 37a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 38a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 399ee93d18edb42d55441b636aa7e001260f1b758dJeff BrownTokenizer::Tokenizer(const String8& filename, FileMap* fileMap, 409ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown const char* buffer, size_t length) : 419ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown mFilename(filename), mFileMap(fileMap), mBuffer(buffer), mLength(length), 
42a3477c862a5debcac7dfb076749059406ec59512Jeff Brown mCurrent(buffer), mLineNumber(1) { 43a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 44a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 45a3477c862a5debcac7dfb076749059406ec59512Jeff BrownTokenizer::~Tokenizer() { 469ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown if (mFileMap) { 479ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown mFileMap->release(); 489ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown } 49a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 50a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 51a3477c862a5debcac7dfb076749059406ec59512Jeff Brownstatus_t Tokenizer::open(const String8& filename, Tokenizer** outTokenizer) { 52a3477c862a5debcac7dfb076749059406ec59512Jeff Brown *outTokenizer = NULL; 53a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 54a3477c862a5debcac7dfb076749059406ec59512Jeff Brown int result = NO_ERROR; 55a3477c862a5debcac7dfb076749059406ec59512Jeff Brown int fd = ::open(filename.string(), O_RDONLY); 56a3477c862a5debcac7dfb076749059406ec59512Jeff Brown if (fd < 0) { 57a3477c862a5debcac7dfb076749059406ec59512Jeff Brown result = -errno; 58a3477c862a5debcac7dfb076749059406ec59512Jeff Brown LOGE("Error opening file '%s', %s.", filename.string(), strerror(errno)); 59a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } else { 609ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown struct stat stat; 619ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown if (fstat(fd, &stat)) { 62a3477c862a5debcac7dfb076749059406ec59512Jeff Brown result = -errno; 63a3477c862a5debcac7dfb076749059406ec59512Jeff Brown LOGE("Error getting size of file '%s', %s.", filename.string(), strerror(errno)); 64a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } else { 65a3477c862a5debcac7dfb076749059406ec59512Jeff Brown size_t length = size_t(stat.st_size); 669ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown FileMap* fileMap = new FileMap(); 679ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown if 
(!fileMap->create(NULL, fd, 0, length, true)) { 689ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown result = NO_MEMORY; 69a3477c862a5debcac7dfb076749059406ec59512Jeff Brown LOGE("Error mapping file '%s', %s.", filename.string(), strerror(errno)); 70a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } else { 719ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown fileMap->advise(FileMap::SEQUENTIAL); 72a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 739ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown *outTokenizer = new Tokenizer(filename, fileMap, 749ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown static_cast<const char*>(fileMap->getDataPtr()), length); 75a3477c862a5debcac7dfb076749059406ec59512Jeff Brown if (!*outTokenizer) { 76a3477c862a5debcac7dfb076749059406ec59512Jeff Brown result = NO_MEMORY; 77a3477c862a5debcac7dfb076749059406ec59512Jeff Brown LOGE("Error allocating tokenizer for file=%s.", filename.string()); 78a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 79a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 809ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown if (result) { 819ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown fileMap->release(); 829ee93d18edb42d55441b636aa7e001260f1b758dJeff Brown } 83a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 84a3477c862a5debcac7dfb076749059406ec59512Jeff Brown close(fd); 85a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 86a3477c862a5debcac7dfb076749059406ec59512Jeff Brown return result; 87a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 88a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 89a3477c862a5debcac7dfb076749059406ec59512Jeff BrownString8 Tokenizer::getLocation() const { 90a3477c862a5debcac7dfb076749059406ec59512Jeff Brown String8 result; 91a3477c862a5debcac7dfb076749059406ec59512Jeff Brown result.appendFormat("%s:%d", mFilename.string(), mLineNumber); 92a3477c862a5debcac7dfb076749059406ec59512Jeff Brown return result; 93a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 
94a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 95a3477c862a5debcac7dfb076749059406ec59512Jeff BrownString8 Tokenizer::peekRemainderOfLine() const { 96a3477c862a5debcac7dfb076749059406ec59512Jeff Brown const char* end = getEnd(); 97a3477c862a5debcac7dfb076749059406ec59512Jeff Brown const char* eol = mCurrent; 98a3477c862a5debcac7dfb076749059406ec59512Jeff Brown while (eol != end) { 99a3477c862a5debcac7dfb076749059406ec59512Jeff Brown char ch = *eol; 100a3477c862a5debcac7dfb076749059406ec59512Jeff Brown if (ch == '\n') { 101a3477c862a5debcac7dfb076749059406ec59512Jeff Brown break; 102a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 103a3477c862a5debcac7dfb076749059406ec59512Jeff Brown eol += 1; 104a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 105a3477c862a5debcac7dfb076749059406ec59512Jeff Brown return String8(mCurrent, eol - mCurrent); 106a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 107a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 108a3477c862a5debcac7dfb076749059406ec59512Jeff BrownString8 Tokenizer::nextToken(const char* delimiters) { 109a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#if DEBUG_TOKENIZER 110a3477c862a5debcac7dfb076749059406ec59512Jeff Brown LOGD("nextToken"); 111a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#endif 112a3477c862a5debcac7dfb076749059406ec59512Jeff Brown const char* end = getEnd(); 113a3477c862a5debcac7dfb076749059406ec59512Jeff Brown const char* tokenStart = mCurrent; 114a3477c862a5debcac7dfb076749059406ec59512Jeff Brown while (mCurrent != end) { 115a3477c862a5debcac7dfb076749059406ec59512Jeff Brown char ch = *mCurrent; 116a3477c862a5debcac7dfb076749059406ec59512Jeff Brown if (ch == '\n' || isDelimiter(ch, delimiters)) { 117a3477c862a5debcac7dfb076749059406ec59512Jeff Brown break; 118a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 119a3477c862a5debcac7dfb076749059406ec59512Jeff Brown mCurrent += 1; 120a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 
121a3477c862a5debcac7dfb076749059406ec59512Jeff Brown return String8(tokenStart, mCurrent - tokenStart); 122a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 123a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 124a3477c862a5debcac7dfb076749059406ec59512Jeff Brownvoid Tokenizer::nextLine() { 125a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#if DEBUG_TOKENIZER 126a3477c862a5debcac7dfb076749059406ec59512Jeff Brown LOGD("nextLine"); 127a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#endif 128a3477c862a5debcac7dfb076749059406ec59512Jeff Brown const char* end = getEnd(); 129a3477c862a5debcac7dfb076749059406ec59512Jeff Brown while (mCurrent != end) { 130a3477c862a5debcac7dfb076749059406ec59512Jeff Brown char ch = *(mCurrent++); 131a3477c862a5debcac7dfb076749059406ec59512Jeff Brown if (ch == '\n') { 132a3477c862a5debcac7dfb076749059406ec59512Jeff Brown mLineNumber += 1; 133a3477c862a5debcac7dfb076749059406ec59512Jeff Brown break; 134a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 135a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 136a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 137a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 138a3477c862a5debcac7dfb076749059406ec59512Jeff Brownvoid Tokenizer::skipDelimiters(const char* delimiters) { 139a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#if DEBUG_TOKENIZER 140a3477c862a5debcac7dfb076749059406ec59512Jeff Brown LOGD("skipDelimiters"); 141a3477c862a5debcac7dfb076749059406ec59512Jeff Brown#endif 142a3477c862a5debcac7dfb076749059406ec59512Jeff Brown const char* end = getEnd(); 143a3477c862a5debcac7dfb076749059406ec59512Jeff Brown while (mCurrent != end) { 144a3477c862a5debcac7dfb076749059406ec59512Jeff Brown char ch = *mCurrent; 145a3477c862a5debcac7dfb076749059406ec59512Jeff Brown if (ch == '\n' || !isDelimiter(ch, delimiters)) { 146a3477c862a5debcac7dfb076749059406ec59512Jeff Brown break; 147a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 148a3477c862a5debcac7dfb076749059406ec59512Jeff 
Brown mCurrent += 1; 149a3477c862a5debcac7dfb076749059406ec59512Jeff Brown } 150a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} 151a3477c862a5debcac7dfb076749059406ec59512Jeff Brown 152a3477c862a5debcac7dfb076749059406ec59512Jeff Brown} // namespace android 153