// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 31fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// from google3/strings/strutil.cc 32fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 33fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#include <google/protobuf/stubs/strutil.h> 34fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#include <errno.h> 35fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#include <float.h> // FLT_DIG and DBL_DIG 36fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#include <limits> 37fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#include <limits.h> 38fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#include <stdio.h> 39d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville#include <iterator> 40fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 41fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#ifdef _WIN32 42fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// MSVC has only _snprintf, not snprintf. 43fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// 44fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// MinGW has both snprintf and _snprintf, but they appear to be different 45fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// functions. The former is buggy. When invoked like so: 46fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// char buffer[32]; 47fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// snprintf(buffer, 32, "%.*g\n", FLT_DIG, 1.23e10f); 48fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// it prints "1.23000e+10". This is plainly wrong: %g should never print 49fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// trailing zeros after the decimal point. For some reason this bug only 50fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// occurs with some input values, not all. In any case, _snprintf does the 51fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// right thing, so we use it. 
52fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#define snprintf _snprintf 53fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#endif 54fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 55fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillenamespace google { 56fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillenamespace protobuf { 57fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 58fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savilleinline bool IsNaN(double value) { 59fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // NaN is never equal to anything, even itself. 60fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return value != value; 61fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 62fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 63fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// These are defined as macros on some platforms. #undef them so that we can 64fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// redefine them. 65fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#undef isxdigit 66fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#undef isprint 67fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 68fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// The definitions of these in ctype.h change based on locale. Since our 69fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// string manipulation is all in relation to the protocol buffer and C++ 70fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// languages, we always want to use the C locale. So, we re-define these 71fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// exactly as we want them. 
// Locale-independent hexadecimal-digit test: returns true only for the
// characters '0'-'9', 'a'-'f' and 'A'-'F'.
inline bool isxdigit(char c) {
  return ('0' <= c && c <= '9') ||
         ('a' <= c && c <= 'f') ||
         ('A' <= c && c <= 'F');
}

// Locale-independent printable-character test: returns true only for the
// range 0x20 (space) through 0x7E ('~').
inline bool isprint(char c) {
  return c >= 0x20 && c <= 0x7E;
}

// ----------------------------------------------------------------------
// StripString
//    Replaces any occurrence of the character 'remove' (or the characters
//    in 'remove') with the character 'replacewith'.
// ----------------------------------------------------------------------
//
// 's' is modified in place; its length never changes.  'remove' is a
// NUL-terminated set of characters; an empty set leaves 's' untouched.
//
// The scan uses string::find_first_of() on the string itself rather than
// strpbrk() on a cached c_str() pointer, so that
//   * characters after an embedded NUL in 's' are processed too, and
//   * no raw pointer into the buffer is held across the non-const
//     operator[] write (which may reallocate on copy-on-write strings).
void StripString(std::string* s, const char* remove, char replacewith) {
  for (std::string::size_type pos = s->find_first_of(remove);
       pos != std::string::npos;
       pos = s->find_first_of(remove, pos + 1)) {
    (*s)[pos] = replacewith;
  }
}

// ----------------------------------------------------------------------
// StringReplace()
//    Replace the "old" pattern with the "new" pattern in a string,
//    and append the result to "res".  If replace_all is false,
//    it only replaces the first instance of "old."
// ----------------------------------------------------------------------

// Appends to '*res' a copy of 's' in which 'oldsub' has been replaced by
// 'newsub'.  Existing contents of '*res' are kept.  If 'oldsub' is empty,
// 's' is appended unchanged.  Only the first occurrence is replaced unless
// 'replace_all' is true.  'res' must not point at 's', since 's' is read
// while '*res' is being appended to.
void StringReplace(const std::string& s, const std::string& oldsub,
                   const std::string& newsub, bool replace_all,
                   std::string* res) {
  if (oldsub.empty()) {
    res->append(s);  // if empty, append the given string.
    return;
  }

  std::string::size_type start_pos = 0;
  std::string::size_type pos;
  do {
    pos = s.find(oldsub, start_pos);
    if (pos == std::string::npos) {
      break;
    }
    // Copy the text between the previous match and this one, then the
    // replacement.
    res->append(s, start_pos, pos - start_pos);
    res->append(newsub);
    start_pos = pos + oldsub.size();  // start searching again after the "old"
  } while (replace_all);
  // Copy whatever follows the last match (or all of 's' if nothing matched).
  res->append(s, start_pos, s.length() - start_pos);
}

126fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 127fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// StringReplace() 128fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Give me a string and two patterns "old" and "new", and I replace 129fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// the first instance of "old" in the string with "new", if it 130fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// exists. If "global" is true; call this repeatedly until it 131fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// fails. RETURN a new string, regardless of whether the replacement 132fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// happened or not. 133fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 134fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 135fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestring StringReplace(const string& s, const string& oldsub, 136fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const string& newsub, bool replace_all) { 137fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville string ret; 138fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville StringReplace(s, oldsub, newsub, replace_all, &ret); 139fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return ret; 140fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 141fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 142fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 143fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// SplitStringUsing() 144fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Split a string using a character delimiter. Append the components 145fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// to 'result'. 
//
// Note: For multi-character delimiters, this routine will split on *ANY* of
// the characters in the string, not the entire string as a single delimiter.
//
// Runs of consecutive delimiters, and delimiters at either end of 'full',
// produce no empty components.
// ----------------------------------------------------------------------
template <typename ITR>
static inline
void SplitStringToIteratorUsing(const std::string& full,
                                const char* delim,
                                ITR& result) {
  // Optimize the common case where delim is a single character.
  if (delim[0] != '\0' && delim[1] == '\0') {
    const char sep = delim[0];
    const char* p = full.data();
    const char* const end = p + full.size();
    while (p != end) {
      if (*p == sep) {
        ++p;  // skip delimiters between components
        continue;
      }
      // 'p' is on the first character of a component; advance to the next
      // delimiter or to the end of the input.
      const char* const start = p;
      do {
        ++p;
      } while (p != end && *p != sep);
      *result++ = std::string(start, p - start);
    }
    return;
  }

  // General case: 'delim' is a set of delimiter characters (possibly empty).
  std::string::size_type begin_index, end_index;
  begin_index = full.find_first_not_of(delim);
  while (begin_index != std::string::npos) {
    end_index = full.find_first_of(delim, begin_index);
    if (end_index == std::string::npos) {
      // Last component runs to the end of the string.
      *result++ = full.substr(begin_index);
      return;
    }
    *result++ = full.substr(begin_index, (end_index - begin_index));
    begin_index = full.find_first_not_of(delim, end_index);
  }
}

// Splits 'full' on any character in 'delim' and appends the non-empty
// components to '*result'; existing elements of '*result' are kept.
void SplitStringUsing(const std::string& full,
                      const char* delim,
                      std::vector<std::string>* result) {
  std::back_insert_iterator< std::vector<std::string> > it(*result);
  SplitStringToIteratorUsing(full, delim, it);
}

// Split a string using a character delimiter.
// Append the components
// to 'result'.  If there are consecutive delimiters, this function
// will return corresponding empty strings. The string is split into
// at most the specified number of pieces greedily. This means that the
// last piece may possibly be split further. To split into as many pieces
// as possible, specify 0 as the number of pieces.
//
// If "full" is the empty string, yields an empty string as the only value.
//
// If "pieces" is negative for some reason, it returns the whole string
// ----------------------------------------------------------------------
template <typename StringType, typename ITR>
static inline
void SplitStringToIteratorAllowEmpty(const StringType& full,
                                     const char* delim,
                                     int pieces,
                                     ITR& result) {
  std::string::size_type begin_index, end_index;
  begin_index = 0;

  // pieces == 0 means "no limit".  Otherwise at most pieces-1 splits are
  // performed.  The count is kept as a non-negative number of remaining
  // splits so that neither "pieces - 1" (for INT_MIN) nor an ever-growing
  // loop index (for the unlimited case) can overflow.
  const bool unlimited = (pieces == 0);
  int splits_left = (pieces > 0) ? pieces - 1 : 0;

  while (unlimited || splits_left > 0) {
    end_index = full.find_first_of(delim, begin_index);
    if (end_index == std::string::npos) {
      // No more delimiters: the remainder is the final piece.
      *result++ = full.substr(begin_index);
      return;
    }
    *result++ = full.substr(begin_index, (end_index - begin_index));
    begin_index = end_index + 1;
    if (!unlimited) {
      --splits_left;
    }
  }
  // Piece limit reached: emit everything that is left, unsplit.
  *result++ = full.substr(begin_index);
}

// Splits 'full' on any character in 'delim' into as many pieces as possible,
// keeping empty pieces, and appends them to '*result'.
void SplitStringAllowEmpty(const std::string& full, const char* delim,
                           std::vector<std::string>* result) {
  std::back_insert_iterator<std::vector<std::string> > it(*result);
  SplitStringToIteratorAllowEmpty(full, delim, 0, it);
}

// ----------------------------------------------------------------------
// JoinStrings()
//    This merges a vector of string components with delim inserted
//    as separators between components.
234fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// 235fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 236fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savilletemplate <class ITERATOR> 237fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestatic void JoinStringsIterator(const ITERATOR& start, 238fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const ITERATOR& end, 239fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const char* delim, 240fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville string* result) { 241fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville GOOGLE_CHECK(result != NULL); 242fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville result->clear(); 243fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int delim_length = strlen(delim); 244fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 245fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Precompute resulting length so we can reserve() memory in one shot. 246fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int length = 0; 247fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville for (ITERATOR iter = start; iter != end; ++iter) { 248fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (iter != start) { 249fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville length += delim_length; 250fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 251fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville length += iter->size(); 252fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 253fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville result->reserve(length); 254fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 255fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Now combine everything. 
256fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville for (ITERATOR iter = start; iter != end; ++iter) { 257fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (iter != start) { 258fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville result->append(delim, delim_length); 259fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 260fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville result->append(iter->data(), iter->size()); 261fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 262fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 263fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 264fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillevoid JoinStrings(const vector<string>& components, 265fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const char* delim, 266fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville string * result) { 267fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville JoinStringsIterator(components.begin(), components.end(), delim, result); 268fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 269fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 270fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 271fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// UnescapeCEscapeSequences() 272fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// This does all the unescaping that C does: \ooo, \r, \n, etc 273fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Returns length of resulting string. 274fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// The implementation of \x parses any positive number of hex digits, 275fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// but it is an error if the value requires more than 8 bits, and the 276fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// result is truncated to 8 bits. 
//
//    The second call stores its errors in a supplied string vector.
//    If the string vector pointer is NULL, it reports the errors with LOG().
// ----------------------------------------------------------------------

// True iff 'c' is one of '0'..'7'.  Note that 'c' is evaluated twice, so the
// argument must not have side effects.
#define IS_OCTAL_DIGIT(c) (((c) >= '0') && ((c) <= '7'))

// Converts a single hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F')
// to its numeric value 0..15.  'c' must be a hex digit; this is only
// checked by assert().
inline int hex_digit_to_int(char c) {
  /* Assume ASCII. */
  assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61);
  assert(isxdigit(c));
  int x = static_cast<unsigned char>(c);
  // Letters have low nibble 1..6 ('A' == 0x41, 'a' == 0x61); adding 9 turns
  // that into 10..15.  Digits already have their value in the low nibble.
  if (x > '9') {
    x += 9;
  }
  return x & 0xf;
}

// Protocol buffers doesn't ever care about errors, but I don't want to remove
// the code.
297fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false) 298fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 299fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savilleint UnescapeCEscapeSequences(const char* source, char* dest) { 300fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return UnescapeCEscapeSequences(source, dest, NULL); 301fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 302fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 303fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savilleint UnescapeCEscapeSequences(const char* source, char* dest, 304fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville vector<string> *errors) { 305fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville GOOGLE_DCHECK(errors == NULL) << "Error reporting not implemented."; 306fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 307fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char* d = dest; 308fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const char* p = source; 309fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 310fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Small optimization for case where source = dest and there's no escaping 311fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville while ( p == d && *p != '\0' && *p != '\\' ) 312fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville p++, d++; 313fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 314fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville while (*p != '\0') { 315fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (*p != '\\') { 316fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *d++ = *p++; 317fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else { 318fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville switch ( *++p ) { // skip past the '\\' 319fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case '\0': 320fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville LOG_STRING(ERROR, errors) << "String 
cannot end with \\"; 321fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *d = '\0'; 322fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return d - dest; // we're done with p 323fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case 'a': *d++ = '\a'; break; 324fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case 'b': *d++ = '\b'; break; 325fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case 'f': *d++ = '\f'; break; 326fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case 'n': *d++ = '\n'; break; 327fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case 'r': *d++ = '\r'; break; 328fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case 't': *d++ = '\t'; break; 329fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case 'v': *d++ = '\v'; break; 330fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case '\\': *d++ = '\\'; break; 331fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case '?': *d++ = '\?'; break; // \? Who knew? 332fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case '\'': *d++ = '\''; break; 333fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case '"': *d++ = '\"'; break; 334fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case '0': case '1': case '2': case '3': // octal digit: 1 to 3 digits 335fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case '4': case '5': case '6': case '7': { 336fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char ch = *p - '0'; 337fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if ( IS_OCTAL_DIGIT(p[1]) ) 338fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ch = ch * 8 + *++p - '0'; 339fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if ( IS_OCTAL_DIGIT(p[1]) ) // safe (and easy) to do this twice 340fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ch = ch * 8 + *++p - '0'; // now points at last digit 341fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *d++ = ch; 342fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville break; 
343fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 344fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case 'x': case 'X': { 345fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (!isxdigit(p[1])) { 346fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (p[1] == '\0') { 347fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville LOG_STRING(ERROR, errors) << "String cannot end with \\x"; 348fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else { 349fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville LOG_STRING(ERROR, errors) << 350fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville "\\x cannot be followed by non-hex digit: \\" << *p << p[1]; 351fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 352fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville break; 353fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 354fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville unsigned int ch = 0; 355fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const char *hex_start = p; 356fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville while (isxdigit(p[1])) // arbitrarily many hex digits 357fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ch = (ch << 4) + hex_digit_to_int(*++p); 358fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (ch > 0xFF) 359fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville LOG_STRING(ERROR, errors) << "Value of " << 360fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville "\\" << string(hex_start, p+1-hex_start) << " exceeds 8 bits"; 361fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *d++ = ch; 362fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville break; 363fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 364fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#if 0 // TODO(kenton): Support \u and \U? Requires runetochar(). 
365fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case 'u': { 366fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // \uhhhh => convert 4 hex digits to UTF-8 367fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char32 rune = 0; 368fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const char *hex_start = p; 369fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville for (int i = 0; i < 4; ++i) { 370fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (isxdigit(p[1])) { // Look one char ahead. 371fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p. 372fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else { 373fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville LOG_STRING(ERROR, errors) 374fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville << "\\u must be followed by 4 hex digits: \\" 375fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville << string(hex_start, p+1-hex_start); 376fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville break; 377fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 378fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 379fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville d += runetochar(d, &rune); 380fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville break; 381fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 382fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville case 'U': { 383fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // \Uhhhhhhhh => convert 8 hex digits to UTF-8 384fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char32 rune = 0; 385fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const char *hex_start = p; 386fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville for (int i = 0; i < 8; ++i) { 387fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (isxdigit(p[1])) { // Look one char ahead. 
388fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Don't change rune until we're sure this 389fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // is within the Unicode limit, but do advance p. 390fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char32 newrune = (rune << 4) + hex_digit_to_int(*++p); 391fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (newrune > 0x10FFFF) { 392fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville LOG_STRING(ERROR, errors) 393fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville << "Value of \\" 394fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville << string(hex_start, p + 1 - hex_start) 395fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville << " exceeds Unicode limit (0x10FFFF)"; 396fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville break; 397fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else { 398fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville rune = newrune; 399fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 400fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else { 401fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville LOG_STRING(ERROR, errors) 402fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville << "\\U must be followed by 8 hex digits: \\" 403fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville << string(hex_start, p+1-hex_start); 404fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville break; 405fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 406fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 407fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville d += runetochar(d, &rune); 408fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville break; 409fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 410fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#endif 411fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville default: 412fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p; 
413fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 414fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville p++; // read past letter we escaped 415fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 416fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 417fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *d = '\0'; 418fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return d - dest; 419fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 420fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 421fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 422fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// UnescapeCEscapeString() 423fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// This does the same thing as UnescapeCEscapeSequences, but creates 424fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// a new string. The caller does not need to worry about allocating 425fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// a dest buffer. This should be used for non performance critical 426fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// tasks such as printing debug messages. It is safe for src and dest 427fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// to be the same. 428fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// 429fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// The second call stores its errors in a supplied string vector. 430fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// If the string vector pointer is NULL, it reports the errors with LOG(). 431fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// 432fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// In the first and second calls, the length of dest is returned. In the 433fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// the third call, the new string is returned. 
434fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 435fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savilleint UnescapeCEscapeString(const string& src, string* dest) { 436fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return UnescapeCEscapeString(src, dest, NULL); 437fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 438fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 439fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savilleint UnescapeCEscapeString(const string& src, string* dest, 440fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville vector<string> *errors) { 441fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville scoped_array<char> unescaped(new char[src.size() + 1]); 442fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), errors); 443fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville GOOGLE_CHECK(dest); 444fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville dest->assign(unescaped.get(), len); 445fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return len; 446fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 447fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 448fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestring UnescapeCEscapeString(const string& src) { 449fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville scoped_array<char> unescaped(new char[src.size() + 1]); 450fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), NULL); 451fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return string(unescaped.get(), len); 452fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 453fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 454fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 455fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// 
//    CEscapeString()
//    CHexEscapeString()
//    Copies 'src' to 'dest', escaping dangerous characters using
//    C-style escape sequences. This is very useful for preparing query
//    flags.  'src' and 'dest' should not overlap. The 'Hex' version uses
//    hexadecimal rather than octal sequences.
//    Returns the number of bytes written to 'dest' (not including the \0)
//    or -1 if there was insufficient space.
//
//    Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
// ----------------------------------------------------------------------

// Worker shared by the escaping entry points.
//   src, src_len   - bytes to escape (may contain embedded NULs).
//   dest, dest_len - output buffer and its capacity in bytes; the capacity
//                    must also cover the terminating '\0'.
//   use_hex        - emit \xNN (lowercase hex) instead of \ooo (octal).
//   utf8_safe      - when true, bytes >= 0x80 are copied through unescaped.
// Returns the number of bytes written, excluding the terminating '\0', or -1
// if 'dest' is too small.  On -1 the contents of 'dest' are unspecified, but
// nothing is ever written at or beyond dest + dest_len.
int CEscapeInternal(const char* src, int src_len, char* dest,
                    int dest_len, bool use_hex, bool utf8_safe) {
  static const char kHexDigits[] = "0123456789abcdef";
  const char* src_end = src + src_len;
  int used = 0;
  bool last_hex_escape = false;  // true if last output char was \xNN

  for (; src < src_end; src++) {
    if (dest_len - used < 2)   // Need space for two letter escape
      return -1;

    // The <ctype.h> classifiers are only defined for values representable as
    // unsigned char (or EOF), so work on the unsigned value of the byte.
    const unsigned char c = *src;
    bool is_hex_escape = false;
    switch (c) {
      case '\n': dest[used++] = '\\'; dest[used++] = 'n';  break;
      case '\r': dest[used++] = '\\'; dest[used++] = 'r';  break;
      case '\t': dest[used++] = '\\'; dest[used++] = 't';  break;
      case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
      case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
      case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
      default:
        // Note that if we emit \xNN and the src character after that is a hex
        // digit then that digit must be escaped too to prevent it being
        // interpreted as part of the character code by C.
        if ((!utf8_safe || c < 0x80) &&
            (!isprint(c) ||
             (last_hex_escape && isxdigit(c)))) {
          if (dest_len - used < 4)  // need space for 4 letter escape
            return -1;
          // Emit the four characters by hand.  A printf-style formatter would
          // also store a terminating '\0', i.e. a fifth byte that the check
          // above does not reserve.
          dest[used++] = '\\';
          if (use_hex) {
            dest[used++] = 'x';
            dest[used++] = kHexDigits[c >> 4];
            dest[used++] = kHexDigits[c & 0x0F];
          } else {
            dest[used++] = '0' + (c >> 6);
            dest[used++] = '0' + ((c >> 3) & 7);
            dest[used++] = '0' + (c & 7);
          }
          is_hex_escape = use_hex;
        } else {
          dest[used++] = *src;
        }
        break;
    }
    last_hex_escape = is_hex_escape;
  }

  if (dest_len - used < 1)   // make sure that there is room for \0
    return -1;

  dest[used] = '\0';   // doesn't count towards return value though
  return used;
}

// Octal-escaping, non-UTF-8-aware wrapper around CEscapeInternal().
int CEscapeString(const char* src, int src_len, char* dest, int dest_len) {
  return CEscapeInternal(src, src_len, dest, dest_len, false, false);
}

// ----------------------------------------------------------------------
// CEscape()
// CHexEscape()
//    Copies 'src' to result, escaping
dangerous characters using 519fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// C-style escape sequences. This is very useful for preparing query 520fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// flags. 'src' and 'dest' should not overlap. The 'Hex' version 521fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// hexadecimal rather than octal sequences. 522fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// 523fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped. 524fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 525fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestring CEscape(const string& src) { 526fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const int dest_length = src.size() * 4 + 1; // Maximum possible expansion 527fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville scoped_array<char> dest(new char[dest_length]); 528fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const int len = CEscapeInternal(src.data(), src.size(), 529d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville dest.get(), dest_length, false, false); 530fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville GOOGLE_DCHECK_GE(len, 0); 531fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return string(dest.get(), len); 532fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 533fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 534d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Savillenamespace strings { 535d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville 536d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Savillestring Utf8SafeCEscape(const string& src) { 537d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville const int dest_length = src.size() * 4 + 1; // Maximum possible expansion 538d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville scoped_array<char> dest(new char[dest_length]); 
539d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville const int len = CEscapeInternal(src.data(), src.size(), 540d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville dest.get(), dest_length, false, true); 541d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville GOOGLE_DCHECK_GE(len, 0); 542d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville return string(dest.get(), len); 543d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville} 544d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville 545d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Savillestring CHexEscape(const string& src) { 546d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville const int dest_length = src.size() * 4 + 1; // Maximum possible expansion 547d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville scoped_array<char> dest(new char[dest_length]); 548d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville const int len = CEscapeInternal(src.data(), src.size(), 549d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville dest.get(), dest_length, true, false); 550d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville GOOGLE_DCHECK_GE(len, 0); 551d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville return string(dest.get(), len); 552d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville} 553d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville 554d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville} // namespace strings 555d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville 556fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 557fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// strto32_adaptor() 558fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// strtou32_adaptor() 559fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Implementation of strto[u]l replacements that have identical 560fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// overflow and underflow characteristics for both ILP-32 and LP-64 
561fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// platforms, including errno preservation in error-free calls. 562fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 563fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 564fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savilleint32 strto32_adaptor(const char *nptr, char **endptr, int base) { 565fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const int saved_errno = errno; 566fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville errno = 0; 567fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const long result = strtol(nptr, endptr, base); 568fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (errno == ERANGE && result == LONG_MIN) { 569fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return kint32min; 570fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else if (errno == ERANGE && result == LONG_MAX) { 571fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return kint32max; 572fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else if (errno == 0 && result < kint32min) { 573fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville errno = ERANGE; 574fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return kint32min; 575fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else if (errno == 0 && result > kint32max) { 576fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville errno = ERANGE; 577fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return kint32max; 578fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 579fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (errno == 0) 580fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville errno = saved_errno; 581fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return static_cast<int32>(result); 582fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 583fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 584fbaaef999ba563838ebd00874ed8a1c01fbf286dWink 
Savilleuint32 strtou32_adaptor(const char *nptr, char **endptr, int base) { 585fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const int saved_errno = errno; 586fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville errno = 0; 587fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const unsigned long result = strtoul(nptr, endptr, base); 588fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (errno == ERANGE && result == ULONG_MAX) { 589fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return kuint32max; 590fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else if (errno == 0 && result > kuint32max) { 591fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville errno = ERANGE; 592fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return kuint32max; 593fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 594fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (errno == 0) 595fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville errno = saved_errno; 596fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return static_cast<uint32>(result); 597fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 598fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 599a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidsoninline bool safe_parse_sign(string* text /*inout*/, 600a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson bool* negative_ptr /*output*/) { 601a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson const char* start = text->data(); 602a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson const char* end = start + text->size(); 603a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson 604a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson // Consume whitespace. 
605a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson while (start < end && (start[0] == ' ')) { 606a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson ++start; 607a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 608a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson while (start < end && (end[-1] == ' ')) { 609a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson --end; 610a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 611a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson if (start >= end) { 612a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return false; 613a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 614a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson 615a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson // Consume sign. 616a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson *negative_ptr = (start[0] == '-'); 617a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson if (*negative_ptr || start[0] == '+') { 618a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson ++start; 619a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson if (start >= end) { 620a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return false; 621a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 622a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 623a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson *text = text->substr(start - text->data(), end - start); 624a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return true; 625a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson} 626a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson 627a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidsoninline bool safe_parse_positive_int( 628a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson string text, int32* value_p) { 629a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson int base = 10; 630a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson int32 value = 0; 631a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson const 
int32 vmax = std::numeric_limits<int32>::max(); 632a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson assert(vmax > 0); 633a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson assert(vmax >= base); 634a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson const int32 vmax_over_base = vmax / base; 635a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson const char* start = text.data(); 636a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson const char* end = start + text.size(); 637a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson // loop over digits 638a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson for (; start < end; ++start) { 639a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson unsigned char c = static_cast<unsigned char>(start[0]); 640a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson int digit = c - '0'; 641a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson if (digit >= base || digit < 0) { 642a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson *value_p = value; 643a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return false; 644a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 645a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson if (value > vmax_over_base) { 646a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson *value_p = vmax; 647a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return false; 648a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 649a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson value *= base; 650a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson if (value > vmax - digit) { 651a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson *value_p = vmax; 652a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return false; 653a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 654a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson value += digit; 655a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 656a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson *value_p = value; 
657a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return true; 658a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson} 659a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson 660a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidsoninline bool safe_parse_negative_int( 661a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson string text, int32* value_p) { 662a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson int base = 10; 663a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson int32 value = 0; 664a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson const int32 vmin = std::numeric_limits<int32>::min(); 665a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson assert(vmin < 0); 666a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson assert(vmin <= 0 - base); 667a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson int32 vmin_over_base = vmin / base; 668a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson // 2003 c++ standard [expr.mul] 669a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson // "... the sign of the remainder is implementation-defined." 670a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson // Although (vmin/base)*base + vmin%base is always vmin. 671a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson // 2011 c++ standard tightens the spec but we cannot rely on it. 
672a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson if (vmin % base > 0) { 673a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson vmin_over_base += 1; 674a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 675a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson const char* start = text.data(); 676a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson const char* end = start + text.size(); 677a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson // loop over digits 678a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson for (; start < end; ++start) { 679a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson unsigned char c = static_cast<unsigned char>(start[0]); 680a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson int digit = c - '0'; 681a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson if (digit >= base || digit < 0) { 682a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson *value_p = value; 683a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return false; 684a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 685a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson if (value < vmin_over_base) { 686a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson *value_p = vmin; 687a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return false; 688a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 689a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson value *= base; 690a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson if (value < vmin + digit) { 691a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson *value_p = vmin; 692a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return false; 693a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 694a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson value -= digit; 695a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 696a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson *value_p = value; 697a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return true; 
698a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson} 699a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson 700a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidsonbool safe_int(string text, int32* value_p) { 701a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson *value_p = 0; 702a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson bool negative; 703a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson if (!safe_parse_sign(&text, &negative)) { 704a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return false; 705a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 706a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson if (!negative) { 707a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return safe_parse_positive_int(text, value_p); 708a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } else { 709a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return safe_parse_negative_int(text, value_p); 710a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 711a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson} 712a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson 713fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 714fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// FastIntToBuffer() 715fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// FastInt64ToBuffer() 716fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// FastHexToBuffer() 717fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// FastHex64ToBuffer() 718fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// FastHex32ToBuffer() 719fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 720fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 721fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Offset into buffer where FastInt64ToBuffer places the end of string 722fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// null 
character. Also used by FastInt64ToBufferLeft. 723fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestatic const int kFastInt64ToBufferOffset = 21; 724fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 725fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillechar *FastInt64ToBuffer(int64 i, char* buffer) { 726fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // We could collapse the positive and negative sections, but that 727fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // would be slightly slower for positive numbers... 728fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // 22 bytes is enough to store -2**64, -18446744073709551616. 729fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char* p = buffer + kFastInt64ToBufferOffset; 730fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p-- = '\0'; 731fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (i >= 0) { 732fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville do { 733fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p-- = '0' + i % 10; 734fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville i /= 10; 735fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } while (i > 0); 736fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return p + 1; 737fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else { 738fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // On different platforms, % and / have different behaviors for 739fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // negative numbers, so we need to jump through hoops to make sure 740fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // we don't divide negative numbers. 
741fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (i > -10) { 742fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville i = -i; 743fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p-- = '0' + i; 744fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p = '-'; 745fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return p; 746fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else { 747fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Make sure we aren't at MIN_INT, in which case we can't say i = -i 748fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville i = i + 10; 749fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville i = -i; 750fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p-- = '0' + i % 10; 751fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Undo what we did a moment ago 752fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville i = i / 10 + 1; 753fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville do { 754fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p-- = '0' + i % 10; 755fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville i /= 10; 756fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } while (i > 0); 757fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p = '-'; 758fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return p; 759fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 760fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 761fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 762fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 763fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Offset into buffer where FastInt32ToBuffer places the end of string 764fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// null character. 
Also used by FastInt32ToBufferLeft 765fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestatic const int kFastInt32ToBufferOffset = 11; 766fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 767fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Yes, this is a duplicate of FastInt64ToBuffer. But, we need this for the 768fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// compiler to generate 32 bit arithmetic instructions. It's much faster, at 769fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// least with 32 bit binaries. 770fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillechar *FastInt32ToBuffer(int32 i, char* buffer) { 771fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // We could collapse the positive and negative sections, but that 772fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // would be slightly slower for positive numbers... 773fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // 12 bytes is enough to store -2**32, -4294967296. 774fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char* p = buffer + kFastInt32ToBufferOffset; 775fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p-- = '\0'; 776fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (i >= 0) { 777fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville do { 778fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p-- = '0' + i % 10; 779fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville i /= 10; 780fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } while (i > 0); 781fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return p + 1; 782fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else { 783fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // On different platforms, % and / have different behaviors for 784fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // negative numbers, so we need to jump through hoops to make sure 785fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // we don't divide negative numbers. 
786fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (i > -10) { 787fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville i = -i; 788fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p-- = '0' + i; 789fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p = '-'; 790fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return p; 791fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else { 792fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Make sure we aren't at MIN_INT, in which case we can't say i = -i 793fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville i = i + 10; 794fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville i = -i; 795fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p-- = '0' + i % 10; 796fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Undo what we did a moment ago 797fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville i = i / 10 + 1; 798fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville do { 799fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p-- = '0' + i % 10; 800fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville i /= 10; 801fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } while (i > 0); 802fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p = '-'; 803fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return p; 804fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 805fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 806fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 807fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 808fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillechar *FastHexToBuffer(int i, char* buffer) { 809fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville GOOGLE_CHECK(i >= 0) << "FastHexToBuffer() wants non-negative integers, not " << i; 810fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 811fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville static const char *hexdigits = "0123456789abcdef"; 812fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char 
*p = buffer + 21; 813fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p-- = '\0'; 814fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville do { 815fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p-- = hexdigits[i & 15]; // mod by 16 816fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville i >>= 4; // divide by 16 817fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } while (i > 0); 818fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return p + 1; 819fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 820fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 821fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillechar *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) { 822fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville static const char *hexdigits = "0123456789abcdef"; 823fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[num_byte] = '\0'; 824fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville for (int i = num_byte - 1; i >= 0; i--) { 825a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson#ifdef _M_X64 826a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson // MSVC x64 platform has a bug optimizing the uint32(value) in the #else 827a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson // block. Given that the uint32 cast was to improve performance on 32-bit 828a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson // platforms, we use 64-bit '&' directly. 
829a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson buffer[i] = hexdigits[value & 0xf]; 830a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson#else 831fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[i] = hexdigits[uint32(value) & 0xf]; 832a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson#endif 833fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville value >>= 4; 834fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 835fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return buffer; 836fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 837fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 838fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillechar *FastHex64ToBuffer(uint64 value, char* buffer) { 839fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return InternalFastHexToBuffer(value, buffer, 16); 840fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 841fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 842fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillechar *FastHex32ToBuffer(uint32 value, char* buffer) { 843fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return InternalFastHexToBuffer(value, buffer, 8); 844fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 845fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 846fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestatic inline char* PlaceNum(char* p, int num, char prev_sep) { 847fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p-- = '0' + num % 10; 848fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p-- = '0' + num / 10; 849fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *p-- = prev_sep; 850fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return p; 851fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 852fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 853fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 854fbaaef999ba563838ebd00874ed8a1c01fbf286dWink 
Saville// FastInt32ToBufferLeft() 855fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// FastUInt32ToBufferLeft() 856fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// FastInt64ToBufferLeft() 857fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// FastUInt64ToBufferLeft() 858fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// 859fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Like the Fast*ToBuffer() functions above, these are intended for speed. 860fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Unlike the Fast*ToBuffer() functions, however, these functions write 861fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// their output to the beginning of the buffer (hence the name, as the 862fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// output is left-aligned). The caller is responsible for ensuring that 863fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// the buffer has enough space to hold the output. 864fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// 865fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Returns a pointer to the end of the string (i.e. the null character 866fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// terminating the string). 
867fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 868fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 869fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestatic const char two_ASCII_digits[100][2] = { 870fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'}, 871fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'}, 872fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'}, 873fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'}, 874fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'}, 875fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'}, 876fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'}, 877fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'}, 878fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'}, 879fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'}, 880fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'}, 881fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'}, 882fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'}, 883fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'}, 884fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'}, 885fbaaef999ba563838ebd00874ed8a1c01fbf286dWink 
Saville {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'}, 886fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'}, 887fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'}, 888fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'}, 889fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'} 890fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville}; 891fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 892fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillechar* FastUInt32ToBufferLeft(uint32 u, char* buffer) { 893fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int digits; 894fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const char *ASCII_digits = NULL; 895fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // The idea of this implementation is to trim the number of divides to as few 896fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // as possible by using multiplication and subtraction rather than mod (%), 897fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // and by outputting two digits at a time rather than one. 898fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // The huge-number case is first, in the hopes that the compiler will output 899fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // that case in one branch-free block of code, and only output conditional 900fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // branches into it from below. 
901fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (u >= 1000000000) { // >= 1,000,000,000 902fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville digits = u / 100000000; // 100,000,000 903fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ASCII_digits = two_ASCII_digits[digits]; 904fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[0] = ASCII_digits[0]; 905fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[1] = ASCII_digits[1]; 906fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer += 2; 907fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillesublt100_000_000: 908fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville u -= digits * 100000000; // 100,000,000 909fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillelt100_000_000: 910fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville digits = u / 1000000; // 1,000,000 911fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ASCII_digits = two_ASCII_digits[digits]; 912fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[0] = ASCII_digits[0]; 913fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[1] = ASCII_digits[1]; 914fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer += 2; 915fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillesublt1_000_000: 916fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville u -= digits * 1000000; // 1,000,000 917fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillelt1_000_000: 918fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville digits = u / 10000; // 10,000 919fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ASCII_digits = two_ASCII_digits[digits]; 920fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[0] = ASCII_digits[0]; 921fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[1] = ASCII_digits[1]; 922fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer += 2; 923fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillesublt10_000: 924fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville u -= digits * 10000; // 
10,000 925fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillelt10_000: 926fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville digits = u / 100; 927fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ASCII_digits = two_ASCII_digits[digits]; 928fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[0] = ASCII_digits[0]; 929fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[1] = ASCII_digits[1]; 930fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer += 2; 931fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillesublt100: 932fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville u -= digits * 100; 933fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillelt100: 934fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville digits = u; 935fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ASCII_digits = two_ASCII_digits[digits]; 936fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[0] = ASCII_digits[0]; 937fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[1] = ASCII_digits[1]; 938fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer += 2; 939fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savilledone: 940fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *buffer = 0; 941fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return buffer; 942fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 943fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 944fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (u < 100) { 945fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville digits = u; 946fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (u >= 10) goto lt100; 947fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *buffer++ = '0' + digits; 948fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville goto done; 949fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 950fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (u < 10000) { // 10,000 951fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (u >= 1000) goto lt10_000; 
952fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville digits = u / 100; 953fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *buffer++ = '0' + digits; 954fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville goto sublt100; 955fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 956fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (u < 1000000) { // 1,000,000 957fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (u >= 100000) goto lt1_000_000; 958fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville digits = u / 10000; // 10,000 959fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *buffer++ = '0' + digits; 960fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville goto sublt10_000; 961fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 962fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (u < 100000000) { // 100,000,000 963fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (u >= 10000000) goto lt100_000_000; 964fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville digits = u / 1000000; // 1,000,000 965fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *buffer++ = '0' + digits; 966fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville goto sublt1_000_000; 967fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 968fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // we already know that u < 1,000,000,000 969fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville digits = u / 100000000; // 100,000,000 970fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *buffer++ = '0' + digits; 971fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville goto sublt100_000_000; 972fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 973fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 974fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillechar* FastInt32ToBufferLeft(int32 i, char* buffer) { 975fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville uint32 u = i; 976fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (i < 0) { 
977fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *buffer++ = '-'; 978fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville u = -i; 979fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 980fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return FastUInt32ToBufferLeft(u, buffer); 981fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 982fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 983fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillechar* FastUInt64ToBufferLeft(uint64 u64, char* buffer) { 984fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int digits; 985fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const char *ASCII_digits = NULL; 986fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 987fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville uint32 u = static_cast<uint32>(u64); 988fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (u == u64) return FastUInt32ToBufferLeft(u, buffer); 989fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 990fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville uint64 top_11_digits = u64 / 1000000000; 991fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer = FastUInt64ToBufferLeft(top_11_digits, buffer); 992fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville u = u64 - (top_11_digits * 1000000000); 993fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 994fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville digits = u / 10000000; // 10,000,000 995fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville GOOGLE_DCHECK_LT(digits, 100); 996fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ASCII_digits = two_ASCII_digits[digits]; 997fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[0] = ASCII_digits[0]; 998fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[1] = ASCII_digits[1]; 999fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer += 2; 1000fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville u -= digits * 10000000; // 10,000,000 
1001fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville digits = u / 100000; // 100,000 1002fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ASCII_digits = two_ASCII_digits[digits]; 1003fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[0] = ASCII_digits[0]; 1004fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[1] = ASCII_digits[1]; 1005fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer += 2; 1006fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville u -= digits * 100000; // 100,000 1007fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville digits = u / 1000; // 1,000 1008fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ASCII_digits = two_ASCII_digits[digits]; 1009fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[0] = ASCII_digits[0]; 1010fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[1] = ASCII_digits[1]; 1011fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer += 2; 1012fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville u -= digits * 1000; // 1,000 1013fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville digits = u / 10; 1014fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ASCII_digits = two_ASCII_digits[digits]; 1015fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[0] = ASCII_digits[0]; 1016fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer[1] = ASCII_digits[1]; 1017fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville buffer += 2; 1018fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville u -= digits * 10; 1019fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville digits = u; 1020fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *buffer++ = '0' + digits; 1021fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *buffer = 0; 1022fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return buffer; 1023fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1024fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1025fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillechar* 
FastInt64ToBufferLeft(int64 i, char* buffer) { 1026fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville uint64 u = i; 1027fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (i < 0) { 1028fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *buffer++ = '-'; 1029fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville u = -i; 1030fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 1031fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return FastUInt64ToBufferLeft(u, buffer); 1032fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1033fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1034fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 1035fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// SimpleItoa() 1036fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Description: converts an integer to a string. 1037fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// 1038fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Return value: string 1039fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 1040fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1041fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestring SimpleItoa(int i) { 1042fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char buffer[kFastToBufferSize]; 1043fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return (sizeof(i) == 4) ? 
1044fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville FastInt32ToBuffer(i, buffer) : 1045fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville FastInt64ToBuffer(i, buffer); 1046fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1047fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1048fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestring SimpleItoa(unsigned int i) { 1049fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char buffer[kFastToBufferSize]; 1050fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return string(buffer, (sizeof(i) == 4) ? 1051fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville FastUInt32ToBufferLeft(i, buffer) : 1052fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville FastUInt64ToBufferLeft(i, buffer)); 1053fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1054fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1055fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestring SimpleItoa(long i) { 1056fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char buffer[kFastToBufferSize]; 1057fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return (sizeof(i) == 4) ? 1058fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville FastInt32ToBuffer(i, buffer) : 1059fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville FastInt64ToBuffer(i, buffer); 1060fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1061fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1062fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestring SimpleItoa(unsigned long i) { 1063fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char buffer[kFastToBufferSize]; 1064fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return string(buffer, (sizeof(i) == 4) ? 
1065fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville FastUInt32ToBufferLeft(i, buffer) : 1066fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville FastUInt64ToBufferLeft(i, buffer)); 1067fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1068fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1069fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestring SimpleItoa(long long i) { 1070fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char buffer[kFastToBufferSize]; 1071fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return (sizeof(i) == 4) ? 1072fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville FastInt32ToBuffer(i, buffer) : 1073fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville FastInt64ToBuffer(i, buffer); 1074fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1075fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1076fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestring SimpleItoa(unsigned long long i) { 1077fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char buffer[kFastToBufferSize]; 1078fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return string(buffer, (sizeof(i) == 4) ? 
1079fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville FastUInt32ToBufferLeft(i, buffer) : 1080fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville FastUInt64ToBufferLeft(i, buffer)); 1081fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1082fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1083fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 1084fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// SimpleDtoa() 1085fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// SimpleFtoa() 1086fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// DoubleToBuffer() 1087fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// FloatToBuffer() 1088fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// We want to print the value without losing precision, but we also do 1089fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// not want to print more digits than necessary. This turns out to be 1090fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// trickier than it sounds. Numbers like 0.2 cannot be represented 1091fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// exactly in binary. If we print 0.2 with a very large precision, 1092fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167". 1093fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// On the other hand, if we set the precision too low, we lose 1094fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// significant digits when printing numbers that actually need them. 1095fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// It turns out there is no precision value that does the right thing 1096fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// for all numbers. 
1097fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// 1098fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Our strategy is to first try printing with a precision that is never 1099fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// over-precise, then parse the result with strtod() to see if it 1100fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// matches. If not, we print again with a precision that will always 1101fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// give a precise result, but may use more digits than necessary. 1102fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// 1103fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// An arguably better strategy would be to use the algorithm described 1104fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// in "How to Print Floating-Point Numbers Accurately" by Steele & 1105fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// White, e.g. as implemented by David M. Gay's dtoa(). It turns out, 1106fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// however, that the following implementation is about as fast as 1107fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// DMG's code. Furthermore, DMG's code locks mutexes, which means it 1108fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// will not scale well on multi-core machines. DMG's code is slightly 1109fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// more accurate (in that it will never use more digits than 1110fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// necessary), but this is probably irrelevant for most users. 1111fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// 1112fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Rob Pike and Ken Thompson also have an implementation of dtoa() in 1113fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// third_party/fmt/fltfmt.cc. 
Their implementation is similar to this 1114fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// one in that it makes guesses and then uses strtod() to check them. 1115fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Their implementation is faster because they use their own code to 1116fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// generate the digits in the first place rather than use snprintf(), 1117fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// thus avoiding format string parsing overhead. However, this makes 1118fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// it considerably more complicated than the following implementation, 1119fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// and it is embedded in a larger library. If speed turns out to be 1120fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// an issue, we could re-implement this in terms of their 1121fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// implementation. 1122fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// ---------------------------------------------------------------------- 1123fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1124fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestring SimpleDtoa(double value) { 1125fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char buffer[kDoubleToBufferSize]; 1126fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return DoubleToBuffer(value, buffer); 1127fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1128fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1129fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillestring SimpleFtoa(float value) { 1130fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char buffer[kFloatToBufferSize]; 1131fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return FloatToBuffer(value, buffer); 1132fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1133fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1134fbaaef999ba563838ebd00874ed8a1c01fbf286dWink 
Savillestatic inline bool IsValidFloatChar(char c) { 1135fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return ('0' <= c && c <= '9') || 1136fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville c == 'e' || c == 'E' || 1137fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville c == '+' || c == '-'; 1138fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1139fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1140fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillevoid DelocalizeRadix(char* buffer) { 1141fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Fast check: if the buffer has a normal decimal point, assume no 1142fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // translation is needed. 1143fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (strchr(buffer, '.') != NULL) return; 1144fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1145fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Find the first unknown character. 1146fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville while (IsValidFloatChar(*buffer)) ++buffer; 1147fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1148fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (*buffer == '\0') { 1149fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // No radix character found. 1150fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return; 1151fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 1152fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1153fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // We are now pointing at the locale-specific radix character. Replace it 1154fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // with '.'. 
1155fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *buffer = '.'; 1156fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ++buffer; 1157fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1158fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (!IsValidFloatChar(*buffer) && *buffer != '\0') { 1159fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // It appears the radix was a multi-byte character. We need to remove the 1160fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // extra bytes. 1161fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char* target = buffer; 1162fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville do { ++buffer; } while (!IsValidFloatChar(*buffer) && *buffer != '\0'); 1163fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville memmove(target, buffer, strlen(buffer) + 1); 1164fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 1165fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1166fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1167fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillechar* DoubleToBuffer(double value, char* buffer) { 1168fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all 1169fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // platforms these days. Just in case some system exists where DBL_DIG 1170fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // is significantly larger -- and risks overflowing our buffer -- we have 1171fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // this assert. 
1172fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville GOOGLE_COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big); 1173fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1174fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (value == numeric_limits<double>::infinity()) { 1175fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville strcpy(buffer, "inf"); 1176fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return buffer; 1177fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else if (value == -numeric_limits<double>::infinity()) { 1178fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville strcpy(buffer, "-inf"); 1179fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return buffer; 1180fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else if (IsNaN(value)) { 1181fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville strcpy(buffer, "nan"); 1182fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return buffer; 1183fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 1184fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1185fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int snprintf_result = 1186fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value); 1187fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1188fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // The snprintf should never overflow because the buffer is significantly 1189fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // larger than the precision we asked for. 1190fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); 1191fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1192fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // We need to make parsed_value volatile in order to force the compiler to 1193fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // write it out to the stack. 
Otherwise, it may keep the value in a 1194fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // register, and if it does that, it may keep it as a long double instead 1195fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // of a double. This long double may have extra bits that make it compare 1196fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // unequal to "value" even though it would be exactly equal if it were 1197fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // truncated to a double. 1198fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville volatile double parsed_value = strtod(buffer, NULL); 1199fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (parsed_value != value) { 1200fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int snprintf_result = 1201fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value); 1202fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1203fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Should never overflow; see above. 
1204fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); 1205fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 1206fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1207fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville DelocalizeRadix(buffer); 1208fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return buffer; 1209fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1210fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1211fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillebool safe_strtof(const char* str, float* value) { 1212fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char* endptr; 1213fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville errno = 0; // errno only gets set on errors 1214d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville#if defined(_WIN32) || defined (__hpux) // has no strtof() 1215fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *value = strtod(str, &endptr); 1216fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#else 1217fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville *value = strtof(str, &endptr); 1218fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#endif 1219fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return *str != 0 && *endptr == 0 && errno == 0; 1220fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1221fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1222fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillechar* FloatToBuffer(float value, char* buffer) { 1223fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all 1224fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // platforms these days. Just in case some system exists where FLT_DIG 1225fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // is significantly larger -- and risks overflowing our buffer -- we have 1226fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // this assert. 
1227fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville GOOGLE_COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big); 1228fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1229fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (value == numeric_limits<double>::infinity()) { 1230fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville strcpy(buffer, "inf"); 1231fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return buffer; 1232fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else if (value == -numeric_limits<double>::infinity()) { 1233fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville strcpy(buffer, "-inf"); 1234fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return buffer; 1235fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } else if (IsNaN(value)) { 1236fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville strcpy(buffer, "nan"); 1237fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return buffer; 1238fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 1239fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1240fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int snprintf_result = 1241fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value); 1242fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1243fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // The snprintf should never overflow because the buffer is significantly 1244fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // larger than the precision we asked for. 
1245fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); 1246fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1247fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville float parsed_value; 1248fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { 1249fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int snprintf_result = 1250fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+2, value); 1251fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1252fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Should never overflow; see above. 1253fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); 1254fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 1255fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1256fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville DelocalizeRadix(buffer); 1257fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return buffer; 1258fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1259fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1260a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidsonstring ToHex(uint64 num) { 1261a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson if (num == 0) { 1262a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return string("0"); 1263a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson } 1264fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1265a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson // Compute hex bytes in reverse order, writing to the back of the 1266a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson // buffer. 1267a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson char buf[16]; // No more than 16 hex digits needed. 
1268a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson char* bufptr = buf + 16; 1269a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson static const char kHexChars[] = "0123456789abcdef"; 1270a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson while (num != 0) { 1271a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson *--bufptr = kHexChars[num & 0xf]; 1272a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson num >>= 4; 1273fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 1274fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1275a3b2a6da25a76f17c73d31def3952feb0fd2296eJeff Davidson return string(bufptr, buf + 16 - bufptr); 1276fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 1277fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 1278fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} // namespace protobuf 1279fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} // namespace google 1280