1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/strings/string_number_conversions.h"
6
7#include <ctype.h>
8#include <errno.h>
9#include <stdlib.h>
10#include <wctype.h>
11
12#include <limits>
13
14#include "base/logging.h"
15#include "base/scoped_clear_errno.h"
16#include "base/strings/utf_string_conversions.h"
17#include "base/third_party/dmg_fp/dmg_fp.h"
18
19namespace base {
20
21namespace {
22
23template <typename STR, typename INT, typename UINT, bool NEG>
24struct IntToStringT {
25  // This is to avoid a compiler warning about unary minus on unsigned type.
26  // For example, say you had the following code:
27  //   template <typename INT>
28  //   INT abs(INT value) { return value < 0 ? -value : value; }
29  // Even though if INT is unsigned, it's impossible for value < 0, so the
30  // unary minus will never be taken, the compiler will still generate a
31  // warning.  We do a little specialization dance...
32  template <typename INT2, typename UINT2, bool NEG2>
33  struct ToUnsignedT {};
34
35  template <typename INT2, typename UINT2>
36  struct ToUnsignedT<INT2, UINT2, false> {
37    static UINT2 ToUnsigned(INT2 value) {
38      return static_cast<UINT2>(value);
39    }
40  };
41
42  template <typename INT2, typename UINT2>
43  struct ToUnsignedT<INT2, UINT2, true> {
44    static UINT2 ToUnsigned(INT2 value) {
45      return static_cast<UINT2>(value < 0 ? -value : value);
46    }
47  };
48
49  // This set of templates is very similar to the above templates, but
50  // for testing whether an integer is negative.
51  template <typename INT2, bool NEG2>
52  struct TestNegT {};
53  template <typename INT2>
54  struct TestNegT<INT2, false> {
55    static bool TestNeg(INT2 value) {
56      // value is unsigned, and can never be negative.
57      return false;
58    }
59  };
60  template <typename INT2>
61  struct TestNegT<INT2, true> {
62    static bool TestNeg(INT2 value) {
63      return value < 0;
64    }
65  };
66
67  static STR IntToString(INT value) {
68    // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4.
69    // So round up to allocate 3 output characters per byte, plus 1 for '-'.
70    const int kOutputBufSize = 3 * sizeof(INT) + 1;
71
72    // Allocate the whole string right away, we will right back to front, and
73    // then return the substr of what we ended up using.
74    STR outbuf(kOutputBufSize, 0);
75
76    bool is_neg = TestNegT<INT, NEG>::TestNeg(value);
77    // Even though is_neg will never be true when INT is parameterized as
78    // unsigned, even the presence of the unary operation causes a warning.
79    UINT res = ToUnsignedT<INT, UINT, NEG>::ToUnsigned(value);
80
81    for (typename STR::iterator it = outbuf.end();;) {
82      --it;
83      DCHECK(it != outbuf.begin());
84      *it = static_cast<typename STR::value_type>((res % 10) + '0');
85      res /= 10;
86
87      // We're done..
88      if (res == 0) {
89        if (is_neg) {
90          --it;
91          DCHECK(it != outbuf.begin());
92          *it = static_cast<typename STR::value_type>('-');
93        }
94        return STR(it, outbuf.end());
95      }
96    }
97    NOTREACHED();
98    return STR();
99  }
100};
101
// Utility to convert a character to a digit in a given base. The primary
// template is deliberately empty; Convert() is supplied by the partial
// specializations selected through BASE_LTE_10.
template <typename CHAR, int BASE, bool BASE_LTE_10>
class BaseCharToDigit {};
105
106// Faster specialization for bases <= 10
107template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, true> {
108 public:
109  static bool Convert(CHAR c, uint8* digit) {
110    if (c >= '0' && c < '0' + BASE) {
111      *digit = c - '0';
112      return true;
113    }
114    return false;
115  }
116};
117
118// Specialization for bases where 10 < base <= 36
119template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, false> {
120 public:
121  static bool Convert(CHAR c, uint8* digit) {
122    if (c >= '0' && c <= '9') {
123      *digit = c - '0';
124    } else if (c >= 'a' && c < 'a' + BASE - 10) {
125      *digit = c - 'a' + 10;
126    } else if (c >= 'A' && c < 'A' + BASE - 10) {
127      *digit = c - 'A' + 10;
128    } else {
129      return false;
130    }
131    return true;
132  }
133};
134
135template<int BASE, typename CHAR> bool CharToDigit(CHAR c, uint8* digit) {
136  return BaseCharToDigit<CHAR, BASE, BASE <= 10>::Convert(c, digit);
137}
138
// string_util.h already defines an IsWhitespace for wchars, but that one is
// locale independent, while the functions being replaced here were
// locale-dependent. Which behaviour is actually wanted is still TBD; until
// that is settled, keep the existing behaviour. The primary template is
// empty; Invoke() comes from the per-character-type specializations.
template <typename CHAR>
class WhitespaceHelper {};
145
146template<> class WhitespaceHelper<char> {
147 public:
148  static bool Invoke(char c) {
149    return 0 != isspace(static_cast<unsigned char>(c));
150  }
151};
152
153template<> class WhitespaceHelper<char16> {
154 public:
155  static bool Invoke(char16 c) {
156    return 0 != iswspace(c);
157  }
158};
159
160template<typename CHAR> bool LocalIsWhitespace(CHAR c) {
161  return WhitespaceHelper<CHAR>::Invoke(c);
162}
163
164// IteratorRangeToNumberTraits should provide:
165//  - a typedef for iterator_type, the iterator type used as input.
166//  - a typedef for value_type, the target numeric type.
167//  - static functions min, max (returning the minimum and maximum permitted
168//    values)
169//  - constant kBase, the base in which to interpret the input
170template<typename IteratorRangeToNumberTraits>
171class IteratorRangeToNumber {
172 public:
173  typedef IteratorRangeToNumberTraits traits;
174  typedef typename traits::iterator_type const_iterator;
175  typedef typename traits::value_type value_type;
176
177  // Generalized iterator-range-to-number conversion.
178  //
179  static bool Invoke(const_iterator begin,
180                     const_iterator end,
181                     value_type* output) {
182    bool valid = true;
183
184    while (begin != end && LocalIsWhitespace(*begin)) {
185      valid = false;
186      ++begin;
187    }
188
189    if (begin != end && *begin == '-') {
190      if (!std::numeric_limits<value_type>::is_signed) {
191        valid = false;
192      } else if (!Negative::Invoke(begin + 1, end, output)) {
193        valid = false;
194      }
195    } else {
196      if (begin != end && *begin == '+') {
197        ++begin;
198      }
199      if (!Positive::Invoke(begin, end, output)) {
200        valid = false;
201      }
202    }
203
204    return valid;
205  }
206
207 private:
208  // Sign provides:
209  //  - a static function, CheckBounds, that determines whether the next digit
210  //    causes an overflow/underflow
211  //  - a static function, Increment, that appends the next digit appropriately
212  //    according to the sign of the number being parsed.
213  template<typename Sign>
214  class Base {
215   public:
216    static bool Invoke(const_iterator begin, const_iterator end,
217                       typename traits::value_type* output) {
218      *output = 0;
219
220      if (begin == end) {
221        return false;
222      }
223
224      // Note: no performance difference was found when using template
225      // specialization to remove this check in bases other than 16
226      if (traits::kBase == 16 && end - begin > 2 && *begin == '0' &&
227          (*(begin + 1) == 'x' || *(begin + 1) == 'X')) {
228        begin += 2;
229      }
230
231      for (const_iterator current = begin; current != end; ++current) {
232        uint8 new_digit = 0;
233
234        if (!CharToDigit<traits::kBase>(*current, &new_digit)) {
235          return false;
236        }
237
238        if (current != begin) {
239          if (!Sign::CheckBounds(output, new_digit)) {
240            return false;
241          }
242          *output *= traits::kBase;
243        }
244
245        Sign::Increment(new_digit, output);
246      }
247      return true;
248    }
249  };
250
251  class Positive : public Base<Positive> {
252   public:
253    static bool CheckBounds(value_type* output, uint8 new_digit) {
254      if (*output > static_cast<value_type>(traits::max() / traits::kBase) ||
255          (*output == static_cast<value_type>(traits::max() / traits::kBase) &&
256           new_digit > traits::max() % traits::kBase)) {
257        *output = traits::max();
258        return false;
259      }
260      return true;
261    }
262    static void Increment(uint8 increment, value_type* output) {
263      *output += increment;
264    }
265  };
266
267  class Negative : public Base<Negative> {
268   public:
269    static bool CheckBounds(value_type* output, uint8 new_digit) {
270      if (*output < traits::min() / traits::kBase ||
271          (*output == traits::min() / traits::kBase &&
272           new_digit > 0 - traits::min() % traits::kBase)) {
273        *output = traits::min();
274        return false;
275      }
276      return true;
277    }
278    static void Increment(uint8 increment, value_type* output) {
279      *output -= increment;
280    }
281  };
282};
283
// Generic traits bundle for IteratorRangeToNumber: names the input iterator
// type, the numeric result type and the radix, and exposes the numeric
// limits of the result type.
template <typename ITERATOR, typename VALUE, int BASE>
class BaseIteratorRangeToNumberTraits {
 public:
  typedef ITERATOR iterator_type;
  typedef VALUE value_type;

  // Base in which the input digits are interpreted.
  static const int kBase = BASE;

  // Smallest value representable by value_type.
  static value_type min() {
    typedef std::numeric_limits<value_type> Limits;
    return Limits::min();
  }

  // Largest value representable by value_type.
  static value_type max() {
    typedef std::numeric_limits<value_type> Limits;
    return Limits::max();
  }
};
297
298template<typename ITERATOR>
299class BaseHexIteratorRangeToIntTraits
300    : public BaseIteratorRangeToNumberTraits<ITERATOR, int, 16> {
301};
302
303template<typename ITERATOR>
304class BaseHexIteratorRangeToInt64Traits
305    : public BaseIteratorRangeToNumberTraits<ITERATOR, int64, 16> {
306};
307
308template<typename ITERATOR>
309class BaseHexIteratorRangeToUInt64Traits
310    : public BaseIteratorRangeToNumberTraits<ITERATOR, uint64, 16> {
311};
312
313typedef BaseHexIteratorRangeToIntTraits<StringPiece::const_iterator>
314    HexIteratorRangeToIntTraits;
315
316typedef BaseHexIteratorRangeToInt64Traits<StringPiece::const_iterator>
317    HexIteratorRangeToInt64Traits;
318
319typedef BaseHexIteratorRangeToUInt64Traits<StringPiece::const_iterator>
320    HexIteratorRangeToUInt64Traits;
321
322template<typename STR>
323bool HexStringToBytesT(const STR& input, std::vector<uint8>* output) {
324  DCHECK_EQ(output->size(), 0u);
325  size_t count = input.size();
326  if (count == 0 || (count % 2) != 0)
327    return false;
328  for (uintptr_t i = 0; i < count / 2; ++i) {
329    uint8 msb = 0;  // most significant 4 bits
330    uint8 lsb = 0;  // least significant 4 bits
331    if (!CharToDigit<16>(input[i * 2], &msb) ||
332        !CharToDigit<16>(input[i * 2 + 1], &lsb))
333      return false;
334    output->push_back((msb << 4) | lsb);
335  }
336  return true;
337}
338
339template <typename VALUE, int BASE>
340class StringPieceToNumberTraits
341    : public BaseIteratorRangeToNumberTraits<StringPiece::const_iterator,
342                                             VALUE,
343                                             BASE> {
344};
345
346template <typename VALUE>
347bool StringToIntImpl(const StringPiece& input, VALUE* output) {
348  return IteratorRangeToNumber<StringPieceToNumberTraits<VALUE, 10> >::Invoke(
349      input.begin(), input.end(), output);
350}
351
352template <typename VALUE, int BASE>
353class StringPiece16ToNumberTraits
354    : public BaseIteratorRangeToNumberTraits<StringPiece16::const_iterator,
355                                             VALUE,
356                                             BASE> {
357};
358
359template <typename VALUE>
360bool String16ToIntImpl(const StringPiece16& input, VALUE* output) {
361  return IteratorRangeToNumber<StringPiece16ToNumberTraits<VALUE, 10> >::Invoke(
362      input.begin(), input.end(), output);
363}
364
365}  // namespace
366
367std::string IntToString(int value) {
368  return IntToStringT<std::string, int, unsigned int, true>::
369      IntToString(value);
370}
371
372string16 IntToString16(int value) {
373  return IntToStringT<string16, int, unsigned int, true>::
374      IntToString(value);
375}
376
377std::string UintToString(unsigned int value) {
378  return IntToStringT<std::string, unsigned int, unsigned int, false>::
379      IntToString(value);
380}
381
382string16 UintToString16(unsigned int value) {
383  return IntToStringT<string16, unsigned int, unsigned int, false>::
384      IntToString(value);
385}
386
387std::string Int64ToString(int64 value) {
388  return IntToStringT<std::string, int64, uint64, true>::
389      IntToString(value);
390}
391
392string16 Int64ToString16(int64 value) {
393  return IntToStringT<string16, int64, uint64, true>::IntToString(value);
394}
395
396std::string Uint64ToString(uint64 value) {
397  return IntToStringT<std::string, uint64, uint64, false>::
398      IntToString(value);
399}
400
401string16 Uint64ToString16(uint64 value) {
402  return IntToStringT<string16, uint64, uint64, false>::
403      IntToString(value);
404}
405
406std::string DoubleToString(double value) {
407  // According to g_fmt.cc, it is sufficient to declare a buffer of size 32.
408  char buffer[32];
409  dmg_fp::g_fmt(buffer, value);
410  return std::string(buffer);
411}
412
413bool StringToInt(const StringPiece& input, int* output) {
414  return StringToIntImpl(input, output);
415}
416
417bool StringToInt(const StringPiece16& input, int* output) {
418  return String16ToIntImpl(input, output);
419}
420
421bool StringToUint(const StringPiece& input, unsigned* output) {
422  return StringToIntImpl(input, output);
423}
424
425bool StringToUint(const StringPiece16& input, unsigned* output) {
426  return String16ToIntImpl(input, output);
427}
428
429bool StringToInt64(const StringPiece& input, int64* output) {
430  return StringToIntImpl(input, output);
431}
432
433bool StringToInt64(const StringPiece16& input, int64* output) {
434  return String16ToIntImpl(input, output);
435}
436
437bool StringToUint64(const StringPiece& input, uint64* output) {
438  return StringToIntImpl(input, output);
439}
440
441bool StringToUint64(const StringPiece16& input, uint64* output) {
442  return String16ToIntImpl(input, output);
443}
444
445bool StringToSizeT(const StringPiece& input, size_t* output) {
446  return StringToIntImpl(input, output);
447}
448
449bool StringToSizeT(const StringPiece16& input, size_t* output) {
450  return String16ToIntImpl(input, output);
451}
452
453bool StringToDouble(const std::string& input, double* output) {
454  // Thread-safe?  It is on at least Mac, Linux, and Windows.
455  ScopedClearErrno clear_errno;
456
457  char* endptr = NULL;
458  *output = dmg_fp::strtod(input.c_str(), &endptr);
459
460  // Cases to return false:
461  //  - If errno is ERANGE, there was an overflow or underflow.
462  //  - If the input string is empty, there was nothing to parse.
463  //  - If endptr does not point to the end of the string, there are either
464  //    characters remaining in the string after a parsed number, or the string
465  //    does not begin with a parseable number.  endptr is compared to the
466  //    expected end given the string's stated length to correctly catch cases
467  //    where the string contains embedded NUL characters.
468  //  - If the first character is a space, there was leading whitespace
469  return errno == 0 &&
470         !input.empty() &&
471         input.c_str() + input.length() == endptr &&
472         !isspace(input[0]);
473}
474
475// Note: if you need to add String16ToDouble, first ask yourself if it's
476// really necessary. If it is, probably the best implementation here is to
477// convert to 8-bit and then use the 8-bit version.
478
479// Note: if you need to add an iterator range version of StringToDouble, first
480// ask yourself if it's really necessary. If it is, probably the best
481// implementation here is to instantiate a string and use the string version.
482
// Returns the upper-case hexadecimal encoding of the |size| bytes starting at
// |bytes|. The result holds exactly two characters per input byte, most
// significant nibble first.
std::string HexEncode(const void* bytes, size_t size) {
  static const char kHexChars[] = "0123456789ABCDEF";

  const unsigned char* const data = static_cast<const unsigned char*>(bytes);

  // Each input byte creates two output hex characters.
  std::string encoded(size * 2, '\0');

  for (size_t i = 0; i < size; ++i) {
    const unsigned char byte = data[i];
    const size_t out = i * 2;
    encoded[out] = kHexChars[byte >> 4];
    encoded[out + 1] = kHexChars[byte & 0xf];
  }
  return encoded;
}
496
497bool HexStringToInt(const StringPiece& input, int* output) {
498  return IteratorRangeToNumber<HexIteratorRangeToIntTraits>::Invoke(
499    input.begin(), input.end(), output);
500}
501
502bool HexStringToInt64(const StringPiece& input, int64* output) {
503  return IteratorRangeToNumber<HexIteratorRangeToInt64Traits>::Invoke(
504    input.begin(), input.end(), output);
505}
506
507bool HexStringToUInt64(const StringPiece& input, uint64* output) {
508  return IteratorRangeToNumber<HexIteratorRangeToUInt64Traits>::Invoke(
509      input.begin(), input.end(), output);
510}
511
512bool HexStringToBytes(const std::string& input, std::vector<uint8>* output) {
513  return HexStringToBytesT(input, output);
514}
515
516}  // namespace base
517