1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/string_number_conversions.h"
6
7#include <errno.h>
8#include <stdlib.h>
9
10#include <limits>
11
12#include "base/logging.h"
13#include "base/third_party/dmg_fp/dmg_fp.h"
14#include "base/utf_string_conversions.h"
15
16namespace base {
17
18namespace {
19
20template <typename STR, typename INT, typename UINT, bool NEG>
21struct IntToStringT {
22  // This is to avoid a compiler warning about unary minus on unsigned type.
23  // For example, say you had the following code:
24  //   template <typename INT>
25  //   INT abs(INT value) { return value < 0 ? -value : value; }
26  // Even though if INT is unsigned, it's impossible for value < 0, so the
27  // unary minus will never be taken, the compiler will still generate a
28  // warning.  We do a little specialization dance...
29  template <typename INT2, typename UINT2, bool NEG2>
30  struct ToUnsignedT {};
31
32  template <typename INT2, typename UINT2>
33  struct ToUnsignedT<INT2, UINT2, false> {
34    static UINT2 ToUnsigned(INT2 value) {
35      return static_cast<UINT2>(value);
36    }
37  };
38
39  template <typename INT2, typename UINT2>
40  struct ToUnsignedT<INT2, UINT2, true> {
41    static UINT2 ToUnsigned(INT2 value) {
42      return static_cast<UINT2>(value < 0 ? -value : value);
43    }
44  };
45
46  // This set of templates is very similar to the above templates, but
47  // for testing whether an integer is negative.
48  template <typename INT2, bool NEG2>
49  struct TestNegT {};
50  template <typename INT2>
51  struct TestNegT<INT2, false> {
52    static bool TestNeg(INT2 value) {
53      // value is unsigned, and can never be negative.
54      return false;
55    }
56  };
57  template <typename INT2>
58  struct TestNegT<INT2, true> {
59    static bool TestNeg(INT2 value) {
60      return value < 0;
61    }
62  };
63
64  static STR IntToString(INT value) {
65    // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4.
66    // So round up to allocate 3 output characters per byte, plus 1 for '-'.
67    const int kOutputBufSize = 3 * sizeof(INT) + 1;
68
69    // Allocate the whole string right away, we will right back to front, and
70    // then return the substr of what we ended up using.
71    STR outbuf(kOutputBufSize, 0);
72
73    bool is_neg = TestNegT<INT, NEG>::TestNeg(value);
74    // Even though is_neg will never be true when INT is parameterized as
75    // unsigned, even the presence of the unary operation causes a warning.
76    UINT res = ToUnsignedT<INT, UINT, NEG>::ToUnsigned(value);
77
78    for (typename STR::iterator it = outbuf.end();;) {
79      --it;
80      DCHECK(it != outbuf.begin());
81      *it = static_cast<typename STR::value_type>((res % 10) + '0');
82      res /= 10;
83
84      // We're done..
85      if (res == 0) {
86        if (is_neg) {
87          --it;
88          DCHECK(it != outbuf.begin());
89          *it = static_cast<typename STR::value_type>('-');
90        }
91        return STR(it, outbuf.end());
92      }
93    }
94    NOTREACHED();
95    return STR();
96  }
97};
98
// Utility to convert a character to a digit in a given base.
// The primary template is intentionally empty; the work is done by the two
// partial specializations on BASE_LTE_10 that follow.
template<typename CHAR, int BASE, bool BASE_LTE_10>
class BaseCharToDigit {};
102
103// Faster specialization for bases <= 10
104template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, true> {
105 public:
106  static bool Convert(CHAR c, uint8* digit) {
107    if (c >= '0' && c < '0' + BASE) {
108      *digit = c - '0';
109      return true;
110    }
111    return false;
112  }
113};
114
115// Specialization for bases where 10 < base <= 36
116template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, false> {
117 public:
118  static bool Convert(CHAR c, uint8* digit) {
119    if (c >= '0' && c <= '9') {
120      *digit = c - '0';
121    } else if (c >= 'a' && c < 'a' + BASE - 10) {
122      *digit = c - 'a' + 10;
123    } else if (c >= 'A' && c < 'A' + BASE - 10) {
124      *digit = c - 'A' + 10;
125    } else {
126      return false;
127    }
128    return true;
129  }
130};
131
132template<int BASE, typename CHAR> bool CharToDigit(CHAR c, uint8* digit) {
133  return BaseCharToDigit<CHAR, BASE, BASE <= 10>::Convert(c, digit);
134}
135
136// There is an IsWhitespace for wchars defined in string_util.h, but it is
137// locale independent, whereas the functions we are replacing were
138// locale-dependent. TBD what is desired, but for the moment let's not introduce
139// a change in behaviour.
// Primary template is intentionally empty; only the explicit specializations
// for the supported character types provide Invoke().
template<typename CHAR>
class WhitespaceHelper {};
142
143template<> class WhitespaceHelper<char> {
144 public:
145  static bool Invoke(char c) {
146    return 0 != isspace(static_cast<unsigned char>(c));
147  }
148};
149
150template<> class WhitespaceHelper<char16> {
151 public:
152  static bool Invoke(char16 c) {
153    return 0 != iswspace(c);
154  }
155};
156
157template<typename CHAR> bool LocalIsWhitespace(CHAR c) {
158  return WhitespaceHelper<CHAR>::Invoke(c);
159}
160
161// IteratorRangeToNumberTraits should provide:
162//  - a typedef for iterator_type, the iterator type used as input.
163//  - a typedef for value_type, the target numeric type.
164//  - static functions min, max (returning the minimum and maximum permitted
165//    values)
166//  - constant kBase, the base in which to interpret the input
167template<typename IteratorRangeToNumberTraits>
168class IteratorRangeToNumber {
169 public:
170  typedef IteratorRangeToNumberTraits traits;
171  typedef typename traits::iterator_type const_iterator;
172  typedef typename traits::value_type value_type;
173
174  // Generalized iterator-range-to-number conversion.
175  //
176  static bool Invoke(const_iterator begin,
177                     const_iterator end,
178                     value_type* output) {
179    bool valid = true;
180
181    while (begin != end && LocalIsWhitespace(*begin)) {
182      valid = false;
183      ++begin;
184    }
185
186    if (begin != end && *begin == '-') {
187      if (!Negative::Invoke(begin + 1, end, output)) {
188        valid = false;
189      }
190    } else {
191      if (begin != end && *begin == '+') {
192        ++begin;
193      }
194      if (!Positive::Invoke(begin, end, output)) {
195        valid = false;
196      }
197    }
198
199    return valid;
200  }
201
202 private:
203  // Sign provides:
204  //  - a static function, CheckBounds, that determines whether the next digit
205  //    causes an overflow/underflow
206  //  - a static function, Increment, that appends the next digit appropriately
207  //    according to the sign of the number being parsed.
208  template<typename Sign>
209  class Base {
210   public:
211    static bool Invoke(const_iterator begin, const_iterator end,
212                       typename traits::value_type* output) {
213      *output = 0;
214
215      if (begin == end) {
216        return false;
217      }
218
219      // Note: no performance difference was found when using template
220      // specialization to remove this check in bases other than 16
221      if (traits::kBase == 16 && end - begin >= 2 && *begin == '0' &&
222          (*(begin + 1) == 'x' || *(begin + 1) == 'X')) {
223        begin += 2;
224      }
225
226      for (const_iterator current = begin; current != end; ++current) {
227        uint8 new_digit = 0;
228
229        if (!CharToDigit<traits::kBase>(*current, &new_digit)) {
230          return false;
231        }
232
233        if (current != begin) {
234          if (!Sign::CheckBounds(output, new_digit)) {
235            return false;
236          }
237          *output *= traits::kBase;
238        }
239
240        Sign::Increment(new_digit, output);
241      }
242      return true;
243    }
244  };
245
246  class Positive : public Base<Positive> {
247   public:
248    static bool CheckBounds(value_type* output, uint8 new_digit) {
249      if (*output > static_cast<value_type>(traits::max() / traits::kBase) ||
250          (*output == static_cast<value_type>(traits::max() / traits::kBase) &&
251           new_digit > traits::max() % traits::kBase)) {
252        *output = traits::max();
253        return false;
254      }
255      return true;
256    }
257    static void Increment(uint8 increment, value_type* output) {
258      *output += increment;
259    }
260  };
261
262  class Negative : public Base<Negative> {
263   public:
264    static bool CheckBounds(value_type* output, uint8 new_digit) {
265      if (*output < traits::min() / traits::kBase ||
266          (*output == traits::min() / traits::kBase &&
267           new_digit > 0 - traits::min() % traits::kBase)) {
268        *output = traits::min();
269        return false;
270      }
271      return true;
272    }
273    static void Increment(uint8 increment, value_type* output) {
274      *output -= increment;
275    }
276  };
277};
278
// Default traits for IteratorRangeToNumber: parses a range of ITERATOR into a
// VALUE in base BASE, bounded by the numeric limits of VALUE.
template<typename ITERATOR, typename VALUE, int BASE>
class BaseIteratorRangeToNumberTraits {
 public:
  typedef ITERATOR iterator_type;
  typedef VALUE value_type;

  // The base in which the input is interpreted.
  static const int kBase = BASE;

  // Smallest value that may be produced.
  static value_type min() {
    typedef std::numeric_limits<value_type> Limits;
    return Limits::min();
  }

  // Largest value that may be produced.
  static value_type max() {
    typedef std::numeric_limits<value_type> Limits;
    return Limits::max();
  }
};
292
293typedef BaseIteratorRangeToNumberTraits<std::string::const_iterator, int, 10>
294    IteratorRangeToIntTraits;
295typedef BaseIteratorRangeToNumberTraits<string16::const_iterator, int, 10>
296    WideIteratorRangeToIntTraits;
297typedef BaseIteratorRangeToNumberTraits<std::string::const_iterator, int64, 10>
298    IteratorRangeToInt64Traits;
299typedef BaseIteratorRangeToNumberTraits<string16::const_iterator, int64, 10>
300    WideIteratorRangeToInt64Traits;
301
302typedef BaseIteratorRangeToNumberTraits<const char*, int, 10>
303    CharBufferToIntTraits;
304typedef BaseIteratorRangeToNumberTraits<const char16*, int, 10>
305    WideCharBufferToIntTraits;
306typedef BaseIteratorRangeToNumberTraits<const char*, int64, 10>
307    CharBufferToInt64Traits;
308typedef BaseIteratorRangeToNumberTraits<const char16*, int64, 10>
309    WideCharBufferToInt64Traits;
310
311template<typename ITERATOR>
312class BaseHexIteratorRangeToIntTraits
313    : public BaseIteratorRangeToNumberTraits<ITERATOR, int, 16> {
314 public:
315  // Allow parsing of 0xFFFFFFFF, which is technically an overflow
316  static unsigned int max() {
317    return std::numeric_limits<unsigned int>::max();
318  }
319};
320
321typedef BaseHexIteratorRangeToIntTraits<std::string::const_iterator>
322    HexIteratorRangeToIntTraits;
323typedef BaseHexIteratorRangeToIntTraits<const char*>
324    HexCharBufferToIntTraits;
325
326template<typename STR>
327bool HexStringToBytesT(const STR& input, std::vector<uint8>* output) {
328  DCHECK_EQ(output->size(), 0u);
329  size_t count = input.size();
330  if (count == 0 || (count % 2) != 0)
331    return false;
332  for (uintptr_t i = 0; i < count / 2; ++i) {
333    uint8 msb = 0;  // most significant 4 bits
334    uint8 lsb = 0;  // least significant 4 bits
335    if (!CharToDigit<16>(input[i * 2], &msb) ||
336        !CharToDigit<16>(input[i * 2 + 1], &lsb))
337      return false;
338    output->push_back((msb << 4) | lsb);
339  }
340  return true;
341}
342
343}  // namespace
344
345std::string IntToString(int value) {
346  return IntToStringT<std::string, int, unsigned int, true>::
347      IntToString(value);
348}
349
350string16 IntToString16(int value) {
351  return IntToStringT<string16, int, unsigned int, true>::
352      IntToString(value);
353}
354
355std::string UintToString(unsigned int value) {
356  return IntToStringT<std::string, unsigned int, unsigned int, false>::
357      IntToString(value);
358}
359
360string16 UintToString16(unsigned int value) {
361  return IntToStringT<string16, unsigned int, unsigned int, false>::
362      IntToString(value);
363}
364
365std::string Int64ToString(int64 value) {
366  return IntToStringT<std::string, int64, uint64, true>::
367      IntToString(value);
368}
369
370string16 Int64ToString16(int64 value) {
371  return IntToStringT<string16, int64, uint64, true>::IntToString(value);
372}
373
374std::string Uint64ToString(uint64 value) {
375  return IntToStringT<std::string, uint64, uint64, false>::
376      IntToString(value);
377}
378
379string16 Uint64ToString16(uint64 value) {
380  return IntToStringT<string16, uint64, uint64, false>::
381      IntToString(value);
382}
383
384std::string DoubleToString(double value) {
385  // According to g_fmt.cc, it is sufficient to declare a buffer of size 32.
386  char buffer[32];
387  dmg_fp::g_fmt(buffer, value);
388  return std::string(buffer);
389}
390
391bool StringToInt(const std::string& input, int* output) {
392  return IteratorRangeToNumber<IteratorRangeToIntTraits>::Invoke(input.begin(),
393                                                                 input.end(),
394                                                                 output);
395}
396
397#if !defined(ANDROID)
398bool StringToInt(std::string::const_iterator begin,
399                 std::string::const_iterator end,
400                 int* output) {
401  return IteratorRangeToNumber<IteratorRangeToIntTraits>::Invoke(begin,
402                                                                 end,
403                                                                 output);
404}
405#endif
406
407bool StringToInt(const char* begin, const char* end, int* output) {
408  return IteratorRangeToNumber<CharBufferToIntTraits>::Invoke(begin,
409                                                              end,
410                                                              output);
411}
412
413bool StringToInt(const string16& input, int* output) {
414  return IteratorRangeToNumber<WideIteratorRangeToIntTraits>::Invoke(
415    input.begin(), input.end(), output);
416}
417
418#if !defined(ANDROID)
419bool StringToInt(string16::const_iterator begin,
420                 string16::const_iterator end,
421                 int* output) {
422  return IteratorRangeToNumber<WideIteratorRangeToIntTraits>::Invoke(begin,
423                                                                     end,
424                                                                     output);
425}
426#endif
427
428bool StringToInt(const char16* begin, const char16* end, int* output) {
429  return IteratorRangeToNumber<WideCharBufferToIntTraits>::Invoke(begin,
430                                                                  end,
431                                                                  output);
432}
433
434bool StringToInt64(const std::string& input, int64* output) {
435  return IteratorRangeToNumber<IteratorRangeToInt64Traits>::Invoke(
436    input.begin(), input.end(), output);
437}
438
439#if !defined(ANDROID)
440bool StringToInt64(std::string::const_iterator begin,
441                 std::string::const_iterator end,
442                 int64* output) {
443  return IteratorRangeToNumber<IteratorRangeToInt64Traits>::Invoke(begin,
444                                                                 end,
445                                                                 output);
446}
447#endif
448
449bool StringToInt64(const char* begin, const char* end, int64* output) {
450  return IteratorRangeToNumber<CharBufferToInt64Traits>::Invoke(begin,
451                                                              end,
452                                                              output);
453}
454
455bool StringToInt64(const string16& input, int64* output) {
456  return IteratorRangeToNumber<WideIteratorRangeToInt64Traits>::Invoke(
457    input.begin(), input.end(), output);
458}
459
460#if !defined(ANDROID)
461bool StringToInt64(string16::const_iterator begin,
462                 string16::const_iterator end,
463                 int64* output) {
464  return IteratorRangeToNumber<WideIteratorRangeToInt64Traits>::Invoke(begin,
465                                                                     end,
466                                                                     output);
467}
468#endif
469
470bool StringToInt64(const char16* begin, const char16* end, int64* output) {
471  return IteratorRangeToNumber<WideCharBufferToInt64Traits>::Invoke(begin,
472                                                                  end,
473                                                                  output);
474}
475
476bool StringToDouble(const std::string& input, double* output) {
477  errno = 0;  // Thread-safe?  It is on at least Mac, Linux, and Windows.
478  char* endptr = NULL;
479  *output = dmg_fp::strtod(input.c_str(), &endptr);
480
481  // Cases to return false:
482  //  - If errno is ERANGE, there was an overflow or underflow.
483  //  - If the input string is empty, there was nothing to parse.
484  //  - If endptr does not point to the end of the string, there are either
485  //    characters remaining in the string after a parsed number, or the string
486  //    does not begin with a parseable number.  endptr is compared to the
487  //    expected end given the string's stated length to correctly catch cases
488  //    where the string contains embedded NUL characters.
489  //  - If the first character is a space, there was leading whitespace
490  return errno == 0 &&
491         !input.empty() &&
492         input.c_str() + input.length() == endptr &&
493         !isspace(input[0]);
494}
495
496// Note: if you need to add String16ToDouble, first ask yourself if it's
497// really necessary. If it is, probably the best implementation here is to
498// convert to 8-bit and then use the 8-bit version.
499
500// Note: if you need to add an iterator range version of StringToDouble, first
501// ask yourself if it's really necessary. If it is, probably the best
502// implementation here is to instantiate a string and use the string version.
503
// Returns the upper-case hexadecimal encoding of the |size| bytes starting at
// |bytes|: two characters per input byte, most significant nibble first. An
// empty string is returned when |size| is 0.
std::string HexEncode(const void* bytes, size_t size) {
  static const char kHexChars[] = "0123456789ABCDEF";

  // Read the input as unsigned bytes so that shifting a byte >= 0x80 never
  // right-shifts a negative value (implementation-defined for signed char).
  const unsigned char* data = static_cast<const unsigned char*>(bytes);

  // Each input byte creates two output hex characters.
  std::string ret(size * 2, '\0');

  for (size_t i = 0; i < size; ++i) {
    const unsigned char b = data[i];
    ret[(i * 2)] = kHexChars[b >> 4];
    ret[(i * 2) + 1] = kHexChars[b & 0xf];
  }
  return ret;
}
517
518bool HexStringToInt(const std::string& input, int* output) {
519  return IteratorRangeToNumber<HexIteratorRangeToIntTraits>::Invoke(
520    input.begin(), input.end(), output);
521}
522
523#if !defined(ANDROID)
524bool HexStringToInt(std::string::const_iterator begin,
525                    std::string::const_iterator end,
526                    int* output) {
527  return IteratorRangeToNumber<HexIteratorRangeToIntTraits>::Invoke(begin,
528                                                                    end,
529                                                                    output);
530}
531#endif
532
533bool HexStringToInt(const char* begin, const char* end, int* output) {
534  return IteratorRangeToNumber<HexCharBufferToIntTraits>::Invoke(begin,
535                                                                    end,
536                                                                    output);
537}
538
539bool HexStringToBytes(const std::string& input, std::vector<uint8>* output) {
540  return HexStringToBytesT(input, output);
541}
542
543}  // namespace base
544
545