1/*
2 * libjingle
3 * Copyright 2004--2011, Google Inc.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 *  1. Redistributions of source code must retain the above copyright notice,
9 *     this list of conditions and the following disclaimer.
10 *  2. Redistributions in binary form must reproduce the above copyright notice,
11 *     this list of conditions and the following disclaimer in the documentation
12 *     and/or other materials provided with the distribution.
13 *  3. The name of the author may not be used to endorse or promote products
14 *     derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "talk/base/stringencode.h"
29
30#include <cstdio>
31#include <cstdlib>
32
33#include "talk/base/basictypes.h"
34#include "talk/base/common.h"
35#include "talk/base/stringutils.h"
36
37namespace talk_base {
38
39/////////////////////////////////////////////////////////////////////////////
40// String Encoding Utilities
41/////////////////////////////////////////////////////////////////////////////
42
// Copies |source| into |buffer|, inserting the |escape| character in front of
// every occurrence of |escape| itself and of any character listed in
// |illegal|.
//
//   buffer  - destination; receives a NUL-terminated string.  When NULL,
//             nothing is written and an upper bound on the buffer size needed
//             (terminator included) is returned, as url_encode() does.
//   buflen  - capacity of |buffer| in bytes, terminator included.
//   source  - input bytes; need not be NUL-terminated.
//   srclen  - number of bytes to read from |source|.
//   illegal - NUL-terminated set of characters that must be escaped.
//   escape  - the escape character.
//
// Returns the number of characters written, not counting the terminator.
// Output is truncated when |buffer| is too small, but an escape pair is never
// split.  Because ::strchr also matches the terminator of |illegal|, a NUL
// byte inside |source| is always escaped.
size_t escape(char * buffer, size_t buflen,
              const char * source, size_t srclen,
              const char * illegal, char escape) {
  if (NULL == buffer)
    return srclen * 2 + 1;  // Worst case: every character needs escaping.
  if (buflen == 0)
    return 0;

  size_t srcpos = 0, bufpos = 0;
  while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
    const char ch = source[srcpos++];
    if ((ch == escape) || ::strchr(illegal, ch)) {
      // Room is needed for the escape character, the character itself and
      // the terminator; stop instead of emitting half a pair.
      if (bufpos + 2 >= buflen)
        break;
      buffer[bufpos++] = escape;
    }
    buffer[bufpos++] = ch;
  }

  buffer[bufpos] = '\0';
  return bufpos;
}
64
// Reverses escape(): copies |source| into |buffer|, dropping each |escape|
// character and copying the character that follows it verbatim.
//
//   buffer - destination; receives a NUL-terminated string.  When NULL,
//            nothing is written and an upper bound on the buffer size needed
//            (terminator included) is returned, as url_decode() does.
//   buflen - capacity of |buffer| in bytes, terminator included.
//   source - input bytes; need not be NUL-terminated.
//   srclen - number of bytes to read from |source|.
//   escape - the escape character.
//
// Returns the number of characters written, not counting the terminator.
// An |escape| character that is the very last input byte is copied as-is.
size_t unescape(char * buffer, size_t buflen,
                const char * source, size_t srclen,
                char escape) {
  if (NULL == buffer)
    return srclen + 1;  // Unescaping never lengthens the text.
  if (buflen == 0)
    return 0;

  size_t srcpos = 0, bufpos = 0;
  while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
    char ch = source[srcpos++];
    if ((ch == escape) && (srcpos < srclen)) {
      // Skip the escape character and take the next byte literally.
      ch = source[srcpos++];
    }
    buffer[bufpos++] = ch;
  }
  buffer[bufpos] = '\0';
  return bufpos;
}
83
84size_t encode(char * buffer, size_t buflen,
85              const char * source, size_t srclen,
86              const char * illegal, char escape) {
87  ASSERT(NULL != buffer);  // TODO: estimate output size
88  if (buflen <= 0)
89    return 0;
90
91  size_t srcpos = 0, bufpos = 0;
92  while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
93    char ch = source[srcpos++];
94    if ((ch != escape) && !::strchr(illegal, ch)) {
95      buffer[bufpos++] = ch;
96    } else if (bufpos + 3 >= buflen) {
97      break;
98    } else {
99      buffer[bufpos+0] = escape;
100      buffer[bufpos+1] = hex_encode((static_cast<unsigned char>(ch) >> 4) & 0xF);
101      buffer[bufpos+2] = hex_encode((static_cast<unsigned char>(ch)     ) & 0xF);
102      bufpos += 3;
103    }
104  }
105  buffer[bufpos] = '\0';
106  return bufpos;
107}
108
109size_t decode(char * buffer, size_t buflen,
110              const char * source, size_t srclen,
111              char escape) {
112  if (buflen <= 0)
113    return 0;
114
115  unsigned char h1, h2;
116  size_t srcpos = 0, bufpos = 0;
117  while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
118    char ch = source[srcpos++];
119    if ((ch == escape)
120        && (srcpos + 1 < srclen)
121        && hex_decode(source[srcpos], &h1)
122        && hex_decode(source[srcpos+1], &h2)) {
123      buffer[bufpos++] = (h1 << 4) | h2;
124      srcpos += 2;
125    } else {
126      buffer[bufpos++] = ch;
127    }
128  }
129  buffer[bufpos] = '\0';
130  return bufpos;
131}
132
133const char* unsafe_filename_characters() {
134  // It might be better to have a single specification which is the union of
135  // all operating systems, unless one system is overly restrictive.
136#ifdef WIN32
137  return "\\/:*?\"<>|";
138#else  // !WIN32
139  // TODO
140  ASSERT(false);
141  return "";
142#endif  // !WIN23
143}
144
// Character-class bits stored in ASCII_CLASS.
//   URL_UNSAFE:  0-32 "#$%&+,/:;<=>?@[\]^`{|} 127
//   XML_UNSAFE:  "&'<>
//   HTML_UNSAFE: "&'<>  (shares its bit with XML_UNSAFE)
const unsigned char URL_UNSAFE  = 0x1;
const unsigned char XML_UNSAFE  = 0x2;
const unsigned char HTML_UNSAFE = 0x2;

// Class bits for each 7-bit ASCII code, sixteen codes per row.  The comment
// above each row names the characters that row covers.
const unsigned char ASCII_CLASS[128] = {
  // 0x00 - 0x0F (control characters)
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  // 0x10 - 0x1F (control characters)
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  // space ! " # $ % & ' ( ) * + , - . /
  1,0,3,1,1,1,3,2,0,0,0,1,1,0,0,1,
  // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
  0,0,0,0,0,0,0,0,0,0,1,1,3,1,3,1,
  // @ A B C D E F G H I J K L M N O
  1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  // P Q R S T U V W X Y Z [ backslash ] ^ _
  0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,
  // ` a b c d e f g h i j k l m n o
  1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  // p q r s t u v w x y z { | } ~ DEL
  0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,
};
159
160size_t url_encode(char * buffer, size_t buflen,
161                  const char * source, size_t srclen) {
162  if (NULL == buffer)
163    return srclen * 3 + 1;
164  if (buflen <= 0)
165    return 0;
166
167  size_t srcpos = 0, bufpos = 0;
168  while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
169    unsigned char ch = source[srcpos++];
170    if ((ch < 128) && (ASCII_CLASS[ch] & URL_UNSAFE)) {
171      if (bufpos + 3 >= buflen) {
172        break;
173      }
174      buffer[bufpos+0] = '%';
175      buffer[bufpos+1] = hex_encode((ch >> 4) & 0xF);
176      buffer[bufpos+2] = hex_encode((ch     ) & 0xF);
177      bufpos += 3;
178    } else {
179      buffer[bufpos++] = ch;
180    }
181  }
182  buffer[bufpos] = '\0';
183  return bufpos;
184}
185
186size_t url_decode(char * buffer, size_t buflen,
187                  const char * source, size_t srclen) {
188  if (NULL == buffer)
189    return srclen + 1;
190  if (buflen <= 0)
191    return 0;
192
193  unsigned char h1, h2;
194  size_t srcpos = 0, bufpos = 0;
195  while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
196    unsigned char ch = source[srcpos++];
197    if (ch == '+') {
198      buffer[bufpos++] = ' ';
199    } else if ((ch == '%')
200               && (srcpos + 1 < srclen)
201               && hex_decode(source[srcpos], &h1)
202               && hex_decode(source[srcpos+1], &h2))
203    {
204      buffer[bufpos++] = (h1 << 4) | h2;
205      srcpos += 2;
206    } else {
207      buffer[bufpos++] = ch;
208    }
209  }
210  buffer[bufpos] = '\0';
211  return bufpos;
212}
213
// Decodes the UTF-8 sequence that starts at |source|.
//
//   source - input bytes; at most |srclen| of them are read.
//   srclen - number of bytes available at |source|.
//   value  - receives the decoded code point on success; left untouched on
//            failure.
//
// Returns the number of bytes consumed (1-4), or 0 when the input is empty,
// truncated, or not a well-formed lead/continuation byte pattern.  Only the
// bit patterns are checked: overlong encodings and surrogate values are not
// rejected.
size_t utf8_decode(const char* source, size_t srclen, unsigned long* value) {
  // Nothing to read: looking at s[0] would run past the input.
  if ((NULL == source) || (NULL == value) || (srclen == 0)) {
    return 0;
  }
  const unsigned char* s = reinterpret_cast<const unsigned char*>(source);
  if ((s[0] & 0x80) == 0x00) {                    // Check s[0] == 0xxxxxxx
    *value = s[0];
    return 1;
  }
  if ((srclen < 2) || ((s[1] & 0xC0) != 0x80)) {  // Check s[1] != 10xxxxxx
    return 0;
  }
  // Accumulate the payload bits of the continuation bytes, and combine them
  // with the relevant bits from s[0] once the sequence length is known.
  unsigned long trailer = (s[1] & 0x3F);
  if ((s[0] & 0xE0) == 0xC0) {                    // Check s[0] == 110xxxxx
    *value = ((s[0] & 0x1F) << 6) | trailer;
    return 2;
  }
  if ((srclen < 3) || ((s[2] & 0xC0) != 0x80)) {  // Check s[2] != 10xxxxxx
    return 0;
  }
  trailer = (trailer << 6) | (s[2] & 0x3F);
  if ((s[0] & 0xF0) == 0xE0) {                    // Check s[0] == 1110xxxx
    *value = ((s[0] & 0x0F) << 12) | trailer;
    return 3;
  }
  if ((srclen < 4) || ((s[3] & 0xC0) != 0x80)) {  // Check s[3] != 10xxxxxx
    return 0;
  }
  trailer = (trailer << 6) | (s[3] & 0x3F);
  if ((s[0] & 0xF8) == 0xF0) {                    // Check s[0] == 11110xxx
    *value = ((s[0] & 0x07) << 18) | trailer;
    return 4;
  }
  return 0;
}
248
// Writes the UTF-8 encoding of |value| to |buffer| without a terminator.
//
//   buffer - destination for the encoded bytes.
//   buflen - number of bytes available at |buffer|.
//   value  - code point to encode; values up to 0x1FFFFF are accepted.
//
// Returns the number of bytes written (1-4), or 0 when |value| is larger
// than 0x1FFFFF or |buffer| is too small to hold the whole sequence.
size_t utf8_encode(char* buffer, size_t buflen, unsigned long value) {
  // Marker bits of the lead byte for a sequence of (index + 1) bytes.
  static const unsigned char kLeadMarker[] = { 0x00, 0xC0, 0xE0, 0xF0 };

  size_t length;
  if (value <= 0x7F) {
    length = 1;
  } else if (value <= 0x7FF) {
    length = 2;
  } else if (value <= 0xFFFF) {
    length = 3;
  } else if (value <= 0x1FFFFF) {
    length = 4;
  } else {
    return 0;
  }
  if (buflen < length) {
    return 0;
  }

  // Fill the continuation bytes from the last one backwards, six payload
  // bits at a time; whatever remains belongs in the lead byte.
  for (size_t i = length - 1; i > 0; --i) {
    buffer[i] = static_cast<char>(0x80 | (value & 0x3F));
    value >>= 6;
  }
  buffer[0] = static_cast<char>(kLeadMarker[length - 1] | value);
  return length;
}
274
275size_t html_encode(char * buffer, size_t buflen,
276                   const char * source, size_t srclen) {
277  ASSERT(NULL != buffer);  // TODO: estimate output size
278  if (buflen <= 0)
279    return 0;
280
281  size_t srcpos = 0, bufpos = 0;
282  while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
283    unsigned char ch = source[srcpos];
284    if (ch < 128) {
285      srcpos += 1;
286      if (ASCII_CLASS[ch] & HTML_UNSAFE) {
287        const char * escseq = 0;
288        size_t esclen = 0;
289        switch (ch) {
290          case '<':  escseq = "&lt;";   esclen = 4; break;
291          case '>':  escseq = "&gt;";   esclen = 4; break;
292          case '\'': escseq = "&#39;";  esclen = 5; break;
293          case '\"': escseq = "&quot;"; esclen = 6; break;
294          case '&':  escseq = "&amp;";  esclen = 5; break;
295          default: ASSERT(false);
296        }
297        if (bufpos + esclen >= buflen) {
298          break;
299        }
300        memcpy(buffer + bufpos, escseq, esclen);
301        bufpos += esclen;
302      } else {
303        buffer[bufpos++] = ch;
304      }
305    } else {
306      // Largest value is 0x1FFFFF => &#2097151;  (10 characters)
307      char escseq[11];
308      unsigned long val;
309      if (size_t vallen = utf8_decode(&source[srcpos], srclen - srcpos, &val)) {
310        srcpos += vallen;
311      } else {
312        // Not a valid utf8 sequence, just use the raw character.
313        val = static_cast<unsigned char>(source[srcpos++]);
314      }
315      size_t esclen = sprintfn(escseq, ARRAY_SIZE(escseq), "&#%lu;", val);
316      if (bufpos + esclen >= buflen) {
317        break;
318      }
319      memcpy(buffer + bufpos, escseq, esclen);
320      bufpos += esclen;
321    }
322  }
323  buffer[bufpos] = '\0';
324  return bufpos;
325}
326
327size_t html_decode(char * buffer, size_t buflen,
328                   const char * source, size_t srclen) {
329  ASSERT(NULL != buffer);  // TODO: estimate output size
330  return xml_decode(buffer, buflen, source, srclen);
331}
332
333size_t xml_encode(char * buffer, size_t buflen,
334                  const char * source, size_t srclen) {
335  ASSERT(NULL != buffer);  // TODO: estimate output size
336  if (buflen <= 0)
337    return 0;
338
339  size_t srcpos = 0, bufpos = 0;
340  while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
341    unsigned char ch = source[srcpos++];
342    if ((ch < 128) && (ASCII_CLASS[ch] & XML_UNSAFE)) {
343      const char * escseq = 0;
344      size_t esclen = 0;
345      switch (ch) {
346        case '<':  escseq = "&lt;";   esclen = 4; break;
347        case '>':  escseq = "&gt;";   esclen = 4; break;
348        case '\'': escseq = "&apos;"; esclen = 6; break;
349        case '\"': escseq = "&quot;"; esclen = 6; break;
350        case '&':  escseq = "&amp;";  esclen = 5; break;
351        default: ASSERT(false);
352      }
353      if (bufpos + esclen >= buflen) {
354        break;
355      }
356      memcpy(buffer + bufpos, escseq, esclen);
357      bufpos += esclen;
358    } else {
359      buffer[bufpos++] = ch;
360    }
361  }
362  buffer[bufpos] = '\0';
363  return bufpos;
364}
365
366size_t xml_decode(char * buffer, size_t buflen,
367                  const char * source, size_t srclen) {
368  ASSERT(NULL != buffer);  // TODO: estimate output size
369  if (buflen <= 0)
370    return 0;
371
372  size_t srcpos = 0, bufpos = 0;
373  while ((srcpos < srclen) && (bufpos + 1 < buflen)) {
374    unsigned char ch = source[srcpos++];
375    if (ch != '&') {
376      buffer[bufpos++] = ch;
377    } else if ((srcpos + 2 < srclen)
378               && (memcmp(source + srcpos, "lt;", 3) == 0)) {
379      buffer[bufpos++] = '<';
380      srcpos += 3;
381    } else if ((srcpos + 2 < srclen)
382               && (memcmp(source + srcpos, "gt;", 3) == 0)) {
383      buffer[bufpos++] = '>';
384      srcpos += 3;
385    } else if ((srcpos + 4 < srclen)
386               && (memcmp(source + srcpos, "apos;", 5) == 0)) {
387      buffer[bufpos++] = '\'';
388      srcpos += 5;
389    } else if ((srcpos + 4 < srclen)
390               && (memcmp(source + srcpos, "quot;", 5) == 0)) {
391      buffer[bufpos++] = '\"';
392      srcpos += 5;
393    } else if ((srcpos + 3 < srclen)
394               && (memcmp(source + srcpos, "amp;", 4) == 0)) {
395      buffer[bufpos++] = '&';
396      srcpos += 4;
397    } else if ((srcpos < srclen) && (source[srcpos] == '#')) {
398      int int_base = 10;
399      if ((srcpos + 1 < srclen) && (source[srcpos+1] == 'x')) {
400        int_base = 16;
401        srcpos += 1;
402      }
403      char * ptr;
404      // TODO: Fix hack (ptr may go past end of data)
405      unsigned long val = strtoul(source + srcpos + 1, &ptr, int_base);
406      if ((static_cast<size_t>(ptr - source) < srclen) && (*ptr == ';')) {
407        srcpos = ptr - source + 1;
408      } else {
409        // Not a valid escape sequence.
410        break;
411      }
412      if (size_t esclen = utf8_encode(buffer + bufpos, buflen - bufpos, val)) {
413        bufpos += esclen;
414      } else {
415        // Not enough room to encode the character, or illegal character
416        break;
417      }
418    } else {
419      // Unrecognized escape sequence.
420      break;
421    }
422  }
423  buffer[bufpos] = '\0';
424  return bufpos;
425}
426
427static const char HEX[] = "0123456789abcdef";
428
429char hex_encode(unsigned char val) {
430  ASSERT(val < 16);
431  return (val < 16) ? HEX[val] : '!';
432}
433
// Converts a single hex digit to its numeric value.
//
//   ch  - character to convert; '0'-'9', 'a'-'f' and 'A'-'F' are accepted.
//   val - receives the value (0-15) on success; left untouched on failure.
//
// Returns true when |ch| is a hex digit, false otherwise.
bool hex_decode(char ch, unsigned char* val) {
  if ((ch >= '0') && (ch <= '9')) {
    *val = ch - '0';
  } else if ((ch >= 'A') && (ch <= 'F')) {
    *val = (ch - 'A') + 10;
  } else if ((ch >= 'a') && (ch <= 'f')) {
    *val = (ch - 'a') + 10;
  } else {
    return false;
  }
  return true;
}
446
447size_t hex_encode(char* buffer, size_t buflen,
448                  const char* csource, size_t srclen) {
449  return hex_encode_with_delimiter(buffer, buflen, csource, srclen, 0);
450}
451
452size_t hex_encode_with_delimiter(char* buffer, size_t buflen,
453                                 const char* csource, size_t srclen,
454                                 char delimiter) {
455  ASSERT(NULL != buffer);  // TODO: estimate output size
456  if (buflen == 0)
457    return 0;
458
459  // Init and check bounds.
460  const unsigned char* bsource =
461      reinterpret_cast<const unsigned char*>(csource);
462  size_t srcpos = 0, bufpos = 0;
463  size_t needed = delimiter ? (srclen * 3) : (srclen * 2 + 1);
464  if (buflen < needed)
465    return 0;
466
467  while (srcpos < srclen) {
468    unsigned char ch = bsource[srcpos++];
469    buffer[bufpos  ] = hex_encode((ch >> 4) & 0xF);
470    buffer[bufpos+1] = hex_encode((ch     ) & 0xF);
471    bufpos += 2;
472
473    // Don't write a delimiter after the last byte.
474    if (delimiter && (srcpos < srclen)) {
475      buffer[bufpos] = delimiter;
476      ++bufpos;
477    }
478  }
479
480  // Null terminate.
481  buffer[bufpos] = '\0';
482  return bufpos;
483}
484
485std::string hex_encode(const char* source, size_t srclen) {
486  return hex_encode_with_delimiter(source, srclen, 0);
487}
488
489std::string hex_encode_with_delimiter(const char* source, size_t srclen,
490                                      char delimiter) {
491  const size_t kBufferSize = srclen * 3;
492  char* buffer = STACK_ARRAY(char, kBufferSize);
493  size_t length = hex_encode_with_delimiter(buffer, kBufferSize,
494                                            source, srclen, delimiter);
495  ASSERT(srclen == 0 || length > 0);
496  return std::string(buffer, length);
497}
498
499size_t hex_decode(char * cbuffer, size_t buflen,
500                  const char * source, size_t srclen) {
501  return hex_decode_with_delimiter(cbuffer, buflen, source, srclen, 0);
502}
503
504size_t hex_decode_with_delimiter(char* cbuffer, size_t buflen,
505                                 const char* source, size_t srclen,
506                                 char delimiter) {
507  ASSERT(NULL != cbuffer);  // TODO: estimate output size
508  if (buflen == 0)
509    return 0;
510
511  // Init and bounds check.
512  unsigned char* bbuffer = reinterpret_cast<unsigned char*>(cbuffer);
513  size_t srcpos = 0, bufpos = 0;
514  size_t needed = (delimiter) ? (srclen + 1) / 3 : srclen / 2;
515  if (buflen < needed)
516    return 0;
517
518  while (srcpos < srclen) {
519    if ((srclen - srcpos) < 2) {
520      // This means we have an odd number of bytes.
521      return 0;
522    }
523
524    unsigned char h1, h2;
525    if (!hex_decode(source[srcpos], &h1) ||
526        !hex_decode(source[srcpos + 1], &h2))
527      return 0;
528
529    bbuffer[bufpos++] = (h1 << 4) | h2;
530    srcpos += 2;
531
532    // Remove the delimiter if needed.
533    if (delimiter && (srclen - srcpos) > 1) {
534      if (source[srcpos] != delimiter)
535        return 0;
536      ++srcpos;
537    }
538  }
539
540  return bufpos;
541}
542
543size_t hex_decode(char* buffer, size_t buflen, const std::string& source) {
544  return hex_decode_with_delimiter(buffer, buflen, source, 0);
545}
546size_t hex_decode_with_delimiter(char* buffer, size_t buflen,
547                                 const std::string& source, char delimiter) {
548  return hex_decode_with_delimiter(buffer, buflen,
549                                   source.c_str(), source.length(), delimiter);
550}
551
552size_t transform(std::string& value, size_t maxlen, const std::string& source,
553                 Transform t) {
554  char* buffer = STACK_ARRAY(char, maxlen + 1);
555  size_t length = t(buffer, maxlen + 1, source.data(), source.length());
556  value.assign(buffer, length);
557  return length;
558}
559
560std::string s_transform(const std::string& source, Transform t) {
561  // Ask transformation function to approximate the destination size (returns upper bound)
562  size_t maxlen = t(NULL, 0, source.data(), source.length());
563  char * buffer = STACK_ARRAY(char, maxlen);
564  size_t len = t(buffer, maxlen, source.data(), source.length());
565  std::string result(buffer, len);
566  return result;
567}
568
// Splits |source| on |delimiter| and stores the non-empty pieces in |fields|,
// replacing its previous contents.  Consecutive, leading and trailing
// delimiters therefore produce no empty fields.
//
// Returns the number of fields stored, or 0 when |fields| is NULL (the same
// handling as tokenize_append()).
size_t tokenize(const std::string& source, char delimiter,
                std::vector<std::string>* fields) {
  if (!fields) return 0;
  fields->clear();

  size_t start = 0;
  while (start < source.length()) {
    size_t end = source.find(delimiter, start);
    if (std::string::npos == end) {
      end = source.length();  // Last token runs to the end of the string.
    }
    if (end != start) {
      fields->push_back(source.substr(start, end - start));
    }
    start = end + 1;
  }
  return fields->size();
}
587
588size_t tokenize_append(const std::string& source, char delimiter,
589                       std::vector<std::string>* fields) {
590  if (!fields) return 0;
591
592  std::vector<std::string> new_fields;
593  tokenize(source, delimiter, &new_fields);
594  fields->insert(fields->end(), new_fields.begin(), new_fields.end());
595  return fields->size();
596}
597
// Appends to |fields| the non-empty pieces of source[begin, end) obtained by
// splitting on |delimiter|.  Requires begin <= end <= source.length().
static void append_tokens(const std::string& source, size_t begin, size_t end,
                          char delimiter, std::vector<std::string>* fields) {
  size_t token_start = begin;
  for (size_t i = begin; i < end; ++i) {
    if (source[i] == delimiter) {
      if (i != token_start) {
        fields->push_back(source.substr(token_start, i - token_start));
      }
      token_start = i + 1;
    }
  }
  if (token_start != end) {
    fields->push_back(source.substr(token_start, end - token_start));
  }
}

// Splits |source| on |delimiter| like the three-argument tokenize(), except
// that text enclosed between |start_mark| and the next |end_mark| is stored
// as a single field (without the marks, and even when empty).  |fields| is
// cleared first.  A |start_mark| without a following |end_mark| is treated
// as ordinary text.
//
// Returns the number of fields stored, or 0 when |fields| is NULL.
size_t tokenize(const std::string& source, char delimiter, char start_mark,
                char end_mark, std::vector<std::string>* fields) {
  if (!fields) return 0;
  fields->clear();

  // |pos| is the start of the part of |source| not yet consumed.
  size_t pos = 0;
  while (pos < source.length()) {
    const size_t start_pos = source.find(start_mark, pos);
    if (std::string::npos == start_pos) break;
    const size_t end_pos = source.find(end_mark, start_pos + 1);
    if (std::string::npos == end_pos) break;

    // We have found the matching marks. First tokenize everything before the
    // start mark (all of it, including the character right before the mark).
    // Then add the marked part as a single field. Finally, loop back for the
    // text after the end mark.
    append_tokens(source, pos, start_pos, delimiter, fields);
    fields->push_back(source.substr(start_pos + 1, end_pos - start_pos - 1));
    pos = end_pos + 1;
  }

  append_tokens(source, pos, source.length(), delimiter, fields);
  return fields->size();
}
625
// Splits |source| at every |delimiter| and stores all pieces, including empty
// ones, in |fields|, replacing its previous contents.  The result always
// holds one more field than there are delimiters, so an empty |source|
// yields a single empty field.
//
// Returns the number of fields stored, or 0 when |fields| is NULL (the same
// handling as tokenize_append()).
size_t split(const std::string& source, char delimiter,
             std::vector<std::string>* fields) {
  if (!fields) return 0;
  fields->clear();

  size_t start = 0;
  for (;;) {
    const size_t end = source.find(delimiter, start);
    if (std::string::npos == end) break;
    fields->push_back(source.substr(start, end - start));
    start = end + 1;
  }
  // Whatever follows the last delimiter (possibly nothing) is the last field.
  fields->push_back(source.substr(start));
  return fields->size();
}
640
// Returns |c| unchanged when it may appear in a file name, and '_' when it
// compares below 32 or is one of  < > : " / \ | * ?
// NOTE(review): the comparison is done on plain char, so where char is
// signed, bytes >= 0x80 also compare below 32 and become '_' - confirm this
// is intended.
char make_char_safe_for_filename(char c) {
  if (c < 32) {
    return '_';
  }

  static const char kReserved[] = "<>:\"/\\|*?";
  for (const char* p = kReserved; *p != '\0'; ++p) {
    if (*p == c) {
      return '_';
    }
  }
  return c;
}
661
662/*
663void sprintf(std::string& value, size_t maxlen, const char * format, ...) {
664  char * buffer = STACK_ARRAY(char, maxlen + 1);
665  va_list args;
666  va_start(args, format);
667  value.assign(buffer, vsprintfn(buffer, maxlen + 1, format, args));
668  va_end(args);
669}
670*/
671
672/////////////////////////////////////////////////////////////////////////////
673
674}  // namespace talk_base
675