/*
 * libjingle
 * Copyright 2011, Google Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch
28f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch#ifndef TALK_BASE_STRINGENCODE_H__
29f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch#define TALK_BASE_STRINGENCODE_H__
30f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch
31f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch#include <string>
32f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch#include <sstream>
33f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch#include <vector>
34f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch
namespace talk_base {

//////////////////////////////////////////////////////////////////////
// String Encoding Utilities
//////////////////////////////////////////////////////////////////////

// Convert an unsigned value from 0 to 15 to the hex character equivalent...
char hex_encode(unsigned char val);
// ...and vice-versa.
// NOTE(review): presumably the decoded value is stored through 'val' and the
// bool result reports whether 'ch' was a valid hex digit -- confirm against
// the implementation.
bool hex_decode(char ch, unsigned char* val);

// Convert an unsigned value to its UTF-8 representation.  Returns the length
// of the encoded string, or 0 if the encoding is longer than buflen - 1.
size_t utf8_encode(char* buffer, size_t buflen, unsigned long value);
// Decode the UTF-8 encoded value pointed to by source.  Returns the number of
// bytes used by the encoding, or 0 if the encoding is invalid.
size_t utf8_decode(const char* source, size_t srclen, unsigned long* value);

// Escaping prefixes illegal characters with the escape character.  Compact, but
// illegal characters still appear in the string.
size_t escape(char * buffer, size_t buflen,
              const char * source, size_t srclen,
              const char * illegal, char escape);
// Reverses escape().
// Note: in-place unescaping (buffer == source) is allowed.
size_t unescape(char * buffer, size_t buflen,
                const char * source, size_t srclen,
                char escape);

// Encoding replaces illegal characters with the escape character and 2 hex
// chars, so it's a little less compact than escape, but completely removes
// illegal characters.  Note that hex digits should not be used as illegal
// characters.
size_t encode(char * buffer, size_t buflen,
              const char * source, size_t srclen,
              const char * illegal, char escape);
// Reverses encode().
// Note: in-place decoding (buffer == source) is allowed.
size_t decode(char * buffer, size_t buflen,
              const char * source, size_t srclen,
              char escape);

// Returns a list of characters that may be unsafe for use in the name of a
// file, suitable for passing to the 'illegal' member of escape or encode.
const char* unsafe_filename_characters();

// url_encode is an encode operation with a predefined set of illegal characters
// and escape character (for use in URLs, obviously).
size_t url_encode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t url_decode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);

// html_encode prevents data embedded in html from containing markup.
size_t html_encode(char * buffer, size_t buflen,
                   const char * source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t html_decode(char * buffer, size_t buflen,
                   const char * source, size_t srclen);

// xml_encode makes data suitable for inside xml attributes and values.
size_t xml_encode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t xml_decode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);

// hex_encode shows the hex representation of binary data in ascii.
size_t hex_encode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);
// hex_decode is the buffer-to-buffer counterpart of hex_encode.
// NOTE(review): unlike the other decoders, in-place use is not documented
// here -- check the implementation before passing buffer == source.
size_t hex_decode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);
// Helper function for hex_encode: returns the encoded data as a std::string.
std::string hex_encode(const char * source, size_t srclen);

// Apply any suitable string transform (including the ones above) to an STL
// string.  Stack-allocated temporary space is used for the transformation,
// so value and source may refer to the same string.
typedef size_t (*Transform)(char * buffer, size_t buflen,
                            const char * source, size_t srclen);
size_t transform(std::string& value, size_t maxlen, const std::string& source,
                 Transform t);

// Return the result of applying transform t to source.
std::string s_transform(const std::string& source, Transform t);

// Convenience wrappers: std::string-returning forms of url_encode and
// url_decode, implemented through s_transform.
inline std::string s_url_encode(const std::string& source) {
  return s_transform(source, url_encode);
}
inline std::string s_url_decode(const std::string& source) {
  return s_transform(source, url_decode);
}

// Splits the source string into multiple fields separated by delimiter,
// with duplicates of delimiter creating empty fields.
size_t split(const std::string& source, char delimiter,
             std::vector<std::string>* fields);

// Splits the source string into multiple fields separated by delimiter,
// with duplicates of delimiter ignored.  Trailing delimiter ignored.
size_t tokenize(const std::string& source, char delimiter,
                std::vector<std::string>* fields);

// Safe sprintf to std::string
//void sprintf(std::string& value, size_t maxlen, const char * format, ...)
//     PRINTF_FORMAT(3);

// Convert arbitrary values to/from a string.

// Formats t with operator<< on a std::ostringstream and stores the text in *s.
// *s is assigned even when formatting fails; the return value is false if the
// stream reported failure.
template <class T>
static bool ToString(const T &t, std::string* s) {
  std::ostringstream oss;
  oss << t;
  *s = oss.str();
  return !oss.fail();
}

// Parses s with operator>> on a std::istringstream, storing the result in *t.
// Returns false if the extraction failed.  Only the leading part of s that
// operator>> consumes is examined; trailing characters are not checked.
template <class T>
static bool FromString(const std::string& s, T* t) {
  std::istringstream iss(s);
  iss >> *t;
  return !iss.fail();
}

// Inline versions of the string conversion routines.

// Returns the string form of val.  The success flag of the two-argument
// ToString is discarded, so a formatting failure is not reported.
template<typename T>
static inline std::string ToString(T val) {
  std::string str;
  ToString(val, &str);
  return str;
}

// Returns str parsed as a T, or a value-initialized T (0 for arithmetic types)
// when parsing fails.
template<typename T>
static inline T FromString(const std::string& str) {
  // Value-initialize: when the stream cannot even begin extraction (for
  // example on an empty string) nothing is stored, and a plain "T val;" would
  // be returned with an indeterminate value.
  T val = T();
  FromString(str, &val);
  return val;
}

// Returns str parsed as a T, or defaultValue when parsing fails.
template<typename T>
static inline T FromString(const T& defaultValue, const std::string& str) {
  T val(defaultValue);
  // Since C++11 a failed numeric extraction stores 0 in its target, so the
  // default has to be handed back explicitly instead of relying on val having
  // been left untouched.
  if (!FromString(str, &val)) {
    return defaultValue;
  }
  return val;
}

// simple function to strip out characters which shouldn't be
// used in filenames
char make_char_safe_for_filename(char c);

//////////////////////////////////////////////////////////////////////

}  // namespace talk_base
183f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch
184f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch#endif  // TALK_BASE_STRINGENCODE_H__
185