1/*
2 * libjingle
3 * Copyright 2011, Google Inc.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 *  1. Redistributions of source code must retain the above copyright notice,
9 *     this list of conditions and the following disclaimer.
10 *  2. Redistributions in binary form must reproduce the above copyright notice,
11 *     this list of conditions and the following disclaimer in the documentation
12 *     and/or other materials provided with the distribution.
13 *  3. The name of the author may not be used to endorse or promote products
14 *     derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#ifndef TALK_BASE_STRINGENCODE_H__
29#define TALK_BASE_STRINGENCODE_H__
30
31#include <string>
32#include <sstream>
33#include <vector>
34
35namespace talk_base {
36
37//////////////////////////////////////////////////////////////////////
38// String Encoding Utilities
39//////////////////////////////////////////////////////////////////////
40
// Maps an unsigned value in the range 0 to 15 onto the hex character that
// represents it.
char hex_encode(unsigned char val);
// The reverse mapping: takes the hex character ch and produces its value
// through val.
// NOTE(review): the bool result presumably reports whether ch was a valid hex
// digit -- confirm against the implementation.
bool hex_decode(char ch, unsigned char* val);
45
// Encodes the unsigned value as UTF-8 into buffer.  The return value is the
// length of the encoded string; 0 is returned when the encoding would be
// longer than buflen - 1.
size_t utf8_encode(char* buffer, size_t buflen, unsigned long value);
// Decodes the UTF-8 encoded value that source points to.  The return value is
// the number of bytes the encoding occupied; 0 is returned when the encoding
// is invalid.
size_t utf8_decode(const char* source, size_t srclen, unsigned long* value);
52
// Escaping puts the escape character in front of every illegal character.
// The output stays compact, but the illegal characters themselves are still
// present in it.
size_t escape(char* buffer, size_t buflen,
              const char* source, size_t srclen,
              const char* illegal, char escape);
// Counterpart of escape().  Unescaping in place (buffer == source) is
// allowed.
size_t unescape(char* buffer, size_t buflen,
                const char* source, size_t srclen,
                char escape);
62
// Encoding substitutes each illegal character with the escape character
// followed by 2 hex chars.  That is slightly less compact than escape(), but
// no illegal character survives in the output.  Note that hex digits should
// not be used as illegal characters.
size_t encode(char* buffer, size_t buflen,
              const char* source, size_t srclen,
              const char* illegal, char escape);
// Counterpart of encode().  Decoding in place (buffer == source) is allowed.
size_t decode(char* buffer, size_t buflen,
              const char* source, size_t srclen,
              char escape);
74
// Gives the list of characters that may be unsafe to use in a file name.  The
// result is suitable for passing as the 'illegal' parameter of escape() or
// encode().
const char* unsafe_filename_characters();
78
// url_encode is an encode operation whose set of illegal characters and
// escape character are predefined for use in URLs.
size_t url_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
// Counterpart of url_encode().  Decoding in place (buffer == source) is
// allowed.
size_t url_decode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
86
// html_encode keeps data that is embedded in html from containing markup.
size_t html_encode(char* buffer, size_t buflen,
                   const char* source, size_t srclen);
// Counterpart of html_encode().  Decoding in place (buffer == source) is
// allowed.
size_t html_decode(char* buffer, size_t buflen,
                   const char* source, size_t srclen);
93
// xml_encode makes data suitable for use inside xml attributes and values.
size_t xml_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
// Counterpart of xml_encode().  Decoding in place (buffer == source) is
// allowed.
size_t xml_decode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
100
// hex_encode writes the hex representation of binary data as ascii text.
size_t hex_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
// Counterpart of the buffer-based hex_encode() above.
size_t hex_decode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);
// Helper function for hex_encode that hands the result back as a std::string.
std::string hex_encode(const char* source, size_t srclen);
108
// Signature shared by the buffer-based string transforms (including the ones
// declared above).
typedef size_t (*Transform)(char* buffer, size_t buflen,
                            const char* source, size_t srclen);
// Applies any suitable string transform t to an STL string.  The
// transformation works in stack-allocated temporary space, so value and
// source are allowed to refer to the same string.
size_t transform(std::string& value,
                 size_t maxlen,
                 const std::string& source,
                 Transform t);
116
117// Return the result of applying transform t to source.
118std::string s_transform(const std::string& source, Transform t);
119
120// Convenience wrappers
121inline std::string s_url_encode(const std::string& source) {
122  return s_transform(source, url_encode);
123}
124inline std::string s_url_decode(const std::string& source) {
125  return s_transform(source, url_decode);
126}
127
// Breaks source into the fields that lie between occurrences of delimiter.
// Consecutive delimiters produce empty fields.
size_t split(const std::string& source,
             char delimiter,
             std::vector<std::string>* fields);
132
// Breaks source into the fields that lie between occurrences of delimiter.
// Consecutive delimiters are ignored, and so is a trailing delimiter.
size_t tokenize(const std::string& source,
                char delimiter,
                std::vector<std::string>* fields);
137
138// Safe sprintf to std::string
139//void sprintf(std::string& value, size_t maxlen, const char * format, ...)
140//     PRINTF_FORMAT(3);
141
142// Convert arbitrary values to/from a string.
143
// Formats t with operator<< into a temporary output stream and stores the
// resulting text in *s.  *s is assigned whether or not the insertion worked;
// the return value is true only when the stream did not enter a failed state.
template <class T>
static bool ToString(const T &t, std::string* s) {
  std::ostringstream stream;
  stream << t;
  const bool succeeded = !stream.fail();
  *s = stream.str();
  return succeeded;
}
151
// Extracts a T from the text in s with operator>> and writes it through t.
// Returns true when the extraction did not put the stream in a failed state.
// Trailing characters left unread after the value are not treated as an
// error.
template <class T>
static bool FromString(const std::string& s, T* t) {
  std::istringstream input(s);
  T& target = *t;
  input >> target;
  if (input.fail()) {
    return false;
  }
  return true;
}
158
159// Inline versions of the string conversion routines.
160
// Value-returning form of ToString: formats val with operator<< and returns
// the text.  Whatever the stream produced is returned even if the insertion
// reported a failure; no error is signalled to the caller.
template<typename T>
static inline std::string ToString(T val) {
  std::ostringstream stream;
  stream << val;
  return stream.str();
}
165
// Value-returning form of FromString: extracts a T from str with operator>>
// and returns it.
//
// The result is value-initialised before parsing (zero for arithmetic types,
// default-constructed for class types).  When the stream fails before any
// value is written -- for example on an empty or whitespace-only string --
// the caller therefore receives that well-defined value rather than an
// indeterminate one.  No error is signalled; use the bool-returning overload
// when failure must be detected.
template<typename T>
static inline T FromString(const std::string& str) {
  T val = T();
  std::istringstream iss(str);
  iss >> val;
  return val;
}
170
// Extracts a T from str with operator>> and returns it, or returns
// defaultValue when the extraction fails.
//
// The parse runs on a scratch copy and the stream state is checked
// afterwards.  Since C++11 a failed numeric extraction stores 0 in its
// target, so returning the parsed-into object unconditionally would replace
// the caller's default with 0 on malformed input.
template<typename T>
static inline T FromString(const T& defaultValue, const std::string& str) {
  T parsed(defaultValue);
  std::istringstream iss(str);
  iss >> parsed;
  if (iss.fail()) {
    return defaultValue;
  }
  return parsed;
}
175
// Simple function for stripping out characters which shouldn't be used in
// filenames.
// NOTE(review): presumably returns c unchanged when it is already safe and a
// substitute character otherwise -- confirm against the implementation.
char make_char_safe_for_filename(char c);
179
180//////////////////////////////////////////////////////////////////////
181
182}  // namespace talk_base
183
184#endif  // TALK_BASE_STRINGENCODE_H__
185