/*
 * libjingle
 * Copyright 2011, Google Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef TALK_BASE_STRINGENCODE_H__
#define TALK_BASE_STRINGENCODE_H__

#include <stddef.h>  // size_t is used unqualified throughout this header.

#include <string>
#include <sstream>
#include <vector>

namespace talk_base {

//////////////////////////////////////////////////////////////////////
// String Encoding Utilities
//////////////////////////////////////////////////////////////////////

// Convert an unsigned value from 0 to 15 to the hex character equivalent...
char hex_encode(unsigned char val);
// ...and vice-versa.
bool hex_decode(char ch, unsigned char* val);

// Convert an unsigned value to its UTF-8 representation.  Returns the length
// of the encoded string, or 0 if the encoding is longer than buflen - 1.
size_t utf8_encode(char* buffer, size_t buflen, unsigned long value);
// Decode the UTF-8 encoded value pointed to by source.  Returns the number of
// bytes used by the encoding, or 0 if the encoding is invalid.
size_t utf8_decode(const char* source, size_t srclen, unsigned long* value);

// Escaping prefixes illegal characters with the escape character.  Compact, but
// illegal characters still appear in the string.
size_t escape(char * buffer, size_t buflen,
              const char * source, size_t srclen,
              const char * illegal, char escape);
// Note: in-place unescaping (buffer == source) is allowed.
size_t unescape(char * buffer, size_t buflen,
                const char * source, size_t srclen,
                char escape);

// Encoding replaces illegal characters with the escape character and 2 hex
// chars, so it's a little less compact than escape, but completely removes
// illegal characters.  Note that hex digits should not be used as illegal
// characters.
size_t encode(char * buffer, size_t buflen,
              const char * source, size_t srclen,
              const char * illegal, char escape);
// Note: in-place decoding (buffer == source) is allowed.
size_t decode(char * buffer, size_t buflen,
              const char * source, size_t srclen,
              char escape);

// Returns a list of characters that may be unsafe for use in the name of a
// file, suitable for passing to the 'illegal' member of escape or encode.
const char* unsafe_filename_characters();

// url_encode is an encode operation with a predefined set of illegal characters
// and escape character (for use in URLs, obviously).
size_t url_encode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t url_decode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);

// html_encode prevents data embedded in html from containing markup.
size_t html_encode(char * buffer, size_t buflen,
                   const char * source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t html_decode(char * buffer, size_t buflen,
                   const char * source, size_t srclen);

// xml_encode makes data suitable for inside xml attributes and values.
size_t xml_encode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t xml_decode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);

// hex_encode shows the hex representation of binary data in ascii.
size_t hex_encode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);
size_t hex_decode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);
// Helper function for hex_encode that returns the result as a std::string.
std::string hex_encode(const char * source, size_t srclen);

// Apply any suitable string transform (including the ones above) to an STL
// string.  Stack-allocated temporary space is used for the transformation,
// so value and source may refer to the same string.
typedef size_t (*Transform)(char * buffer, size_t buflen,
                            const char * source, size_t srclen);
size_t transform(std::string& value, size_t maxlen, const std::string& source,
                 Transform t);

// Return the result of applying transform t to source.
std::string s_transform(const std::string& source, Transform t);

// Convenience wrappers: url_encode / url_decode applied to a std::string via
// s_transform.
inline std::string s_url_encode(const std::string& source) {
  return s_transform(source, url_encode);
}
inline std::string s_url_decode(const std::string& source) {
  return s_transform(source, url_decode);
}

// Splits the source string into multiple fields separated by delimiter,
// with duplicates of delimiter creating empty fields.
size_t split(const std::string& source, char delimiter,
             std::vector<std::string>* fields);

// Splits the source string into multiple fields separated by delimiter,
// with duplicates of delimiter ignored.  Trailing delimiter ignored.
size_t tokenize(const std::string& source, char delimiter,
                std::vector<std::string>* fields);

// Safe sprintf to std::string
//void sprintf(std::string& value, size_t maxlen, const char * format, ...)
//     PRINTF_FORMAT(3);

// Convert arbitrary values to/from a string.

// Streams t into *s using operator<<.  *s is always overwritten with whatever
// the stream produced; the return value is false if the insertion failed.
template <class T>
static bool ToString(const T &t, std::string* s) {
  std::ostringstream oss;
  oss << t;
  *s = oss.str();
  return !oss.fail();
}

// Extracts a T from s using operator>>.  Returns false if the extraction
// failed.  Note that *t may have been modified even when false is returned
// (for arithmetic types a failed extraction can store zero).
template <class T>
static bool FromString(const std::string& s, T* t) {
  std::istringstream iss(s);
  iss >> *t;
  return !iss.fail();
}

// Inline versions of the string conversion routines.

// Returns the streamed representation of val.  Conversion failures are not
// reported; use the two-argument overload when the status matters.
template<typename T>
static inline std::string ToString(T val) {
  std::string str;
  ToString(val, &str);
  return str;
}

// Returns the value parsed from str.  The result is value-initialized first,
// so a failed conversion that stores nothing (e.g. empty input) yields T()
// instead of an indeterminate value.
template<typename T>
static inline T FromString(const std::string& str) {
  T val = T();
  FromString(str, &val);
  return val;
}

// Returns the value parsed from str, or defaultValue if the conversion fails.
// The status of the parse is checked explicitly because a failed extraction
// may overwrite its target (arithmetic types are set to zero), which would
// otherwise silently discard the caller's default.
template<typename T>
static inline T FromString(const T& defaultValue, const std::string& str) {
  T parsed(defaultValue);
  return FromString(str, &parsed) ? parsed : defaultValue;
}

// Simple function to strip out characters which shouldn't be
// used in filenames.
char make_char_safe_for_filename(char c);

//////////////////////////////////////////////////////////////////////

}  // namespace talk_base

#endif  // TALK_BASE_STRINGENCODE_H__