/*
 * libjingle
 * Copyright 2011, Google Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef TALK_BASE_STRINGENCODE_H__
#define TALK_BASE_STRINGENCODE_H__

#include <string>
#include <sstream>
#include <vector>

namespace talk_base {

//////////////////////////////////////////////////////////////////////
// String Encoding Utilities
//////////////////////////////////////////////////////////////////////
// Convert an unsigned value from 0 to 15 to the hex character equivalent...
char hex_encode(unsigned char val);
// ...and vice-versa: converts the hex character |ch| back to its value, which
// is stored in |*val|.  NOTE(review): the bool result presumably reports
// whether |ch| was a valid hex digit -- confirm against the implementation.
bool hex_decode(char ch, unsigned char* val);

// Convert an unsigned value to its utf8 representation.  Returns the length
// of the encoded string, or 0 if the encoding is longer than buflen - 1.
size_t utf8_encode(char* buffer, size_t buflen, unsigned long value);
// Decode the utf8 encoded value pointed to by source.  Returns the number of
// bytes used by the encoding, or 0 if the encoding is invalid.
size_t utf8_decode(const char* source, size_t srclen, unsigned long* value);

// Escaping prefixes illegal characters with the escape character.  Compact, but
// illegal characters still appear in the string.
//   buffer/buflen - destination and its capacity.
//   source/srclen - input bytes and their count.
//   illegal       - C string listing the characters to be escaped.
//   escape        - the prefix character.
size_t escape(char * buffer, size_t buflen,
              const char * source, size_t srclen,
              const char * illegal, char escape);
// Note: in-place unescaping (buffer == source) is allowed.
// unescape takes the same buffer/source arguments as escape() plus the escape
// character; no 'illegal' set is passed.
size_t unescape(char * buffer, size_t buflen,
                const char * source, size_t srclen,
                char escape);

// Encoding replaces illegal characters with the escape character and 2 hex
// chars, so it's a little less compact than escape, but completely removes
// illegal characters.  Note that hex digits should not be used as illegal
// characters.
size_t encode(char * buffer, size_t buflen,
              const char * source, size_t srclen,
              const char * illegal, char escape);
// Note: in-place decoding (buffer == source) is allowed.
size_t decode(char * buffer, size_t buflen,
              const char * source, size_t srclen,
              char escape);

// Returns a list of characters that may be unsafe for use in the name of a
// file, suitable for passing as the 'illegal' parameter of escape or encode.
const char* unsafe_filename_characters();

// url_encode is an encode operation with a predefined set of illegal characters
// and escape character (for use in URLs, obviously).
size_t url_encode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t url_decode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);

// html_encode prevents data embedded in html from containing markup.
size_t html_encode(char * buffer, size_t buflen,
                   const char * source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t html_decode(char * buffer, size_t buflen,
                   const char * source, size_t srclen);

// xml_encode makes data suitable for inside xml attributes and values.
size_t xml_encode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t xml_decode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);

// hex_encode shows the hex representation of binary data in ascii.
size_t hex_encode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);
// hex_decode takes the same arguments as the buffer-based hex_encode.
// NOTE(review): presumably converts ascii hex back to binary -- confirm.
size_t hex_decode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);
// Helper function for hex_encode: same input, result returned as a
// std::string instead of being written to a caller-supplied buffer.
std::string hex_encode(const char * source, size_t srclen);

// Apply any suitable string transform (including the ones above) to an STL
// string.  Stack-allocated temporary space is used for the transformation,
// so value and source may refer to the same string.
112f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdochtypedef size_t (*Transform)(char * buffer, size_t buflen, 113f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch const char * source, size_t srclen); 114f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdochsize_t transform(std::string& value, size_t maxlen, const std::string& source, 115f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch Transform t); 116f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch 117f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch// Return the result of applying transform t to source. 118f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdochstd::string s_transform(const std::string& source, Transform t); 119f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch 120f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch// Convenience wrappers 121f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdochinline std::string s_url_encode(const std::string& source) { 122f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch return s_transform(source, url_encode); 123f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch} 124f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdochinline std::string s_url_decode(const std::string& source) { 125f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch return s_transform(source, url_decode); 126f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch} 127f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch 128dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen// Splits the source string into multiple fields separated by delimiter, 129dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen// with duplicates of delimiter creating empty fields. 
// The fields are written through |fields|.  NOTE(review): the return value is
// presumably the number of fields produced -- confirm.
size_t split(const std::string& source, char delimiter,
             std::vector<std::string>* fields);

// Splits the source string into multiple fields separated by delimiter,
// with duplicates of delimiter ignored.  Trailing delimiter ignored.
size_t tokenize(const std::string& source, char delimiter,
                std::vector<std::string>* fields);

// Safe sprintf to std::string (declaration currently disabled).
//void sprintf(std::string& value, size_t maxlen, const char * format, ...)
//     PRINTF_FORMAT(3);

// Convert arbitrary values to/from a string.
// Formats |t| with operator<< into a std::ostringstream and stores the text in
// |*s|.  Returns false if the stream reported a failure while formatting.
// |*s| is assigned whether or not formatting succeeded.
template <class T>
static bool ToString(const T &t, std::string* s) {
  std::ostringstream oss;
  oss << t;
  *s = oss.str();
  return !oss.fail();
}

// Parses |s| with operator>> from a std::istringstream into |*t|.  Returns
// false if the extraction failed.  Only the stream's fail state is checked, so
// input with trailing characters after a valid value (e.g. "12abc" for an int)
// is still reported as success.
template <class T>
static bool FromString(const std::string& s, T* t) {
  std::istringstream iss(s);
  iss >> *t;
  return !iss.fail();
}

// Inline versions of the string conversion routines.
160f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch 161f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdochtemplate<typename T> 162f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdochstatic inline std::string ToString(T val) { 163f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch std::string str; ToString(val, &str); return str; 164f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch} 165f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch 166f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdochtemplate<typename T> 167f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdochstatic inline T FromString(const std::string& str) { 168f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch T val; FromString(str, &val); return val; 169f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch} 170f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch 171f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdochtemplate<typename T> 172f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdochstatic inline T FromString(const T& defaultValue, const std::string& str) { 173f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch T val(defaultValue); FromString(str, &val); return val; 174f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch} 175f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch 176f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch// simple function to strip out characters which shouldn't be 177f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch// used in filenames 178f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdochchar make_char_safe_for_filename(char c); 179f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch 180f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch////////////////////////////////////////////////////////////////////// 181f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch 182f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch} // namespace talk_base 183f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch 184f74420b3285b9fe04a7e00aa3b8c0ab07ea344bcBen Murdoch#endif // 
TALK_BASE_STRINGENCODE_H__ 185