1/* 2 * Copyright 2004 The WebRTC Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#ifndef WEBRTC_BASE_STRINGENCODE_H_ 12#define WEBRTC_BASE_STRINGENCODE_H_ 13 14#include <string> 15#include <sstream> 16#include <vector> 17 18#include "webrtc/base/checks.h" 19 20namespace rtc { 21 22////////////////////////////////////////////////////////////////////// 23// String Encoding Utilities 24////////////////////////////////////////////////////////////////////// 25 26// Convert an unsigned value to it's utf8 representation. Returns the length 27// of the encoded string, or 0 if the encoding is longer than buflen - 1. 28size_t utf8_encode(char* buffer, size_t buflen, unsigned long value); 29// Decode the utf8 encoded value pointed to by source. Returns the number of 30// bytes used by the encoding, or 0 if the encoding is invalid. 31size_t utf8_decode(const char* source, size_t srclen, unsigned long* value); 32 33// Escaping prefixes illegal characters with the escape character. Compact, but 34// illegal characters still appear in the string. 35size_t escape(char * buffer, size_t buflen, 36 const char * source, size_t srclen, 37 const char * illegal, char escape); 38// Note: in-place unescaping (buffer == source) is allowed. 39size_t unescape(char * buffer, size_t buflen, 40 const char * source, size_t srclen, 41 char escape); 42 43// Encoding replaces illegal characters with the escape character and 2 hex 44// chars, so it's a little less compact than escape, but completely removes 45// illegal characters. note that hex digits should not be used as illegal 46// characters. 
size_t encode(char* buffer,
              size_t buflen,
              const char* source,
              size_t srclen,
              const char* illegal,
              char escape);

// Reverses encode(). In-place decoding (buffer == source) is allowed.
size_t decode(char* buffer,
              size_t buflen,
              const char* source,
              size_t srclen,
              char escape);

// Returns a list of characters that may be unsafe for use in the name of a
// file. The list is suitable for passing as the 'illegal' argument of escape
// or encode.
const char* unsafe_filename_characters();

// url_encode is an encode operation with a predefined set of illegal
// characters and a predefined escape character, for use in URLs.
size_t url_encode(char* buffer,
                  size_t buflen,
                  const char* source,
                  size_t srclen);

// Reverses url_encode(). In-place decoding (buffer == source) is allowed.
size_t url_decode(char* buffer,
                  size_t buflen,
                  const char* source,
                  size_t srclen);

// html_encode prevents data embedded in html from containing markup.
size_t html_encode(char* buffer,
                   size_t buflen,
                   const char* source,
                   size_t srclen);

// Reverses html_encode(). In-place decoding (buffer == source) is allowed.
size_t html_decode(char* buffer,
                   size_t buflen,
                   const char* source,
                   size_t srclen);

// xml_encode makes data suitable for use inside xml attributes and values.
size_t xml_encode(char* buffer,
                  size_t buflen,
                  const char* source,
                  size_t srclen);

// Reverses xml_encode(). In-place decoding (buffer == source) is allowed.
size_t xml_decode(char* buffer,
                  size_t buflen,
                  const char* source,
                  size_t srclen);

// Converts an unsigned value from 0 to 15 to the equivalent hex character...
char hex_encode(unsigned char val);
// ...and vice-versa.
bool hex_decode(char ch, unsigned char* val);

// hex_encode writes the hex representation of binary data as ascii.
size_t hex_encode(char* buffer,
                  size_t buflen,
                  const char* source,
                  size_t srclen);

// hex_encode, but separate each byte representation with a delimiter.
// |delimiter| == 0 means no delimiter.
// If the buffer is too short, we return 0.
size_t hex_encode_with_delimiter(char* buffer,
                                 size_t buflen,
                                 const char* source,
                                 size_t srclen,
                                 char delimiter);

// Helper functions for hex_encode that return the result as a std::string.
std::string hex_encode(const char* source, size_t srclen);
std::string hex_encode_with_delimiter(const char* source,
                                      size_t srclen,
                                      char delimiter);

// hex_decode converts ascii hex to binary.
size_t hex_decode(char* buffer,
                  size_t buflen,
                  const char* source,
                  size_t srclen);

// hex_decode, assuming that there is a delimiter between every byte pair.
// |delimiter| == 0 means no delimiter.
// If the buffer is too short or the data is invalid, we return 0.
size_t hex_decode_with_delimiter(char* buffer,
                                 size_t buflen,
                                 const char* source,
                                 size_t srclen,
                                 char delimiter);

// Helper functions for hex_decode that take the input as a std::string.
size_t hex_decode(char* buffer, size_t buflen, const std::string& source);
size_t hex_decode_with_delimiter(char* buffer,
                                 size_t buflen,
                                 const std::string& source,
                                 char delimiter);

// Apply any suitable string transform (including the ones above) to an STL
// string. Stack-allocated temporary space is used for the transformation,
// so value and source may refer to the same string.
typedef size_t (*Transform)(char* buffer,
                            size_t buflen,
                            const char* source,
                            size_t srclen);
size_t transform(std::string& value,
                 size_t maxlen,
                 const std::string& source,
                 Transform t);

// Return the result of applying transform t to source.
std::string s_transform(const std::string& source, Transform t);

// Convenience wrappers.
131inline std::string s_url_encode(const std::string& source) { 132 return s_transform(source, url_encode); 133} 134inline std::string s_url_decode(const std::string& source) { 135 return s_transform(source, url_decode); 136} 137 138// Splits the source string into multiple fields separated by delimiter, 139// with duplicates of delimiter creating empty fields. 140size_t split(const std::string& source, char delimiter, 141 std::vector<std::string>* fields); 142 143// Splits the source string into multiple fields separated by delimiter, 144// with duplicates of delimiter ignored. Trailing delimiter ignored. 145size_t tokenize(const std::string& source, char delimiter, 146 std::vector<std::string>* fields); 147 148// Tokenize and append the tokens to fields. Return the new size of fields. 149size_t tokenize_append(const std::string& source, char delimiter, 150 std::vector<std::string>* fields); 151 152// Splits the source string into multiple fields separated by delimiter, with 153// duplicates of delimiter ignored. Trailing delimiter ignored. A substring in 154// between the start_mark and the end_mark is treated as a single field. Return 155// the size of fields. For example, if source is "filename 156// \"/Library/Application Support/media content.txt\"", delimiter is ' ', and 157// the start_mark and end_mark are '"', this method returns two fields: 158// "filename" and "/Library/Application Support/media content.txt". 159size_t tokenize(const std::string& source, char delimiter, char start_mark, 160 char end_mark, std::vector<std::string>* fields); 161 162// Safe sprintf to std::string 163//void sprintf(std::string& value, size_t maxlen, const char * format, ...) 164// PRINTF_FORMAT(3); 165 166// Convert arbitrary values to/from a string. 
167 168template <class T> 169static bool ToString(const T &t, std::string* s) { 170 DCHECK(s); 171 std::ostringstream oss; 172 oss << std::boolalpha << t; 173 *s = oss.str(); 174 return !oss.fail(); 175} 176 177template <class T> 178static bool FromString(const std::string& s, T* t) { 179 DCHECK(t); 180 std::istringstream iss(s); 181 iss >> std::boolalpha >> *t; 182 return !iss.fail(); 183} 184 185// Inline versions of the string conversion routines. 186 187template<typename T> 188static inline std::string ToString(const T& val) { 189 std::string str; ToString(val, &str); return str; 190} 191 192template<typename T> 193static inline T FromString(const std::string& str) { 194 T val; FromString(str, &val); return val; 195} 196 197template<typename T> 198static inline T FromString(const T& defaultValue, const std::string& str) { 199 T val(defaultValue); FromString(str, &val); return val; 200} 201 202// simple function to strip out characters which shouldn't be 203// used in filenames 204char make_char_safe_for_filename(char c); 205 206////////////////////////////////////////////////////////////////////// 207 208} // namespace rtc 209 210#endif // WEBRTC_BASE_STRINGENCODE_H__ 211