/*
 *  Copyright 2004 The WebRTC Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_BASE_STRINGENCODE_H_
#define WEBRTC_BASE_STRINGENCODE_H_

#include <sstream>
#include <string>
#include <vector>

#include "webrtc/base/checks.h"

namespace rtc {

//////////////////////////////////////////////////////////////////////
// String Encoding Utilities
//////////////////////////////////////////////////////////////////////

// Converts an unsigned value to its utf8 representation.  Returns the length
// of the encoded string, or 0 if the encoding is longer than buflen - 1.
// NOTE(review): the "buflen - 1" limit suggests one byte of |buffer| is
// reserved for a terminating NUL -- confirm against the implementation.
size_t utf8_encode(char* buffer, size_t buflen, unsigned long value);
// Decodes the utf8 encoded value pointed to by |source|.  Returns the number
// of bytes used by the encoding, or 0 if the encoding is invalid.
// NOTE(review): the decoded value is presumably stored in |*value| -- confirm.
size_t utf8_decode(const char* source, size_t srclen, unsigned long* value);

// Escaping prefixes each illegal character with the escape character.  The
// result is compact, but the illegal characters still appear in the string.
// NOTE(review): the return value is not documented here; presumably it is the
// length of the text written to |buffer| -- confirm against the
// implementation.
size_t escape(char * buffer, size_t buflen,
              const char * source, size_t srclen,
              const char * illegal, char escape);
// Note: in-place unescaping (buffer == source) is allowed.
size_t unescape(char * buffer, size_t buflen,
                const char * source, size_t srclen,
                char escape);

// Encoding replaces each illegal character with the escape character followed
// by 2 hex chars, so it is a little less compact than escape, but it
// completely removes illegal characters.  Note that hex digits should not be
// used as illegal characters.
size_t encode(char * buffer, size_t buflen,
              const char * source, size_t srclen,
              const char * illegal, char escape);
// Note: in-place decoding (buffer == source) is allowed.
size_t decode(char * buffer, size_t buflen,
              const char * source, size_t srclen,
              char escape);

// Returns a list of characters that may be unsafe for use in the name of a
// file, suitable for passing as the |illegal| argument of escape or encode.
const char* unsafe_filename_characters();

// url_encode is an encode operation with a predefined set of illegal
// characters and a predefined escape character, intended for use in URLs.
size_t url_encode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t url_decode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);

// html_encode prevents data embedded in html from containing markup.
size_t html_encode(char * buffer, size_t buflen,
                   const char * source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t html_decode(char * buffer, size_t buflen,
                   const char * source, size_t srclen);

// xml_encode makes data suitable for use inside xml attributes and values.
size_t xml_encode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);
// Note: in-place decoding (buffer == source) is allowed.
size_t xml_decode(char * buffer, size_t buflen,
                  const char * source, size_t srclen);

// Converts an unsigned value from 0 to 15 to the equivalent hex character...
char hex_encode(unsigned char val);
// ...and vice-versa.
// NOTE(review): the bool result presumably reports whether |ch| was a valid
// hex digit, with the value stored in |*val| -- confirm.
bool hex_decode(char ch, unsigned char* val);

// hex_encode writes the hex representation of binary data as ascii.
size_t hex_encode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);

// Like hex_encode, but separates each byte representation with a delimiter.
// |delimiter| == 0 means no delimiter.
// If the buffer is too short, we return 0.
size_t hex_encode_with_delimiter(char* buffer, size_t buflen,
                                 const char* source, size_t srclen,
                                 char delimiter);

// Helper functions for hex_encode: same operations, but the result is
// returned as a std::string instead of being written to a caller buffer.
std::string hex_encode(const std::string& str);
std::string hex_encode(const char* source, size_t srclen);
std::string hex_encode_with_delimiter(const char* source, size_t srclen,
                                      char delimiter);

// hex_decode converts ascii hex to binary.
size_t hex_decode(char* buffer, size_t buflen,
                  const char* source, size_t srclen);

// hex_decode, assuming that there is a delimiter between every byte
// pair.
// |delimiter| == 0 means no delimiter.
// If the buffer is too short or the data is invalid, we return 0.
size_t hex_decode_with_delimiter(char* buffer, size_t buflen,
                                 const char* source, size_t srclen,
                                 char delimiter);

// Helper functions for hex_decode: same operations, but the input is taken
// from a std::string.
size_t hex_decode(char* buffer, size_t buflen, const std::string& source);
size_t hex_decode_with_delimiter(char* buffer, size_t buflen,
                                 const std::string& source, char delimiter);

// Apply any suitable string transform (including the ones above) to an STL
// string.  Stack-allocated temporary space is used for the transformation,
// so value and source may refer to the same string.
// Transform is the signature shared by the buffer-based transforms that take
// only (buffer, buflen, source, srclen), e.g. url_encode and url_decode.
// NOTE(review): |maxlen| presumably bounds the size of the temporary space,
// and the return value is presumably the resulting length -- confirm.
typedef size_t (*Transform)(char * buffer, size_t buflen,
                            const char * source, size_t srclen);
size_t transform(std::string& value, size_t maxlen, const std::string& source,
                 Transform t);

// Returns the result of applying transform t to source.
std::string s_transform(const std::string& source, Transform t);

// Convenience wrappers.
132inline std::string s_url_encode(const std::string& source) { 133 return s_transform(source, url_encode); 134} 135inline std::string s_url_decode(const std::string& source) { 136 return s_transform(source, url_decode); 137} 138 139// Splits the source string into multiple fields separated by delimiter, 140// with duplicates of delimiter creating empty fields. 141size_t split(const std::string& source, char delimiter, 142 std::vector<std::string>* fields); 143 144// Splits the source string into multiple fields separated by delimiter, 145// with duplicates of delimiter ignored. Trailing delimiter ignored. 146size_t tokenize(const std::string& source, char delimiter, 147 std::vector<std::string>* fields); 148 149// Tokenize, including the empty tokens. 150size_t tokenize_with_empty_tokens(const std::string& source, 151 char delimiter, 152 std::vector<std::string>* fields); 153 154// Tokenize and append the tokens to fields. Return the new size of fields. 155size_t tokenize_append(const std::string& source, char delimiter, 156 std::vector<std::string>* fields); 157 158// Splits the source string into multiple fields separated by delimiter, with 159// duplicates of delimiter ignored. Trailing delimiter ignored. A substring in 160// between the start_mark and the end_mark is treated as a single field. Return 161// the size of fields. For example, if source is "filename 162// \"/Library/Application Support/media content.txt\"", delimiter is ' ', and 163// the start_mark and end_mark are '"', this method returns two fields: 164// "filename" and "/Library/Application Support/media content.txt". 165size_t tokenize(const std::string& source, char delimiter, char start_mark, 166 char end_mark, std::vector<std::string>* fields); 167 168// Extract the first token from source as separated by delimiter, with 169// duplicates of delimiter ignored. Return false if the delimiter could not be 170// found, otherwise return true. 
171bool tokenize_first(const std::string& source, 172 const char delimiter, 173 std::string* token, 174 std::string* rest); 175 176// Safe sprintf to std::string 177//void sprintf(std::string& value, size_t maxlen, const char * format, ...) 178// PRINTF_FORMAT(3); 179 180// Convert arbitrary values to/from a string. 181 182template <class T> 183static bool ToString(const T &t, std::string* s) { 184 RTC_DCHECK(s); 185 std::ostringstream oss; 186 oss << std::boolalpha << t; 187 *s = oss.str(); 188 return !oss.fail(); 189} 190 191template <class T> 192static bool FromString(const std::string& s, T* t) { 193 RTC_DCHECK(t); 194 std::istringstream iss(s); 195 iss >> std::boolalpha >> *t; 196 return !iss.fail(); 197} 198 199// Inline versions of the string conversion routines. 200 201template<typename T> 202static inline std::string ToString(const T& val) { 203 std::string str; ToString(val, &str); return str; 204} 205 206template<typename T> 207static inline T FromString(const std::string& str) { 208 T val; FromString(str, &val); return val; 209} 210 211template<typename T> 212static inline T FromString(const T& defaultValue, const std::string& str) { 213 T val(defaultValue); FromString(str, &val); return val; 214} 215 216// simple function to strip out characters which shouldn't be 217// used in filenames 218char make_char_safe_for_filename(char c); 219 220////////////////////////////////////////////////////////////////////// 221 222} // namespace rtc 223 224#endif // WEBRTC_BASE_STRINGENCODE_H__ 225