// String8.h revision 23b4a0936f1ee11e587b7be9dc3bcae5b55d31cf
1/* 2 * Copyright (C) 2005 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef ANDROID_STRING8_H 18#define ANDROID_STRING8_H 19 20#include <utils/Errors.h> 21 22// Need this for the char16_t type; String8.h should not 23// be depedent on the String16 class. 24#include <utils/String16.h> 25 26#include <stdint.h> 27#include <string.h> 28#include <sys/types.h> 29 30// --------------------------------------------------------------------------- 31 32extern "C" { 33 34typedef uint32_t char32_t; 35 36size_t strlen32(const char32_t *); 37size_t strnlen32(const char32_t *, size_t); 38 39/* 40 * Returns the length of "src" when "src" is valid UTF-8 string. 41 * Returns 0 if src is NULL, 0-length string or non UTF-8 string. 42 * This function should be used to determine whether "src" is valid UTF-8 43 * characters with valid unicode codepoints. "src" must be null-terminated. 44 * 45 * If you are going to use other GetUtf... functions defined in this header 46 * with string which may not be valid UTF-8 with valid codepoint (form 0 to 47 * 0x10FFFF), you should use this function before calling others, since the 48 * other functions do not check whether the string is valid UTF-8 or not. 49 * 50 * If you do not care whether "src" is valid UTF-8 or not, you should use 51 * strlen() as usual, which should be much faster. 52 */ 53size_t utf8_length(const char *src); 54 55/* 56 * Returns the UTF-32 length of "src". 
57 */ 58size_t utf32_length(const char *src, size_t src_len); 59 60/* 61 * Returns the UTF-8 length of "src". 62 */ 63size_t utf8_length_from_utf16(const char16_t *src, size_t src_len); 64 65/* 66 * Returns the UTF-8 length of "src". 67 */ 68size_t utf8_length_from_utf32(const char32_t *src, size_t src_len); 69 70/* 71 * Returns the unicode value at "index". 72 * Returns -1 when the index is invalid (equals to or more than "src_len"). 73 * If returned value is positive, it is able to be converted to char32_t, which 74 * is unsigned. Then, if "next_index" is not NULL, the next index to be used is 75 * stored in "next_index". "next_index" can be NULL. 76 */ 77int32_t utf32_at(const char *src, size_t src_len, 78 size_t index, size_t *next_index); 79 80/* 81 * Stores a UTF-32 string converted from "src" in "dst", if "dst_length" is not 82 * large enough to store the string, the part of the "src" string is stored 83 * into "dst". 84 * Returns the size actually used for storing the string. 85 * "dst" is not null-terminated when dst_len is fully used (like strncpy). 86 */ 87size_t utf8_to_utf32(const char* src, size_t src_len, 88 char32_t* dst, size_t dst_len); 89 90/* 91 * Stores a UTF-8 string converted from "src" in "dst", if "dst_length" is not 92 * large enough to store the string, the part of the "src" string is stored 93 * into "dst" as much as possible. See the examples for more detail. 94 * Returns the size actually used for storing the string. 95 * dst" is not null-terminated when dst_len is fully used (like strncpy). 
96 * 97 * Example 1 98 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84) 99 * "src_len" == 2 100 * "dst_len" >= 7 101 * -> 102 * Returned value == 6 103 * "dst" becomes \xE3\x81\x82\xE3\x81\x84\0 104 * (note that "dst" is null-terminated) 105 * 106 * Example 2 107 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84) 108 * "src_len" == 2 109 * "dst_len" == 5 110 * -> 111 * Returned value == 3 112 * "dst" becomes \xE3\x81\x82\0 113 * (note that "dst" is null-terminated, but \u3044 is not stored in "dst" 114 * since "dst" does not have enough size to store the character) 115 * 116 * Example 3 117 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84) 118 * "src_len" == 2 119 * "dst_len" == 6 120 * -> 121 * Returned value == 6 122 * "dst" becomes \xE3\x81\x82\xE3\x81\x84 123 * (note that "dst" is NOT null-terminated, like strncpy) 124 */ 125size_t utf32_to_utf8(const char32_t* src, size_t src_len, 126 char* dst, size_t dst_len); 127 128size_t utf16_to_utf8(const char16_t* src, size_t src_len, 129 char* dst, size_t dst_len); 130 131} 132 133// --------------------------------------------------------------------------- 134 135namespace android { 136 137class TextOutput; 138 139//! This is a string holding UTF-8 characters. Does not allow the value more 140// than 0x10FFFF, which is not valid unicode codepoint. 
141class String8 142{ 143public: 144 String8(); 145 String8(const String8& o); 146 explicit String8(const char* o); 147 explicit String8(const char* o, size_t numChars); 148 149 explicit String8(const String16& o); 150 explicit String8(const char16_t* o); 151 explicit String8(const char16_t* o, size_t numChars); 152 explicit String8(const char32_t* o); 153 explicit String8(const char32_t* o, size_t numChars); 154 ~String8(); 155 156 inline const char* string() const; 157 inline size_t size() const; 158 inline size_t length() const; 159 inline size_t bytes() const; 160 161 inline const SharedBuffer* sharedBuffer() const; 162 163 void setTo(const String8& other); 164 status_t setTo(const char* other); 165 status_t setTo(const char* other, size_t numChars); 166 status_t setTo(const char16_t* other, size_t numChars); 167 status_t setTo(const char32_t* other, 168 size_t length); 169 170 status_t append(const String8& other); 171 status_t append(const char* other); 172 status_t append(const char* other, size_t numChars); 173 174 status_t appendFormat(const char* fmt, ...); 175 176 // Note that this function takes O(N) time to calculate the value. 177 // No cache value is stored. 
178 size_t getUtf32Length() const; 179 int32_t getUtf32At(size_t index, 180 size_t *next_index) const; 181 size_t getUtf32(char32_t* dst, size_t dst_len) const; 182 183 inline String8& operator=(const String8& other); 184 inline String8& operator=(const char* other); 185 186 inline String8& operator+=(const String8& other); 187 inline String8 operator+(const String8& other) const; 188 189 inline String8& operator+=(const char* other); 190 inline String8 operator+(const char* other) const; 191 192 inline int compare(const String8& other) const; 193 194 inline bool operator<(const String8& other) const; 195 inline bool operator<=(const String8& other) const; 196 inline bool operator==(const String8& other) const; 197 inline bool operator!=(const String8& other) const; 198 inline bool operator>=(const String8& other) const; 199 inline bool operator>(const String8& other) const; 200 201 inline bool operator<(const char* other) const; 202 inline bool operator<=(const char* other) const; 203 inline bool operator==(const char* other) const; 204 inline bool operator!=(const char* other) const; 205 inline bool operator>=(const char* other) const; 206 inline bool operator>(const char* other) const; 207 208 inline operator const char*() const; 209 210 char* lockBuffer(size_t size); 211 void unlockBuffer(); 212 status_t unlockBuffer(size_t size); 213 214 // return the index of the first byte of other in this at or after 215 // start, or -1 if not found 216 ssize_t find(const char* other, size_t start = 0) const; 217 218 void toLower(); 219 void toLower(size_t start, size_t numChars); 220 void toUpper(); 221 void toUpper(size_t start, size_t numChars); 222 223 /* 224 * These methods operate on the string as if it were a path name. 225 */ 226 227 /* 228 * Set the filename field to a specific value. 229 * 230 * Normalizes the filename, removing a trailing '/' if present. 
231 */ 232 void setPathName(const char* name); 233 void setPathName(const char* name, size_t numChars); 234 235 /* 236 * Get just the filename component. 237 * 238 * "/tmp/foo/bar.c" --> "bar.c" 239 */ 240 String8 getPathLeaf(void) const; 241 242 /* 243 * Remove the last (file name) component, leaving just the directory 244 * name. 245 * 246 * "/tmp/foo/bar.c" --> "/tmp/foo" 247 * "/tmp" --> "" // ????? shouldn't this be "/" ???? XXX 248 * "bar.c" --> "" 249 */ 250 String8 getPathDir(void) const; 251 252 /* 253 * Retrieve the front (root dir) component. Optionally also return the 254 * remaining components. 255 * 256 * "/tmp/foo/bar.c" --> "tmp" (remain = "foo/bar.c") 257 * "/tmp" --> "tmp" (remain = "") 258 * "bar.c" --> "bar.c" (remain = "") 259 */ 260 String8 walkPath(String8* outRemains = NULL) const; 261 262 /* 263 * Return the filename extension. This is the last '.' and up to 264 * four characters that follow it. The '.' is included in case we 265 * decide to expand our definition of what constitutes an extension. 266 * 267 * "/tmp/foo/bar.c" --> ".c" 268 * "/tmp" --> "" 269 * "/tmp/foo.bar/baz" --> "" 270 * "foo.jpeg" --> ".jpeg" 271 * "foo." --> "" 272 */ 273 String8 getPathExtension(void) const; 274 275 /* 276 * Return the path without the extension. Rules for what constitutes 277 * an extension are described in the comment for getPathExtension(). 278 * 279 * "/tmp/foo/bar.c" --> "/tmp/foo/bar" 280 */ 281 String8 getBasePath(void) const; 282 283 /* 284 * Add a component to the pathname. We guarantee that there is 285 * exactly one path separator between the old path and the new. 286 * If there is no existing name, we just copy the new name in. 287 * 288 * If leaf is a fully qualified path (i.e. starts with '/', it 289 * replaces whatever was there before. 290 */ 291 String8& appendPath(const char* leaf); 292 String8& appendPath(const String8& leaf) { return appendPath(leaf.string()); } 293 294 /* 295 * Like appendPath(), but does not affect this string. 
Returns a new one instead. 296 */ 297 String8 appendPathCopy(const char* leaf) const 298 { String8 p(*this); p.appendPath(leaf); return p; } 299 String8 appendPathCopy(const String8& leaf) const { return appendPathCopy(leaf.string()); } 300 301 /* 302 * Converts all separators in this string to /, the default path separator. 303 * 304 * If the default OS separator is backslash, this converts all 305 * backslashes to slashes, in-place. Otherwise it does nothing. 306 * Returns self. 307 */ 308 String8& convertToResPath(); 309 310private: 311 status_t real_append(const char* other, size_t numChars); 312 char* find_extension(void) const; 313 314 const char* mString; 315}; 316 317TextOutput& operator<<(TextOutput& to, const String16& val); 318 319// --------------------------------------------------------------------------- 320// No user servicable parts below. 321 322inline int compare_type(const String8& lhs, const String8& rhs) 323{ 324 return lhs.compare(rhs); 325} 326 327inline int strictly_order_type(const String8& lhs, const String8& rhs) 328{ 329 return compare_type(lhs, rhs) < 0; 330} 331 332inline const char* String8::string() const 333{ 334 return mString; 335} 336 337inline size_t String8::length() const 338{ 339 return SharedBuffer::sizeFromData(mString)-1; 340} 341 342inline size_t String8::size() const 343{ 344 return length(); 345} 346 347inline size_t String8::bytes() const 348{ 349 return SharedBuffer::sizeFromData(mString)-1; 350} 351 352inline const SharedBuffer* String8::sharedBuffer() const 353{ 354 return SharedBuffer::bufferFromData(mString); 355} 356 357inline String8& String8::operator=(const String8& other) 358{ 359 setTo(other); 360 return *this; 361} 362 363inline String8& String8::operator=(const char* other) 364{ 365 setTo(other); 366 return *this; 367} 368 369inline String8& String8::operator+=(const String8& other) 370{ 371 append(other); 372 return *this; 373} 374 375inline String8 String8::operator+(const String8& other) const 376{ 377 
String8 tmp(*this); 378 tmp += other; 379 return tmp; 380} 381 382inline String8& String8::operator+=(const char* other) 383{ 384 append(other); 385 return *this; 386} 387 388inline String8 String8::operator+(const char* other) const 389{ 390 String8 tmp(*this); 391 tmp += other; 392 return tmp; 393} 394 395inline int String8::compare(const String8& other) const 396{ 397 return strcmp(mString, other.mString); 398} 399 400inline bool String8::operator<(const String8& other) const 401{ 402 return strcmp(mString, other.mString) < 0; 403} 404 405inline bool String8::operator<=(const String8& other) const 406{ 407 return strcmp(mString, other.mString) <= 0; 408} 409 410inline bool String8::operator==(const String8& other) const 411{ 412 return strcmp(mString, other.mString) == 0; 413} 414 415inline bool String8::operator!=(const String8& other) const 416{ 417 return strcmp(mString, other.mString) != 0; 418} 419 420inline bool String8::operator>=(const String8& other) const 421{ 422 return strcmp(mString, other.mString) >= 0; 423} 424 425inline bool String8::operator>(const String8& other) const 426{ 427 return strcmp(mString, other.mString) > 0; 428} 429 430inline bool String8::operator<(const char* other) const 431{ 432 return strcmp(mString, other) < 0; 433} 434 435inline bool String8::operator<=(const char* other) const 436{ 437 return strcmp(mString, other) <= 0; 438} 439 440inline bool String8::operator==(const char* other) const 441{ 442 return strcmp(mString, other) == 0; 443} 444 445inline bool String8::operator!=(const char* other) const 446{ 447 return strcmp(mString, other) != 0; 448} 449 450inline bool String8::operator>=(const char* other) const 451{ 452 return strcmp(mString, other) >= 0; 453} 454 455inline bool String8::operator>(const char* other) const 456{ 457 return strcmp(mString, other) > 0; 458} 459 460inline String8::operator const char*() const 461{ 462 return mString; 463} 464 465} // namespace android 466 467// 
--------------------------------------------------------------------------- 468 469#endif // ANDROID_STRING8_H 470