String8.h revision 2881c85e38c662050e9635c6ff3861a3be09674f
1/* 2 * Copyright (C) 2005 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef ANDROID_STRING8_H 18#define ANDROID_STRING8_H 19 20#include <utils/Errors.h> 21 22// Need this for the char16_t type; String8.h should not 23// be depedent on the String16 class. 24#include <utils/String16.h> 25 26#include <stdint.h> 27#include <string.h> 28#include <sys/types.h> 29 30// --------------------------------------------------------------------------- 31 32extern "C" { 33 34typedef uint32_t char32_t; 35 36size_t strlen32(const char32_t *); 37size_t strnlen32(const char32_t *, size_t); 38 39/* 40 * Returns the length of "src" when "src" is valid UTF-8 string. 41 * Returns 0 if src is NULL, 0-length string or non UTF-8 string. 42 * This function should be used to determine whether "src" is valid UTF-8 43 * characters with valid unicode codepoints. "src" must be null-terminated. 44 * 45 * If you are going to use other GetUtf... functions defined in this header 46 * with string which may not be valid UTF-8 with valid codepoint (form 0 to 47 * 0x10FFFF), you should use this function before calling others, since the 48 * other functions do not check whether the string is valid UTF-8 or not. 49 * 50 * If you do not care whether "src" is valid UTF-8 or not, you should use 51 * strlen() as usual, which should be much faster. 52 */ 53size_t utf8_length(const char *src); 54 55/* 56 * Returns the UTF-32 length of "src". 57 */ 58size_t utf32_length(const char *src, size_t src_len); 59 60/* 61 * Returns the UTF-8 length of "src". 62 */ 63size_t utf8_length_from_utf16(const char16_t *src, size_t src_len); 64 65/* 66 * Returns the UTF-8 length of "src". 67 */ 68size_t utf8_length_from_utf32(const char32_t *src, size_t src_len); 69 70/* 71 * Returns the unicode value at "index". 72 * Returns -1 when the index is invalid (equals to or more than "src_len"). 73 * If returned value is positive, it is able to be converted to char32_t, which 74 * is unsigned. Then, if "next_index" is not NULL, the next index to be used is 75 * stored in "next_index". "next_index" can be NULL. 76 */ 77int32_t utf32_at(const char *src, size_t src_len, 78 size_t index, size_t *next_index); 79 80/* 81 * Stores a UTF-32 string converted from "src" in "dst", if "dst_length" is not 82 * large enough to store the string, the part of the "src" string is stored 83 * into "dst". 84 * Returns the size actually used for storing the string. 85 * "dst" is not null-terminated when dst_len is fully used (like strncpy). 86 */ 87size_t utf8_to_utf32(const char* src, size_t src_len, 88 char32_t* dst, size_t dst_len); 89 90/* 91 * Stores a UTF-8 string converted from "src" in "dst", if "dst_length" is not 92 * large enough to store the string, the part of the "src" string is stored 93 * into "dst" as much as possible. See the examples for more detail. 94 * Returns the size actually used for storing the string. 95 * dst" is not null-terminated when dst_len is fully used (like strncpy). 96 * 97 * Example 1 98 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84) 99 * "src_len" == 2 100 * "dst_len" >= 7 101 * -> 102 * Returned value == 6 103 * "dst" becomes \xE3\x81\x82\xE3\x81\x84\0 104 * (note that "dst" is null-terminated) 105 * 106 * Example 2 107 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84) 108 * "src_len" == 2 109 * "dst_len" == 5 110 * -> 111 * Returned value == 3 112 * "dst" becomes \xE3\x81\x82\0 113 * (note that "dst" is null-terminated, but \u3044 is not stored in "dst" 114 * since "dst" does not have enough size to store the character) 115 * 116 * Example 3 117 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84) 118 * "src_len" == 2 119 * "dst_len" == 6 120 * -> 121 * Returned value == 6 122 * "dst" becomes \xE3\x81\x82\xE3\x81\x84 123 * (note that "dst" is NOT null-terminated, like strncpy) 124 */ 125size_t utf32_to_utf8(const char32_t* src, size_t src_len, 126 char* dst, size_t dst_len); 127 128size_t utf16_to_utf8(const char16_t* src, size_t src_len, 129 char* dst, size_t dst_len); 130 131} 132 133// --------------------------------------------------------------------------- 134 135namespace android { 136 137class TextOutput; 138 139//! This is a string holding UTF-8 characters. Does not allow the value more 140// than 0x10FFFF, which is not valid unicode codepoint. 141class String8 142{ 143public: 144 String8(); 145 String8(const String8& o); 146 explicit String8(const char* o); 147 explicit String8(const char* o, size_t numChars); 148 149 explicit String8(const String16& o); 150 explicit String8(const char16_t* o); 151 explicit String8(const char16_t* o, size_t numChars); 152 explicit String8(const char32_t* o); 153 explicit String8(const char32_t* o, size_t numChars); 154 ~String8(); 155 156 inline const char* string() const; 157 inline size_t size() const; 158 inline size_t length() const; 159 inline size_t bytes() const; 160 inline bool isEmpty() const; 161 162 inline const SharedBuffer* sharedBuffer() const; 163 164 void clear(); 165 166 void setTo(const String8& other); 167 status_t setTo(const char* other); 168 status_t setTo(const char* other, size_t numChars); 169 status_t setTo(const char16_t* other, size_t numChars); 170 status_t setTo(const char32_t* other, 171 size_t length); 172 173 status_t append(const String8& other); 174 status_t append(const char* other); 175 status_t append(const char* other, size_t numChars); 176 177 status_t appendFormat(const char* fmt, ...) 178 __attribute__((format (printf, 2, 3))); 179 180 // Note that this function takes O(N) time to calculate the value. 181 // No cache value is stored. 182 size_t getUtf32Length() const; 183 int32_t getUtf32At(size_t index, 184 size_t *next_index) const; 185 size_t getUtf32(char32_t* dst, size_t dst_len) const; 186 187 inline String8& operator=(const String8& other); 188 inline String8& operator=(const char* other); 189 190 inline String8& operator+=(const String8& other); 191 inline String8 operator+(const String8& other) const; 192 193 inline String8& operator+=(const char* other); 194 inline String8 operator+(const char* other) const; 195 196 inline int compare(const String8& other) const; 197 198 inline bool operator<(const String8& other) const; 199 inline bool operator<=(const String8& other) const; 200 inline bool operator==(const String8& other) const; 201 inline bool operator!=(const String8& other) const; 202 inline bool operator>=(const String8& other) const; 203 inline bool operator>(const String8& other) const; 204 205 inline bool operator<(const char* other) const; 206 inline bool operator<=(const char* other) const; 207 inline bool operator==(const char* other) const; 208 inline bool operator!=(const char* other) const; 209 inline bool operator>=(const char* other) const; 210 inline bool operator>(const char* other) const; 211 212 inline operator const char*() const; 213 214 char* lockBuffer(size_t size); 215 void unlockBuffer(); 216 status_t unlockBuffer(size_t size); 217 218 // return the index of the first byte of other in this at or after 219 // start, or -1 if not found 220 ssize_t find(const char* other, size_t start = 0) const; 221 222 void toLower(); 223 void toLower(size_t start, size_t numChars); 224 void toUpper(); 225 void toUpper(size_t start, size_t numChars); 226 227 /* 228 * These methods operate on the string as if it were a path name. 229 */ 230 231 /* 232 * Set the filename field to a specific value. 233 * 234 * Normalizes the filename, removing a trailing '/' if present. 235 */ 236 void setPathName(const char* name); 237 void setPathName(const char* name, size_t numChars); 238 239 /* 240 * Get just the filename component. 241 * 242 * "/tmp/foo/bar.c" --> "bar.c" 243 */ 244 String8 getPathLeaf(void) const; 245 246 /* 247 * Remove the last (file name) component, leaving just the directory 248 * name. 249 * 250 * "/tmp/foo/bar.c" --> "/tmp/foo" 251 * "/tmp" --> "" // ????? shouldn't this be "/" ???? XXX 252 * "bar.c" --> "" 253 */ 254 String8 getPathDir(void) const; 255 256 /* 257 * Retrieve the front (root dir) component. Optionally also return the 258 * remaining components. 259 * 260 * "/tmp/foo/bar.c" --> "tmp" (remain = "foo/bar.c") 261 * "/tmp" --> "tmp" (remain = "") 262 * "bar.c" --> "bar.c" (remain = "") 263 */ 264 String8 walkPath(String8* outRemains = NULL) const; 265 266 /* 267 * Return the filename extension. This is the last '.' and up to 268 * four characters that follow it. The '.' is included in case we 269 * decide to expand our definition of what constitutes an extension. 270 * 271 * "/tmp/foo/bar.c" --> ".c" 272 * "/tmp" --> "" 273 * "/tmp/foo.bar/baz" --> "" 274 * "foo.jpeg" --> ".jpeg" 275 * "foo." --> "" 276 */ 277 String8 getPathExtension(void) const; 278 279 /* 280 * Return the path without the extension. Rules for what constitutes 281 * an extension are described in the comment for getPathExtension(). 282 * 283 * "/tmp/foo/bar.c" --> "/tmp/foo/bar" 284 */ 285 String8 getBasePath(void) const; 286 287 /* 288 * Add a component to the pathname. We guarantee that there is 289 * exactly one path separator between the old path and the new. 290 * If there is no existing name, we just copy the new name in. 291 * 292 * If leaf is a fully qualified path (i.e. starts with '/', it 293 * replaces whatever was there before. 294 */ 295 String8& appendPath(const char* leaf); 296 String8& appendPath(const String8& leaf) { return appendPath(leaf.string()); } 297 298 /* 299 * Like appendPath(), but does not affect this string. Returns a new one instead. 300 */ 301 String8 appendPathCopy(const char* leaf) const 302 { String8 p(*this); p.appendPath(leaf); return p; } 303 String8 appendPathCopy(const String8& leaf) const { return appendPathCopy(leaf.string()); } 304 305 /* 306 * Converts all separators in this string to /, the default path separator. 307 * 308 * If the default OS separator is backslash, this converts all 309 * backslashes to slashes, in-place. Otherwise it does nothing. 310 * Returns self. 311 */ 312 String8& convertToResPath(); 313 314private: 315 status_t real_append(const char* other, size_t numChars); 316 char* find_extension(void) const; 317 318 const char* mString; 319}; 320 321TextOutput& operator<<(TextOutput& to, const String16& val); 322 323// --------------------------------------------------------------------------- 324// No user servicable parts below. 325 326inline int compare_type(const String8& lhs, const String8& rhs) 327{ 328 return lhs.compare(rhs); 329} 330 331inline int strictly_order_type(const String8& lhs, const String8& rhs) 332{ 333 return compare_type(lhs, rhs) < 0; 334} 335 336inline const char* String8::string() const 337{ 338 return mString; 339} 340 341inline size_t String8::length() const 342{ 343 return SharedBuffer::sizeFromData(mString)-1; 344} 345 346inline size_t String8::size() const 347{ 348 return length(); 349} 350 351inline bool String8::isEmpty() const 352{ 353 return length() == 0; 354} 355 356inline size_t String8::bytes() const 357{ 358 return SharedBuffer::sizeFromData(mString)-1; 359} 360 361inline const SharedBuffer* String8::sharedBuffer() const 362{ 363 return SharedBuffer::bufferFromData(mString); 364} 365 366inline String8& String8::operator=(const String8& other) 367{ 368 setTo(other); 369 return *this; 370} 371 372inline String8& String8::operator=(const char* other) 373{ 374 setTo(other); 375 return *this; 376} 377 378inline String8& String8::operator+=(const String8& other) 379{ 380 append(other); 381 return *this; 382} 383 384inline String8 String8::operator+(const String8& other) const 385{ 386 String8 tmp(*this); 387 tmp += other; 388 return tmp; 389} 390 391inline String8& String8::operator+=(const char* other) 392{ 393 append(other); 394 return *this; 395} 396 397inline String8 String8::operator+(const char* other) const 398{ 399 String8 tmp(*this); 400 tmp += other; 401 return tmp; 402} 403 404inline int String8::compare(const String8& other) const 405{ 406 return strcmp(mString, other.mString); 407} 408 409inline bool String8::operator<(const String8& other) const 410{ 411 return strcmp(mString, other.mString) < 0; 412} 413 414inline bool String8::operator<=(const String8& other) const 415{ 416 return strcmp(mString, other.mString) <= 0; 417} 418 419inline bool String8::operator==(const String8& other) const 420{ 421 return (SharedBuffer::sizeFromData(mString) == 422 SharedBuffer::sizeFromData(other.mString)) && 423 strcmp(mString, other.mString) == 0; 424} 425 426inline bool String8::operator!=(const String8& other) const 427{ 428 return strcmp(mString, other.mString) != 0; 429} 430 431inline bool String8::operator>=(const String8& other) const 432{ 433 return strcmp(mString, other.mString) >= 0; 434} 435 436inline bool String8::operator>(const String8& other) const 437{ 438 return strcmp(mString, other.mString) > 0; 439} 440 441inline bool String8::operator<(const char* other) const 442{ 443 return strcmp(mString, other) < 0; 444} 445 446inline bool String8::operator<=(const char* other) const 447{ 448 return strcmp(mString, other) <= 0; 449} 450 451inline bool String8::operator==(const char* other) const 452{ 453 return strcmp(mString, other) == 0; 454} 455 456inline bool String8::operator!=(const char* other) const 457{ 458 return strcmp(mString, other) != 0; 459} 460 461inline bool String8::operator>=(const char* other) const 462{ 463 return strcmp(mString, other) >= 0; 464} 465 466inline bool String8::operator>(const char* other) const 467{ 468 return strcmp(mString, other) > 0; 469} 470 471inline String8::operator const char*() const 472{ 473 return mString; 474} 475 476} // namespace android 477 478// --------------------------------------------------------------------------- 479 480#endif // ANDROID_STRING8_H 481