String8.cpp revision d24b8183b93e781080b2c16c487e60d51c12da31
1/* 2 * Copyright (C) 2005 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include <utils/String8.h> 18 19#include <utils/Log.h> 20#include <utils/String16.h> 21#include <utils/TextOutput.h> 22#include <utils/threads.h> 23 24#include <private/utils/Static.h> 25 26#include <ctype.h> 27 28namespace android { 29 30// --------------------------------------------------------------------------- 31 32static const uint32_t kByteMask = 0x000000BF; 33static const uint32_t kByteMark = 0x00000080; 34 35// Surrogates aren't valid for UTF-32 characters, so define some 36// constants that will let us screen them out. 37static const uint32_t kUnicodeSurrogateHighStart = 0x0000D800; 38static const uint32_t kUnicodeSurrogateHighEnd = 0x0000DBFF; 39static const uint32_t kUnicodeSurrogateLowStart = 0x0000DC00; 40static const uint32_t kUnicodeSurrogateLowEnd = 0x0000DFFF; 41static const uint32_t kUnicodeSurrogateStart = kUnicodeSurrogateHighStart; 42static const uint32_t kUnicodeSurrogateEnd = kUnicodeSurrogateLowEnd; 43 44// Mask used to set appropriate bits in first byte of UTF-8 sequence, 45// indexed by number of bytes in the sequence. 46static const uint32_t kFirstByteMark[] = { 47 0x00000000, 0x00000000, 0x000000C0, 0x000000E0, 0x000000F0 48}; 49 50// Separator used by resource paths. This is not platform dependent contrary 51// to OS_PATH_SEPARATOR. 52#define RES_PATH_SEPARATOR '/' 53 54// Return number of utf8 bytes required for the character. 55static size_t utf32_to_utf8_bytes(uint32_t srcChar) 56{ 57 size_t bytesToWrite; 58 59 // Figure out how many bytes the result will require. 60 if (srcChar < 0x00000080) 61 { 62 bytesToWrite = 1; 63 } 64 else if (srcChar < 0x00000800) 65 { 66 bytesToWrite = 2; 67 } 68 else if (srcChar < 0x00010000) 69 { 70 if ((srcChar < kUnicodeSurrogateStart) 71 || (srcChar > kUnicodeSurrogateEnd)) 72 { 73 bytesToWrite = 3; 74 } 75 else 76 { 77 // Surrogates are invalid UTF-32 characters. 78 return 0; 79 } 80 } 81 // Max code point for Unicode is 0x0010FFFF. 82 else if (srcChar < 0x00110000) 83 { 84 bytesToWrite = 4; 85 } 86 else 87 { 88 // Invalid UTF-32 character. 89 return 0; 90 } 91 92 return bytesToWrite; 93} 94 95// Write out the source character to <dstP>. 96 97static void utf32_to_utf8(uint8_t* dstP, uint32_t srcChar, size_t bytes) 98{ 99 dstP += bytes; 100 switch (bytes) 101 { /* note: everything falls through. */ 102 case 4: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; 103 case 3: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; 104 case 2: *--dstP = (uint8_t)((srcChar | kByteMark) & kByteMask); srcChar >>= 6; 105 case 1: *--dstP = (uint8_t)(srcChar | kFirstByteMark[bytes]); 106 } 107} 108 109// --------------------------------------------------------------------------- 110 111static SharedBuffer* gEmptyStringBuf = NULL; 112static char* gEmptyString = NULL; 113 114extern int gDarwinCantLoadAllObjects; 115int gDarwinIsReallyAnnoying; 116 117static inline char* getEmptyString() 118{ 119 gEmptyStringBuf->acquire(); 120 return gEmptyString; 121} 122 123void initialize_string8() 124{ 125#ifdef LIBUTILS_NATIVE 126 // Bite me, Darwin! 127 gDarwinIsReallyAnnoying = gDarwinCantLoadAllObjects; 128#endif 129 130 SharedBuffer* buf = SharedBuffer::alloc(1); 131 char* str = (char*)buf->data(); 132 *str = 0; 133 gEmptyStringBuf = buf; 134 gEmptyString = str; 135} 136 137void terminate_string8() 138{ 139 SharedBuffer::bufferFromData(gEmptyString)->release(); 140 gEmptyStringBuf = NULL; 141 gEmptyString = NULL; 142} 143 144// --------------------------------------------------------------------------- 145 146static char* allocFromUTF8(const char* in, size_t len) 147{ 148 if (len > 0) { 149 SharedBuffer* buf = SharedBuffer::alloc(len+1); 150 LOG_ASSERT(buf, "Unable to allocate shared buffer"); 151 if (buf) { 152 char* str = (char*)buf->data(); 153 memcpy(str, in, len); 154 str[len] = 0; 155 return str; 156 } 157 return NULL; 158 } 159 160 return getEmptyString(); 161} 162 163// Note: not dealing with expanding surrogate pairs. 164static char* allocFromUTF16(const char16_t* in, size_t len) 165{ 166 if (len == 0) return getEmptyString(); 167 168 size_t bytes = 0; 169 const char16_t* end = in+len; 170 const char16_t* p = in; 171 172 while (p < end) { 173 bytes += utf32_to_utf8_bytes(*p); 174 p++; 175 } 176 177 SharedBuffer* buf = SharedBuffer::alloc(bytes+1); 178 LOG_ASSERT(buf, "Unable to allocate shared buffer"); 179 if (buf) { 180 p = in; 181 char* str = (char*)buf->data(); 182 char* d = str; 183 while (p < end) { 184 uint32_t c = *p++; 185 size_t len = utf32_to_utf8_bytes(c); 186 utf32_to_utf8((uint8_t*)d, c, len); 187 d += len; 188 } 189 *d = 0; 190 191 return str; 192 } 193 194 return getEmptyString(); 195} 196 197// --------------------------------------------------------------------------- 198 199String8::String8() 200 : mString(getEmptyString()) 201{ 202} 203 204String8::String8(const String8& o) 205 : mString(o.mString) 206{ 207 SharedBuffer::bufferFromData(mString)->acquire(); 208} 209 210String8::String8(const char* o) 211 : mString(allocFromUTF8(o, strlen(o))) 212{ 213 if (mString == NULL) { 214 mString = getEmptyString(); 215 } 216} 217 218String8::String8(const char* o, size_t len) 219 : mString(allocFromUTF8(o, len)) 220{ 221 if (mString == NULL) { 222 mString = getEmptyString(); 223 } 224} 225 226String8::String8(const String16& o) 227 : mString(allocFromUTF16(o.string(), o.size())) 228{ 229} 230 231String8::String8(const char16_t* o) 232 : mString(allocFromUTF16(o, strlen16(o))) 233{ 234} 235 236String8::String8(const char16_t* o, size_t len) 237 : mString(allocFromUTF16(o, len)) 238{ 239} 240 241String8::~String8() 242{ 243 SharedBuffer::bufferFromData(mString)->release(); 244} 245 246void String8::setTo(const String8& other) 247{ 248 SharedBuffer::bufferFromData(other.mString)->acquire(); 249 SharedBuffer::bufferFromData(mString)->release(); 250 mString = other.mString; 251} 252 253status_t String8::setTo(const char* other) 254{ 255 SharedBuffer::bufferFromData(mString)->release(); 256 mString = allocFromUTF8(other, strlen(other)); 257 if (mString) return NO_ERROR; 258 259 mString = getEmptyString(); 260 return NO_MEMORY; 261} 262 263status_t String8::setTo(const char* other, size_t len) 264{ 265 SharedBuffer::bufferFromData(mString)->release(); 266 mString = allocFromUTF8(other, len); 267 if (mString) return NO_ERROR; 268 269 mString = getEmptyString(); 270 return NO_MEMORY; 271} 272 273status_t String8::setTo(const char16_t* other, size_t len) 274{ 275 SharedBuffer::bufferFromData(mString)->release(); 276 mString = allocFromUTF16(other, len); 277 if (mString) return NO_ERROR; 278 279 mString = getEmptyString(); 280 return NO_MEMORY; 281} 282 283status_t String8::append(const String8& other) 284{ 285 const size_t otherLen = other.bytes(); 286 if (bytes() == 0) { 287 setTo(other); 288 return NO_ERROR; 289 } else if (otherLen == 0) { 290 return NO_ERROR; 291 } 292 293 return real_append(other.string(), otherLen); 294} 295 296status_t String8::append(const char* other) 297{ 298 return append(other, strlen(other)); 299} 300 301status_t String8::append(const char* other, size_t otherLen) 302{ 303 if (bytes() == 0) { 304 return setTo(other, otherLen); 305 } else if (otherLen == 0) { 306 return NO_ERROR; 307 } 308 309 return real_append(other, otherLen); 310} 311 312status_t String8::real_append(const char* other, size_t otherLen) 313{ 314 const size_t myLen = bytes(); 315 316 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 317 ->editResize(myLen+otherLen+1); 318 if (buf) { 319 char* str = (char*)buf->data(); 320 mString = str; 321 str += myLen; 322 memcpy(str, other, otherLen); 323 str[otherLen] = '\0'; 324 return NO_ERROR; 325 } 326 return NO_MEMORY; 327} 328 329char* String8::lockBuffer(size_t size) 330{ 331 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 332 ->editResize(size+1); 333 if (buf) { 334 char* str = (char*)buf->data(); 335 mString = str; 336 return str; 337 } 338 return NULL; 339} 340 341void String8::unlockBuffer() 342{ 343 unlockBuffer(strlen(mString)); 344} 345 346status_t String8::unlockBuffer(size_t size) 347{ 348 if (size != this->size()) { 349 SharedBuffer* buf = SharedBuffer::bufferFromData(mString) 350 ->editResize(size+1); 351 if (buf) { 352 char* str = (char*)buf->data(); 353 str[size] = 0; 354 mString = str; 355 return NO_ERROR; 356 } 357 } 358 359 return NO_MEMORY; 360} 361 362ssize_t String8::find(const char* other, size_t start) const 363{ 364 size_t len = size(); 365 if (start >= len) { 366 return -1; 367 } 368 const char* s = mString+start; 369 const char* p = strstr(s, other); 370 return p ? p-mString : -1; 371} 372 373void String8::toLower() 374{ 375 toLower(0, size()); 376} 377 378void String8::toLower(size_t start, size_t length) 379{ 380 const size_t len = size(); 381 if (start >= len) { 382 return; 383 } 384 if (start+length > len) { 385 length = len-start; 386 } 387 char* buf = lockBuffer(len); 388 buf += start; 389 while (length > 0) { 390 *buf = tolower(*buf); 391 buf++; 392 length--; 393 } 394 unlockBuffer(len); 395} 396 397void String8::toUpper() 398{ 399 toUpper(0, size()); 400} 401 402void String8::toUpper(size_t start, size_t length) 403{ 404 const size_t len = size(); 405 if (start >= len) { 406 return; 407 } 408 if (start+length > len) { 409 length = len-start; 410 } 411 char* buf = lockBuffer(len); 412 buf += start; 413 while (length > 0) { 414 *buf = toupper(*buf); 415 buf++; 416 length--; 417 } 418 unlockBuffer(len); 419} 420 421TextOutput& operator<<(TextOutput& to, const String8& val) 422{ 423 to << val.string(); 424 return to; 425} 426 427// --------------------------------------------------------------------------- 428// Path functions 429 430 431void String8::setPathName(const char* name) 432{ 433 setPathName(name, strlen(name)); 434} 435 436void String8::setPathName(const char* name, size_t len) 437{ 438 char* buf = lockBuffer(len); 439 440 memcpy(buf, name, len); 441 442 // remove trailing path separator, if present 443 if (len > 0 && buf[len-1] == OS_PATH_SEPARATOR) 444 len--; 445 446 buf[len] = '\0'; 447 448 unlockBuffer(len); 449} 450 451String8 String8::getPathLeaf(void) const 452{ 453 const char* cp; 454 const char*const buf = mString; 455 456 cp = strrchr(buf, OS_PATH_SEPARATOR); 457 if (cp == NULL) 458 return String8(*this); 459 else 460 return String8(cp+1); 461} 462 463String8 String8::getPathDir(void) const 464{ 465 const char* cp; 466 const char*const str = mString; 467 468 cp = strrchr(str, OS_PATH_SEPARATOR); 469 if (cp == NULL) 470 return String8(""); 471 else 472 return String8(str, cp - str); 473} 474 475String8 String8::walkPath(String8* outRemains) const 476{ 477 const char* cp; 478 const char*const str = mString; 479 const char* buf = str; 480 481 cp = strchr(buf, OS_PATH_SEPARATOR); 482 if (cp == buf) { 483 // don't include a leading '/'. 484 buf = buf+1; 485 cp = strchr(buf, OS_PATH_SEPARATOR); 486 } 487 488 if (cp == NULL) { 489 String8 res = buf != str ? String8(buf) : *this; 490 if (outRemains) *outRemains = String8(""); 491 return res; 492 } 493 494 String8 res(buf, cp-buf); 495 if (outRemains) *outRemains = String8(cp+1); 496 return res; 497} 498 499/* 500 * Helper function for finding the start of an extension in a pathname. 501 * 502 * Returns a pointer inside mString, or NULL if no extension was found. 503 */ 504char* String8::find_extension(void) const 505{ 506 const char* lastSlash; 507 const char* lastDot; 508 int extLen; 509 const char* const str = mString; 510 511 // only look at the filename 512 lastSlash = strrchr(str, OS_PATH_SEPARATOR); 513 if (lastSlash == NULL) 514 lastSlash = str; 515 else 516 lastSlash++; 517 518 // find the last dot 519 lastDot = strrchr(lastSlash, '.'); 520 if (lastDot == NULL) 521 return NULL; 522 523 // looks good, ship it 524 return const_cast<char*>(lastDot); 525} 526 527String8 String8::getPathExtension(void) const 528{ 529 char* ext; 530 531 ext = find_extension(); 532 if (ext != NULL) 533 return String8(ext); 534 else 535 return String8(""); 536} 537 538String8 String8::getBasePath(void) const 539{ 540 char* ext; 541 const char* const str = mString; 542 543 ext = find_extension(); 544 if (ext == NULL) 545 return String8(*this); 546 else 547 return String8(str, ext - str); 548} 549 550String8& String8::appendPath(const char* name) 551{ 552 // TODO: The test below will fail for Win32 paths. Fix later or ignore. 553 if (name[0] != OS_PATH_SEPARATOR) { 554 if (*name == '\0') { 555 // nothing to do 556 return *this; 557 } 558 559 size_t len = length(); 560 if (len == 0) { 561 // no existing filename, just use the new one 562 setPathName(name); 563 return *this; 564 } 565 566 // make room for oldPath + '/' + newPath 567 int newlen = strlen(name); 568 569 char* buf = lockBuffer(len+1+newlen); 570 571 // insert a '/' if needed 572 if (buf[len-1] != OS_PATH_SEPARATOR) 573 buf[len++] = OS_PATH_SEPARATOR; 574 575 memcpy(buf+len, name, newlen+1); 576 len += newlen; 577 578 unlockBuffer(len); 579 580 return *this; 581 } else { 582 setPathName(name); 583 return *this; 584 } 585} 586 587String8& String8::convertToResPath() 588{ 589#if OS_PATH_SEPARATOR != RES_PATH_SEPARATOR 590 size_t len = length(); 591 if (len > 0) { 592 char * buf = lockBuffer(len); 593 for (char * end = buf + len; buf < end; ++buf) { 594 if (*buf == OS_PATH_SEPARATOR) 595 *buf = RES_PATH_SEPARATOR; 596 } 597 unlockBuffer(len); 598 } 599#endif 600 return *this; 601} 602 603 604}; // namespace android 605