1// Copyright 2014 PDFium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7#ifndef CORE_FXCRT_FX_STRING_H_ 8#define CORE_FXCRT_FX_STRING_H_ 9 10#include <stdint.h> // For intptr_t. 11 12#include <algorithm> 13#include <functional> 14 15#include "core/fxcrt/cfx_retain_ptr.h" 16#include "core/fxcrt/cfx_string_c_template.h" 17#include "core/fxcrt/cfx_string_data_template.h" 18#include "core/fxcrt/fx_memory.h" 19#include "core/fxcrt/fx_system.h" 20 21class CFX_ByteString; 22class CFX_WideString; 23 24using CFX_ByteStringC = CFX_StringCTemplate<FX_CHAR>; 25using CFX_WideStringC = CFX_StringCTemplate<FX_WCHAR>; 26 27#define FXBSTR_ID(c1, c2, c3, c4) \ 28 (((uint32_t)c1 << 24) | ((uint32_t)c2 << 16) | ((uint32_t)c3 << 8) | \ 29 ((uint32_t)c4)) 30 31// A mutable string with shared buffers using copy-on-write semantics that 32// avoids the cost of std::string's iterator stability guarantees. 33class CFX_ByteString { 34 public: 35 using CharType = FX_CHAR; 36 37 CFX_ByteString(); 38 CFX_ByteString(const CFX_ByteString& other); 39 CFX_ByteString(CFX_ByteString&& other); 40 41 // Deliberately implicit to avoid calling on every string literal. 42 // NOLINTNEXTLINE(runtime/explicit) 43 CFX_ByteString(char ch); 44 // NOLINTNEXTLINE(runtime/explicit) 45 CFX_ByteString(const FX_CHAR* ptr); 46 47 CFX_ByteString(const FX_CHAR* ptr, FX_STRSIZE len); 48 CFX_ByteString(const uint8_t* ptr, FX_STRSIZE len); 49 50 explicit CFX_ByteString(const CFX_ByteStringC& bstrc); 51 CFX_ByteString(const CFX_ByteStringC& bstrc1, const CFX_ByteStringC& bstrc2); 52 53 ~CFX_ByteString(); 54 55 void clear() { m_pData.Reset(); } 56 57 static CFX_ByteString FromUnicode(const FX_WCHAR* ptr, FX_STRSIZE len = -1); 58 static CFX_ByteString FromUnicode(const CFX_WideString& str); 59 60 // Explicit conversion to C-style string. 61 // Note: Any subsequent modification of |this| will invalidate the result. 62 const FX_CHAR* c_str() const { return m_pData ? m_pData->m_String : ""; } 63 64 // Explicit conversion to uint8_t*. 65 // Note: Any subsequent modification of |this| will invalidate the result. 66 const uint8_t* raw_str() const { 67 return m_pData ? reinterpret_cast<const uint8_t*>(m_pData->m_String) 68 : nullptr; 69 } 70 71 // Explicit conversion to CFX_ByteStringC. 72 // Note: Any subsequent modification of |this| will invalidate the result. 73 CFX_ByteStringC AsStringC() const { 74 return CFX_ByteStringC(raw_str(), GetLength()); 75 } 76 77 FX_STRSIZE GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; } 78 bool IsEmpty() const { return !GetLength(); } 79 80 int Compare(const CFX_ByteStringC& str) const; 81 bool EqualNoCase(const CFX_ByteStringC& str) const; 82 83 bool operator==(const char* ptr) const; 84 bool operator==(const CFX_ByteStringC& str) const; 85 bool operator==(const CFX_ByteString& other) const; 86 87 bool operator!=(const char* ptr) const { return !(*this == ptr); } 88 bool operator!=(const CFX_ByteStringC& str) const { return !(*this == str); } 89 bool operator!=(const CFX_ByteString& other) const { 90 return !(*this == other); 91 } 92 93 bool operator<(const CFX_ByteString& str) const; 94 95 const CFX_ByteString& operator=(const FX_CHAR* str); 96 const CFX_ByteString& operator=(const CFX_ByteStringC& bstrc); 97 const CFX_ByteString& operator=(const CFX_ByteString& stringSrc); 98 99 const CFX_ByteString& operator+=(FX_CHAR ch); 100 const CFX_ByteString& operator+=(const FX_CHAR* str); 101 const CFX_ByteString& operator+=(const CFX_ByteString& str); 102 const CFX_ByteString& operator+=(const CFX_ByteStringC& bstrc); 103 104 uint8_t GetAt(FX_STRSIZE nIndex) const { 105 return m_pData ? m_pData->m_String[nIndex] : 0; 106 } 107 108 uint8_t operator[](FX_STRSIZE nIndex) const { 109 return m_pData ? m_pData->m_String[nIndex] : 0; 110 } 111 112 void SetAt(FX_STRSIZE nIndex, FX_CHAR ch); 113 FX_STRSIZE Insert(FX_STRSIZE index, FX_CHAR ch); 114 FX_STRSIZE Delete(FX_STRSIZE index, FX_STRSIZE count = 1); 115 116 void Format(const FX_CHAR* lpszFormat, ...); 117 void FormatV(const FX_CHAR* lpszFormat, va_list argList); 118 119 void Reserve(FX_STRSIZE len); 120 FX_CHAR* GetBuffer(FX_STRSIZE len); 121 void ReleaseBuffer(FX_STRSIZE len = -1); 122 123 CFX_ByteString Mid(FX_STRSIZE first) const; 124 CFX_ByteString Mid(FX_STRSIZE first, FX_STRSIZE count) const; 125 CFX_ByteString Left(FX_STRSIZE count) const; 126 CFX_ByteString Right(FX_STRSIZE count) const; 127 128 FX_STRSIZE Find(const CFX_ByteStringC& lpszSub, FX_STRSIZE start = 0) const; 129 FX_STRSIZE Find(FX_CHAR ch, FX_STRSIZE start = 0) const; 130 FX_STRSIZE ReverseFind(FX_CHAR ch) const; 131 132 void MakeLower(); 133 void MakeUpper(); 134 135 void TrimRight(); 136 void TrimRight(FX_CHAR chTarget); 137 void TrimRight(const CFX_ByteStringC& lpszTargets); 138 139 void TrimLeft(); 140 void TrimLeft(FX_CHAR chTarget); 141 void TrimLeft(const CFX_ByteStringC& lpszTargets); 142 143 FX_STRSIZE Replace(const CFX_ByteStringC& lpszOld, 144 const CFX_ByteStringC& lpszNew); 145 146 FX_STRSIZE Remove(FX_CHAR ch); 147 148 CFX_WideString UTF8Decode() const; 149 150 uint32_t GetID(FX_STRSIZE start_pos = 0) const; 151 152#define FXFORMAT_SIGNED 1 153#define FXFORMAT_HEX 2 154#define FXFORMAT_CAPITAL 4 155 156 static CFX_ByteString FormatInteger(int i, uint32_t flags = 0); 157 static CFX_ByteString FormatFloat(FX_FLOAT f, int precision = 0); 158 159 protected: 160 using StringData = CFX_StringDataTemplate<FX_CHAR>; 161 162 void ReallocBeforeWrite(FX_STRSIZE nNewLen); 163 void AllocBeforeWrite(FX_STRSIZE nNewLen); 164 void AllocCopy(CFX_ByteString& dest, 165 FX_STRSIZE nCopyLen, 166 FX_STRSIZE nCopyIndex) const; 167 void AssignCopy(const FX_CHAR* pSrcData, FX_STRSIZE nSrcLen); 168 void Concat(const FX_CHAR* lpszSrcData, FX_STRSIZE nSrcLen); 169 170 CFX_RetainPtr<StringData> m_pData; 171 172 friend class fxcrt_ByteStringConcat_Test; 173 friend class fxcrt_ByteStringPool_Test; 174}; 175 176inline bool operator==(const char* lhs, const CFX_ByteString& rhs) { 177 return rhs == lhs; 178} 179inline bool operator==(const CFX_ByteStringC& lhs, const CFX_ByteString& rhs) { 180 return rhs == lhs; 181} 182inline bool operator!=(const char* lhs, const CFX_ByteString& rhs) { 183 return rhs != lhs; 184} 185inline bool operator!=(const CFX_ByteStringC& lhs, const CFX_ByteString& rhs) { 186 return rhs != lhs; 187} 188 189inline CFX_ByteString operator+(const CFX_ByteStringC& str1, 190 const CFX_ByteStringC& str2) { 191 return CFX_ByteString(str1, str2); 192} 193inline CFX_ByteString operator+(const CFX_ByteStringC& str1, 194 const FX_CHAR* str2) { 195 return CFX_ByteString(str1, str2); 196} 197inline CFX_ByteString operator+(const FX_CHAR* str1, 198 const CFX_ByteStringC& str2) { 199 return CFX_ByteString(str1, str2); 200} 201inline CFX_ByteString operator+(const CFX_ByteStringC& str1, FX_CHAR ch) { 202 return CFX_ByteString(str1, CFX_ByteStringC(ch)); 203} 204inline CFX_ByteString operator+(FX_CHAR ch, const CFX_ByteStringC& str2) { 205 return CFX_ByteString(ch, str2); 206} 207inline CFX_ByteString operator+(const CFX_ByteString& str1, 208 const CFX_ByteString& str2) { 209 return CFX_ByteString(str1.AsStringC(), str2.AsStringC()); 210} 211inline CFX_ByteString operator+(const CFX_ByteString& str1, FX_CHAR ch) { 212 return CFX_ByteString(str1.AsStringC(), CFX_ByteStringC(ch)); 213} 214inline CFX_ByteString operator+(FX_CHAR ch, const CFX_ByteString& str2) { 215 return CFX_ByteString(ch, str2.AsStringC()); 216} 217inline CFX_ByteString operator+(const CFX_ByteString& str1, 218 const FX_CHAR* str2) { 219 return CFX_ByteString(str1.AsStringC(), str2); 220} 221inline CFX_ByteString operator+(const FX_CHAR* str1, 222 const CFX_ByteString& str2) { 223 return CFX_ByteString(str1, str2.AsStringC()); 224} 225inline CFX_ByteString operator+(const CFX_ByteString& str1, 226 const CFX_ByteStringC& str2) { 227 return CFX_ByteString(str1.AsStringC(), str2); 228} 229inline CFX_ByteString operator+(const CFX_ByteStringC& str1, 230 const CFX_ByteString& str2) { 231 return CFX_ByteString(str1, str2.AsStringC()); 232} 233 234// A mutable string with shared buffers using copy-on-write semantics that 235// avoids the cost of std::string's iterator stability guarantees. 236class CFX_WideString { 237 public: 238 using CharType = FX_WCHAR; 239 240 CFX_WideString(); 241 CFX_WideString(const CFX_WideString& other); 242 CFX_WideString(CFX_WideString&& other); 243 244 // Deliberately implicit to avoid calling on every string literal. 245 // NOLINTNEXTLINE(runtime/explicit) 246 CFX_WideString(FX_WCHAR ch); 247 // NOLINTNEXTLINE(runtime/explicit) 248 CFX_WideString(const FX_WCHAR* ptr); 249 250 CFX_WideString(const FX_WCHAR* ptr, FX_STRSIZE len); 251 252 explicit CFX_WideString(const CFX_WideStringC& str); 253 CFX_WideString(const CFX_WideStringC& str1, const CFX_WideStringC& str2); 254 255 ~CFX_WideString(); 256 257 static CFX_WideString FromLocal(const CFX_ByteStringC& str); 258 static CFX_WideString FromCodePage(const CFX_ByteStringC& str, 259 uint16_t codepage); 260 261 static CFX_WideString FromUTF8(const CFX_ByteStringC& str); 262 static CFX_WideString FromUTF16LE(const unsigned short* str, FX_STRSIZE len); 263 264 static FX_STRSIZE WStringLength(const unsigned short* str); 265 266 // Explicit conversion to C-style wide string. 267 // Note: Any subsequent modification of |this| will invalidate the result. 268 const FX_WCHAR* c_str() const { return m_pData ? m_pData->m_String : L""; } 269 270 // Explicit conversion to CFX_WideStringC. 271 // Note: Any subsequent modification of |this| will invalidate the result. 272 CFX_WideStringC AsStringC() const { 273 return CFX_WideStringC(c_str(), GetLength()); 274 } 275 276 void clear() { m_pData.Reset(); } 277 278 FX_STRSIZE GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; } 279 bool IsEmpty() const { return !GetLength(); } 280 281 const CFX_WideString& operator=(const FX_WCHAR* str); 282 const CFX_WideString& operator=(const CFX_WideString& stringSrc); 283 const CFX_WideString& operator=(const CFX_WideStringC& stringSrc); 284 285 const CFX_WideString& operator+=(const FX_WCHAR* str); 286 const CFX_WideString& operator+=(FX_WCHAR ch); 287 const CFX_WideString& operator+=(const CFX_WideString& str); 288 const CFX_WideString& operator+=(const CFX_WideStringC& str); 289 290 bool operator==(const wchar_t* ptr) const; 291 bool operator==(const CFX_WideStringC& str) const; 292 bool operator==(const CFX_WideString& other) const; 293 294 bool operator!=(const wchar_t* ptr) const { return !(*this == ptr); } 295 bool operator!=(const CFX_WideStringC& str) const { return !(*this == str); } 296 bool operator!=(const CFX_WideString& other) const { 297 return !(*this == other); 298 } 299 300 bool operator<(const CFX_WideString& str) const; 301 302 FX_WCHAR GetAt(FX_STRSIZE nIndex) const { 303 return m_pData ? m_pData->m_String[nIndex] : 0; 304 } 305 306 FX_WCHAR operator[](FX_STRSIZE nIndex) const { 307 return m_pData ? m_pData->m_String[nIndex] : 0; 308 } 309 310 void SetAt(FX_STRSIZE nIndex, FX_WCHAR ch); 311 312 int Compare(const FX_WCHAR* str) const; 313 int Compare(const CFX_WideString& str) const; 314 int CompareNoCase(const FX_WCHAR* str) const; 315 316 CFX_WideString Mid(FX_STRSIZE first) const; 317 CFX_WideString Mid(FX_STRSIZE first, FX_STRSIZE count) const; 318 CFX_WideString Left(FX_STRSIZE count) const; 319 CFX_WideString Right(FX_STRSIZE count) const; 320 321 FX_STRSIZE Insert(FX_STRSIZE index, FX_WCHAR ch); 322 FX_STRSIZE Delete(FX_STRSIZE index, FX_STRSIZE count = 1); 323 324 void Format(const FX_WCHAR* lpszFormat, ...); 325 void FormatV(const FX_WCHAR* lpszFormat, va_list argList); 326 327 void MakeLower(); 328 void MakeUpper(); 329 330 void TrimRight(); 331 void TrimRight(FX_WCHAR chTarget); 332 void TrimRight(const CFX_WideStringC& pTargets); 333 334 void TrimLeft(); 335 void TrimLeft(FX_WCHAR chTarget); 336 void TrimLeft(const CFX_WideStringC& pTargets); 337 338 void Reserve(FX_STRSIZE len); 339 FX_WCHAR* GetBuffer(FX_STRSIZE len); 340 void ReleaseBuffer(FX_STRSIZE len = -1); 341 342 int GetInteger() const; 343 FX_FLOAT GetFloat() const; 344 345 FX_STRSIZE Find(const CFX_WideStringC& pSub, FX_STRSIZE start = 0) const; 346 FX_STRSIZE Find(FX_WCHAR ch, FX_STRSIZE start = 0) const; 347 FX_STRSIZE Replace(const CFX_WideStringC& pOld, const CFX_WideStringC& pNew); 348 FX_STRSIZE Remove(FX_WCHAR ch); 349 350 CFX_ByteString UTF8Encode() const; 351 CFX_ByteString UTF16LE_Encode() const; 352 353 protected: 354 using StringData = CFX_StringDataTemplate<FX_WCHAR>; 355 356 void ReallocBeforeWrite(FX_STRSIZE nLen); 357 void AllocBeforeWrite(FX_STRSIZE nLen); 358 void AllocCopy(CFX_WideString& dest, 359 FX_STRSIZE nCopyLen, 360 FX_STRSIZE nCopyIndex) const; 361 void AssignCopy(const FX_WCHAR* pSrcData, FX_STRSIZE nSrcLen); 362 void Concat(const FX_WCHAR* lpszSrcData, FX_STRSIZE nSrcLen); 363 364 CFX_RetainPtr<StringData> m_pData; 365 366 friend class fxcrt_WideStringConcatInPlace_Test; 367 friend class fxcrt_WideStringPool_Test; 368}; 369 370inline CFX_WideString operator+(const CFX_WideStringC& str1, 371 const CFX_WideStringC& str2) { 372 return CFX_WideString(str1, str2); 373} 374inline CFX_WideString operator+(const CFX_WideStringC& str1, 375 const FX_WCHAR* str2) { 376 return CFX_WideString(str1, str2); 377} 378inline CFX_WideString operator+(const FX_WCHAR* str1, 379 const CFX_WideStringC& str2) { 380 return CFX_WideString(str1, str2); 381} 382inline CFX_WideString operator+(const CFX_WideStringC& str1, FX_WCHAR ch) { 383 return CFX_WideString(str1, CFX_WideStringC(ch)); 384} 385inline CFX_WideString operator+(FX_WCHAR ch, const CFX_WideStringC& str2) { 386 return CFX_WideString(ch, str2); 387} 388inline CFX_WideString operator+(const CFX_WideString& str1, 389 const CFX_WideString& str2) { 390 return CFX_WideString(str1.AsStringC(), str2.AsStringC()); 391} 392inline CFX_WideString operator+(const CFX_WideString& str1, FX_WCHAR ch) { 393 return CFX_WideString(str1.AsStringC(), CFX_WideStringC(ch)); 394} 395inline CFX_WideString operator+(FX_WCHAR ch, const CFX_WideString& str2) { 396 return CFX_WideString(ch, str2.AsStringC()); 397} 398inline CFX_WideString operator+(const CFX_WideString& str1, 399 const FX_WCHAR* str2) { 400 return CFX_WideString(str1.AsStringC(), str2); 401} 402inline CFX_WideString operator+(const FX_WCHAR* str1, 403 const CFX_WideString& str2) { 404 return CFX_WideString(str1, str2.AsStringC()); 405} 406inline CFX_WideString operator+(const CFX_WideString& str1, 407 const CFX_WideStringC& str2) { 408 return CFX_WideString(str1.AsStringC(), str2); 409} 410inline CFX_WideString operator+(const CFX_WideStringC& str1, 411 const CFX_WideString& str2) { 412 return CFX_WideString(str1, str2.AsStringC()); 413} 414inline bool operator==(const wchar_t* lhs, const CFX_WideString& rhs) { 415 return rhs == lhs; 416} 417inline bool operator==(const CFX_WideStringC& lhs, const CFX_WideString& rhs) { 418 return rhs == lhs; 419} 420inline bool operator!=(const wchar_t* lhs, const CFX_WideString& rhs) { 421 return rhs != lhs; 422} 423inline bool operator!=(const CFX_WideStringC& lhs, const CFX_WideString& rhs) { 424 return rhs != lhs; 425} 426 427CFX_ByteString FX_UTF8Encode(const CFX_WideStringC& wsStr); 428FX_FLOAT FX_atof(const CFX_ByteStringC& str); 429inline FX_FLOAT FX_atof(const CFX_WideStringC& wsStr) { 430 return FX_atof(FX_UTF8Encode(wsStr).c_str()); 431} 432bool FX_atonum(const CFX_ByteStringC& str, void* pData); 433FX_STRSIZE FX_ftoa(FX_FLOAT f, FX_CHAR* buf); 434 435uint32_t FX_HashCode_GetA(const CFX_ByteStringC& str, bool bIgnoreCase); 436uint32_t FX_HashCode_GetW(const CFX_WideStringC& str, bool bIgnoreCase); 437 438namespace std { 439 440template <> 441struct hash<CFX_ByteString> { 442 std::size_t operator()(const CFX_ByteString& str) const { 443 return FX_HashCode_GetA(str.AsStringC(), false); 444 } 445}; 446 447template <> 448struct hash<CFX_WideString> { 449 std::size_t operator()(const CFX_WideString& str) const { 450 return FX_HashCode_GetW(str.AsStringC(), false); 451 } 452}; 453 454} // namespace std 455 456extern template struct std::hash<CFX_ByteString>; 457extern template struct std::hash<CFX_WideString>; 458 459#endif // CORE_FXCRT_FX_STRING_H_ 460