1// Copyright 2014 PDFium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#ifndef CORE_FXCRT_FX_STRING_H_
8#define CORE_FXCRT_FX_STRING_H_
9
10#include <stdint.h>  // For intptr_t.
11
12#include <algorithm>
13#include <functional>
14
15#include "core/fxcrt/cfx_retain_ptr.h"
16#include "core/fxcrt/cfx_string_c_template.h"
17#include "core/fxcrt/cfx_string_data_template.h"
18#include "core/fxcrt/fx_memory.h"
19#include "core/fxcrt/fx_system.h"
20
21class CFX_ByteString;
22class CFX_WideString;
23
24using CFX_ByteStringC = CFX_StringCTemplate<FX_CHAR>;
25using CFX_WideStringC = CFX_StringCTemplate<FX_WCHAR>;
26
// Packs four byte-sized values into a single 32-bit identifier, with |c1| in
// the most significant byte and |c4| in the least significant byte.
// Each argument is wrapped by its cast, so expression arguments such as
// `a | b` are converted as a whole before being shifted.
#define FXBSTR_ID(c1, c2, c3, c4)                                        \
  ((static_cast<uint32_t>(c1) << 24) | (static_cast<uint32_t>(c2) << 16) | \
   (static_cast<uint32_t>(c3) << 8) | static_cast<uint32_t>(c4))
30
31// A mutable string with shared buffers using copy-on-write semantics that
32// avoids the cost of std::string's iterator stability guarantees.
33class CFX_ByteString {
34 public:
35  using CharType = FX_CHAR;
36
37  CFX_ByteString();
38  CFX_ByteString(const CFX_ByteString& other);
39  CFX_ByteString(CFX_ByteString&& other);
40
41  // Deliberately implicit to avoid calling on every string literal.
42  // NOLINTNEXTLINE(runtime/explicit)
43  CFX_ByteString(char ch);
44  // NOLINTNEXTLINE(runtime/explicit)
45  CFX_ByteString(const FX_CHAR* ptr);
46
47  CFX_ByteString(const FX_CHAR* ptr, FX_STRSIZE len);
48  CFX_ByteString(const uint8_t* ptr, FX_STRSIZE len);
49
50  explicit CFX_ByteString(const CFX_ByteStringC& bstrc);
51  CFX_ByteString(const CFX_ByteStringC& bstrc1, const CFX_ByteStringC& bstrc2);
52
53  ~CFX_ByteString();
54
55  void clear() { m_pData.Reset(); }
56
57  static CFX_ByteString FromUnicode(const FX_WCHAR* ptr, FX_STRSIZE len = -1);
58  static CFX_ByteString FromUnicode(const CFX_WideString& str);
59
60  // Explicit conversion to C-style string.
61  // Note: Any subsequent modification of |this| will invalidate the result.
62  const FX_CHAR* c_str() const { return m_pData ? m_pData->m_String : ""; }
63
64  // Explicit conversion to uint8_t*.
65  // Note: Any subsequent modification of |this| will invalidate the result.
66  const uint8_t* raw_str() const {
67    return m_pData ? reinterpret_cast<const uint8_t*>(m_pData->m_String)
68                   : nullptr;
69  }
70
71  // Explicit conversion to CFX_ByteStringC.
72  // Note: Any subsequent modification of |this| will invalidate the result.
73  CFX_ByteStringC AsStringC() const {
74    return CFX_ByteStringC(raw_str(), GetLength());
75  }
76
77  FX_STRSIZE GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; }
78  bool IsEmpty() const { return !GetLength(); }
79
80  int Compare(const CFX_ByteStringC& str) const;
81  bool EqualNoCase(const CFX_ByteStringC& str) const;
82
83  bool operator==(const char* ptr) const;
84  bool operator==(const CFX_ByteStringC& str) const;
85  bool operator==(const CFX_ByteString& other) const;
86
87  bool operator!=(const char* ptr) const { return !(*this == ptr); }
88  bool operator!=(const CFX_ByteStringC& str) const { return !(*this == str); }
89  bool operator!=(const CFX_ByteString& other) const {
90    return !(*this == other);
91  }
92
93  bool operator<(const CFX_ByteString& str) const;
94
95  const CFX_ByteString& operator=(const FX_CHAR* str);
96  const CFX_ByteString& operator=(const CFX_ByteStringC& bstrc);
97  const CFX_ByteString& operator=(const CFX_ByteString& stringSrc);
98
99  const CFX_ByteString& operator+=(FX_CHAR ch);
100  const CFX_ByteString& operator+=(const FX_CHAR* str);
101  const CFX_ByteString& operator+=(const CFX_ByteString& str);
102  const CFX_ByteString& operator+=(const CFX_ByteStringC& bstrc);
103
104  uint8_t GetAt(FX_STRSIZE nIndex) const {
105    return m_pData ? m_pData->m_String[nIndex] : 0;
106  }
107
108  uint8_t operator[](FX_STRSIZE nIndex) const {
109    return m_pData ? m_pData->m_String[nIndex] : 0;
110  }
111
112  void SetAt(FX_STRSIZE nIndex, FX_CHAR ch);
113  FX_STRSIZE Insert(FX_STRSIZE index, FX_CHAR ch);
114  FX_STRSIZE Delete(FX_STRSIZE index, FX_STRSIZE count = 1);
115
116  void Format(const FX_CHAR* lpszFormat, ...);
117  void FormatV(const FX_CHAR* lpszFormat, va_list argList);
118
119  void Reserve(FX_STRSIZE len);
120  FX_CHAR* GetBuffer(FX_STRSIZE len);
121  void ReleaseBuffer(FX_STRSIZE len = -1);
122
123  CFX_ByteString Mid(FX_STRSIZE first) const;
124  CFX_ByteString Mid(FX_STRSIZE first, FX_STRSIZE count) const;
125  CFX_ByteString Left(FX_STRSIZE count) const;
126  CFX_ByteString Right(FX_STRSIZE count) const;
127
128  FX_STRSIZE Find(const CFX_ByteStringC& lpszSub, FX_STRSIZE start = 0) const;
129  FX_STRSIZE Find(FX_CHAR ch, FX_STRSIZE start = 0) const;
130  FX_STRSIZE ReverseFind(FX_CHAR ch) const;
131
132  void MakeLower();
133  void MakeUpper();
134
135  void TrimRight();
136  void TrimRight(FX_CHAR chTarget);
137  void TrimRight(const CFX_ByteStringC& lpszTargets);
138
139  void TrimLeft();
140  void TrimLeft(FX_CHAR chTarget);
141  void TrimLeft(const CFX_ByteStringC& lpszTargets);
142
143  FX_STRSIZE Replace(const CFX_ByteStringC& lpszOld,
144                     const CFX_ByteStringC& lpszNew);
145
146  FX_STRSIZE Remove(FX_CHAR ch);
147
148  CFX_WideString UTF8Decode() const;
149
150  uint32_t GetID(FX_STRSIZE start_pos = 0) const;
151
152#define FXFORMAT_SIGNED 1
153#define FXFORMAT_HEX 2
154#define FXFORMAT_CAPITAL 4
155
156  static CFX_ByteString FormatInteger(int i, uint32_t flags = 0);
157  static CFX_ByteString FormatFloat(FX_FLOAT f, int precision = 0);
158
159 protected:
160  using StringData = CFX_StringDataTemplate<FX_CHAR>;
161
162  void ReallocBeforeWrite(FX_STRSIZE nNewLen);
163  void AllocBeforeWrite(FX_STRSIZE nNewLen);
164  void AllocCopy(CFX_ByteString& dest,
165                 FX_STRSIZE nCopyLen,
166                 FX_STRSIZE nCopyIndex) const;
167  void AssignCopy(const FX_CHAR* pSrcData, FX_STRSIZE nSrcLen);
168  void Concat(const FX_CHAR* lpszSrcData, FX_STRSIZE nSrcLen);
169
170  CFX_RetainPtr<StringData> m_pData;
171
172  friend class fxcrt_ByteStringConcat_Test;
173  friend class fxcrt_ByteStringPool_Test;
174};
175
176inline bool operator==(const char* lhs, const CFX_ByteString& rhs) {
177  return rhs == lhs;
178}
179inline bool operator==(const CFX_ByteStringC& lhs, const CFX_ByteString& rhs) {
180  return rhs == lhs;
181}
182inline bool operator!=(const char* lhs, const CFX_ByteString& rhs) {
183  return rhs != lhs;
184}
185inline bool operator!=(const CFX_ByteStringC& lhs, const CFX_ByteString& rhs) {
186  return rhs != lhs;
187}
188
189inline CFX_ByteString operator+(const CFX_ByteStringC& str1,
190                                const CFX_ByteStringC& str2) {
191  return CFX_ByteString(str1, str2);
192}
193inline CFX_ByteString operator+(const CFX_ByteStringC& str1,
194                                const FX_CHAR* str2) {
195  return CFX_ByteString(str1, str2);
196}
197inline CFX_ByteString operator+(const FX_CHAR* str1,
198                                const CFX_ByteStringC& str2) {
199  return CFX_ByteString(str1, str2);
200}
201inline CFX_ByteString operator+(const CFX_ByteStringC& str1, FX_CHAR ch) {
202  return CFX_ByteString(str1, CFX_ByteStringC(ch));
203}
204inline CFX_ByteString operator+(FX_CHAR ch, const CFX_ByteStringC& str2) {
205  return CFX_ByteString(ch, str2);
206}
207inline CFX_ByteString operator+(const CFX_ByteString& str1,
208                                const CFX_ByteString& str2) {
209  return CFX_ByteString(str1.AsStringC(), str2.AsStringC());
210}
211inline CFX_ByteString operator+(const CFX_ByteString& str1, FX_CHAR ch) {
212  return CFX_ByteString(str1.AsStringC(), CFX_ByteStringC(ch));
213}
214inline CFX_ByteString operator+(FX_CHAR ch, const CFX_ByteString& str2) {
215  return CFX_ByteString(ch, str2.AsStringC());
216}
217inline CFX_ByteString operator+(const CFX_ByteString& str1,
218                                const FX_CHAR* str2) {
219  return CFX_ByteString(str1.AsStringC(), str2);
220}
221inline CFX_ByteString operator+(const FX_CHAR* str1,
222                                const CFX_ByteString& str2) {
223  return CFX_ByteString(str1, str2.AsStringC());
224}
225inline CFX_ByteString operator+(const CFX_ByteString& str1,
226                                const CFX_ByteStringC& str2) {
227  return CFX_ByteString(str1.AsStringC(), str2);
228}
229inline CFX_ByteString operator+(const CFX_ByteStringC& str1,
230                                const CFX_ByteString& str2) {
231  return CFX_ByteString(str1, str2.AsStringC());
232}
233
234// A mutable string with shared buffers using copy-on-write semantics that
235// avoids the cost of std::string's iterator stability guarantees.
236class CFX_WideString {
237 public:
238  using CharType = FX_WCHAR;
239
240  CFX_WideString();
241  CFX_WideString(const CFX_WideString& other);
242  CFX_WideString(CFX_WideString&& other);
243
244  // Deliberately implicit to avoid calling on every string literal.
245  // NOLINTNEXTLINE(runtime/explicit)
246  CFX_WideString(FX_WCHAR ch);
247  // NOLINTNEXTLINE(runtime/explicit)
248  CFX_WideString(const FX_WCHAR* ptr);
249
250  CFX_WideString(const FX_WCHAR* ptr, FX_STRSIZE len);
251
252  explicit CFX_WideString(const CFX_WideStringC& str);
253  CFX_WideString(const CFX_WideStringC& str1, const CFX_WideStringC& str2);
254
255  ~CFX_WideString();
256
257  static CFX_WideString FromLocal(const CFX_ByteStringC& str);
258  static CFX_WideString FromCodePage(const CFX_ByteStringC& str,
259                                     uint16_t codepage);
260
261  static CFX_WideString FromUTF8(const CFX_ByteStringC& str);
262  static CFX_WideString FromUTF16LE(const unsigned short* str, FX_STRSIZE len);
263
264  static FX_STRSIZE WStringLength(const unsigned short* str);
265
266  // Explicit conversion to C-style wide string.
267  // Note: Any subsequent modification of |this| will invalidate the result.
268  const FX_WCHAR* c_str() const { return m_pData ? m_pData->m_String : L""; }
269
270  // Explicit conversion to CFX_WideStringC.
271  // Note: Any subsequent modification of |this| will invalidate the result.
272  CFX_WideStringC AsStringC() const {
273    return CFX_WideStringC(c_str(), GetLength());
274  }
275
276  void clear() { m_pData.Reset(); }
277
278  FX_STRSIZE GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; }
279  bool IsEmpty() const { return !GetLength(); }
280
281  const CFX_WideString& operator=(const FX_WCHAR* str);
282  const CFX_WideString& operator=(const CFX_WideString& stringSrc);
283  const CFX_WideString& operator=(const CFX_WideStringC& stringSrc);
284
285  const CFX_WideString& operator+=(const FX_WCHAR* str);
286  const CFX_WideString& operator+=(FX_WCHAR ch);
287  const CFX_WideString& operator+=(const CFX_WideString& str);
288  const CFX_WideString& operator+=(const CFX_WideStringC& str);
289
290  bool operator==(const wchar_t* ptr) const;
291  bool operator==(const CFX_WideStringC& str) const;
292  bool operator==(const CFX_WideString& other) const;
293
294  bool operator!=(const wchar_t* ptr) const { return !(*this == ptr); }
295  bool operator!=(const CFX_WideStringC& str) const { return !(*this == str); }
296  bool operator!=(const CFX_WideString& other) const {
297    return !(*this == other);
298  }
299
300  bool operator<(const CFX_WideString& str) const;
301
302  FX_WCHAR GetAt(FX_STRSIZE nIndex) const {
303    return m_pData ? m_pData->m_String[nIndex] : 0;
304  }
305
306  FX_WCHAR operator[](FX_STRSIZE nIndex) const {
307    return m_pData ? m_pData->m_String[nIndex] : 0;
308  }
309
310  void SetAt(FX_STRSIZE nIndex, FX_WCHAR ch);
311
312  int Compare(const FX_WCHAR* str) const;
313  int Compare(const CFX_WideString& str) const;
314  int CompareNoCase(const FX_WCHAR* str) const;
315
316  CFX_WideString Mid(FX_STRSIZE first) const;
317  CFX_WideString Mid(FX_STRSIZE first, FX_STRSIZE count) const;
318  CFX_WideString Left(FX_STRSIZE count) const;
319  CFX_WideString Right(FX_STRSIZE count) const;
320
321  FX_STRSIZE Insert(FX_STRSIZE index, FX_WCHAR ch);
322  FX_STRSIZE Delete(FX_STRSIZE index, FX_STRSIZE count = 1);
323
324  void Format(const FX_WCHAR* lpszFormat, ...);
325  void FormatV(const FX_WCHAR* lpszFormat, va_list argList);
326
327  void MakeLower();
328  void MakeUpper();
329
330  void TrimRight();
331  void TrimRight(FX_WCHAR chTarget);
332  void TrimRight(const CFX_WideStringC& pTargets);
333
334  void TrimLeft();
335  void TrimLeft(FX_WCHAR chTarget);
336  void TrimLeft(const CFX_WideStringC& pTargets);
337
338  void Reserve(FX_STRSIZE len);
339  FX_WCHAR* GetBuffer(FX_STRSIZE len);
340  void ReleaseBuffer(FX_STRSIZE len = -1);
341
342  int GetInteger() const;
343  FX_FLOAT GetFloat() const;
344
345  FX_STRSIZE Find(const CFX_WideStringC& pSub, FX_STRSIZE start = 0) const;
346  FX_STRSIZE Find(FX_WCHAR ch, FX_STRSIZE start = 0) const;
347  FX_STRSIZE Replace(const CFX_WideStringC& pOld, const CFX_WideStringC& pNew);
348  FX_STRSIZE Remove(FX_WCHAR ch);
349
350  CFX_ByteString UTF8Encode() const;
351  CFX_ByteString UTF16LE_Encode() const;
352
353 protected:
354  using StringData = CFX_StringDataTemplate<FX_WCHAR>;
355
356  void ReallocBeforeWrite(FX_STRSIZE nLen);
357  void AllocBeforeWrite(FX_STRSIZE nLen);
358  void AllocCopy(CFX_WideString& dest,
359                 FX_STRSIZE nCopyLen,
360                 FX_STRSIZE nCopyIndex) const;
361  void AssignCopy(const FX_WCHAR* pSrcData, FX_STRSIZE nSrcLen);
362  void Concat(const FX_WCHAR* lpszSrcData, FX_STRSIZE nSrcLen);
363
364  CFX_RetainPtr<StringData> m_pData;
365
366  friend class fxcrt_WideStringConcatInPlace_Test;
367  friend class fxcrt_WideStringPool_Test;
368};
369
370inline CFX_WideString operator+(const CFX_WideStringC& str1,
371                                const CFX_WideStringC& str2) {
372  return CFX_WideString(str1, str2);
373}
374inline CFX_WideString operator+(const CFX_WideStringC& str1,
375                                const FX_WCHAR* str2) {
376  return CFX_WideString(str1, str2);
377}
378inline CFX_WideString operator+(const FX_WCHAR* str1,
379                                const CFX_WideStringC& str2) {
380  return CFX_WideString(str1, str2);
381}
382inline CFX_WideString operator+(const CFX_WideStringC& str1, FX_WCHAR ch) {
383  return CFX_WideString(str1, CFX_WideStringC(ch));
384}
385inline CFX_WideString operator+(FX_WCHAR ch, const CFX_WideStringC& str2) {
386  return CFX_WideString(ch, str2);
387}
388inline CFX_WideString operator+(const CFX_WideString& str1,
389                                const CFX_WideString& str2) {
390  return CFX_WideString(str1.AsStringC(), str2.AsStringC());
391}
392inline CFX_WideString operator+(const CFX_WideString& str1, FX_WCHAR ch) {
393  return CFX_WideString(str1.AsStringC(), CFX_WideStringC(ch));
394}
395inline CFX_WideString operator+(FX_WCHAR ch, const CFX_WideString& str2) {
396  return CFX_WideString(ch, str2.AsStringC());
397}
398inline CFX_WideString operator+(const CFX_WideString& str1,
399                                const FX_WCHAR* str2) {
400  return CFX_WideString(str1.AsStringC(), str2);
401}
402inline CFX_WideString operator+(const FX_WCHAR* str1,
403                                const CFX_WideString& str2) {
404  return CFX_WideString(str1, str2.AsStringC());
405}
406inline CFX_WideString operator+(const CFX_WideString& str1,
407                                const CFX_WideStringC& str2) {
408  return CFX_WideString(str1.AsStringC(), str2);
409}
410inline CFX_WideString operator+(const CFX_WideStringC& str1,
411                                const CFX_WideString& str2) {
412  return CFX_WideString(str1, str2.AsStringC());
413}
414inline bool operator==(const wchar_t* lhs, const CFX_WideString& rhs) {
415  return rhs == lhs;
416}
417inline bool operator==(const CFX_WideStringC& lhs, const CFX_WideString& rhs) {
418  return rhs == lhs;
419}
420inline bool operator!=(const wchar_t* lhs, const CFX_WideString& rhs) {
421  return rhs != lhs;
422}
423inline bool operator!=(const CFX_WideStringC& lhs, const CFX_WideString& rhs) {
424  return rhs != lhs;
425}
426
427CFX_ByteString FX_UTF8Encode(const CFX_WideStringC& wsStr);
428FX_FLOAT FX_atof(const CFX_ByteStringC& str);
429inline FX_FLOAT FX_atof(const CFX_WideStringC& wsStr) {
430  return FX_atof(FX_UTF8Encode(wsStr).c_str());
431}
432bool FX_atonum(const CFX_ByteStringC& str, void* pData);
433FX_STRSIZE FX_ftoa(FX_FLOAT f, FX_CHAR* buf);
434
435uint32_t FX_HashCode_GetA(const CFX_ByteStringC& str, bool bIgnoreCase);
436uint32_t FX_HashCode_GetW(const CFX_WideStringC& str, bool bIgnoreCase);
437
438namespace std {
439
440template <>
441struct hash<CFX_ByteString> {
442  std::size_t operator()(const CFX_ByteString& str) const {
443    return FX_HashCode_GetA(str.AsStringC(), false);
444  }
445};
446
447template <>
448struct hash<CFX_WideString> {
449  std::size_t operator()(const CFX_WideString& str) const {
450    return FX_HashCode_GetW(str.AsStringC(), false);
451  }
452};
453
454}  // namespace std
455
456extern template struct std::hash<CFX_ByteString>;
457extern template struct std::hash<CFX_WideString>;
458
459#endif  // CORE_FXCRT_FX_STRING_H_
460