ustring.h revision 9066cfe9886ac131c34d59ed0e2d287b0e3c0087
// This file is part of the ustl library, an STL implementation.
//
// Copyright (C) 2005 by Mike Sharov <msharov@users.sourceforge.net>
// This file is free software, distributed under the MIT License.
//
// ustring.h
//
8
9#ifndef USTRING_H_1249CB7A098A9010763AAC6D37B133CF
10#define USTRING_H_1249CB7A098A9010763AAC6D37B133CF
11
12#include "memblock.h"
13#include "utf8.h"
14#include <stdarg.h>	// for va_list, va_start, and va_end (in string::format)
15
16namespace ustl {
17
18/// \class string ustring.h ustl.h
19/// \ingroup Sequences
20///
21/// \brief STL basic_string&lt;char&gt; equivalent.
22///
23/// An STL container for text string manipulation.
24/// Differences from C++ standard:
25///	- string is a class, not a template. Wide characters are assumed to be
26///		encoded with utf8 at all times except when rendering or editing,
27///		where you would use a utf8 iterator.
28/// 	- format member function - you can, of course use an \ref ostringstream,
29///		which also have format functions, but most of the time this way
30///		is more convenient. Because uSTL does not implement locales,
31///		format is the only way to create localized strings.
32/// 	- const char* cast operator. It is much clearer to use this than having
33/// 		to type .c_str() every time.
34/// 	- length returns the number of _characters_, not bytes.
35///		This function is O(N), so use wisely.
36///
37class string : public memblock {
38public:
39    typedef char		value_type;
40    typedef value_type*		pointer;
41    typedef const value_type*	const_pointer;
42    typedef wchar_t		wvalue_type;
43    typedef wvalue_type*	wpointer;
44    typedef const wvalue_type*	const_wpointer;
45    typedef pointer		iterator;
46    typedef const_pointer	const_iterator;
47    typedef value_type&		reference;
48    typedef value_type		const_reference;
49    typedef ::ustl::reverse_iterator<iterator>		reverse_iterator;
50    typedef ::ustl::reverse_iterator<const_iterator>	const_reverse_iterator;
51    typedef utf8in_iterator<const_iterator>		utf8_iterator;
52public:
53    static const uoff_t npos = static_cast<uoff_t>(-1);			///< Value that means the end of string.
54    static const value_type c_Terminator = 0;				///< String terminator
55    static const size_type size_Terminator = sizeof(c_Terminator);	///< Most systems terminate strings with '\\0'
56    static const char empty_string [size_Terminator];			///< An empty string.
57public:
58				string (void);
59				string (const string& s);
60    inline			string (const string& s, uoff_t o, size_type n);
61    inline explicit		string (const cmemlink& l);
62				string (const_pointer s);
63    inline			string (const_pointer s, size_type len);
64    inline			string (const_pointer s1, const_pointer s2);
65    explicit			string (size_type n, value_type c = c_Terminator);
66    inline pointer		data (void)		{ return (string::pointer (memblock::data())); }
67    inline const_pointer	c_str (void) const	{ return (string::const_pointer (memblock::cdata())); }
68    inline size_type		max_size (void) const	{ size_type s (memblock::max_size()); return (s - !!s); }
69    inline size_type		capacity (void) const	{ size_type c (memblock::capacity()); return (c - !!c); }
70    void			resize (size_type n);
71    inline void			clear (void)		{ resize (0); }
72    inline const_iterator	begin (void) const	{ return (const_iterator (memblock::begin())); }
73    inline iterator		begin (void)		{ return (iterator (memblock::begin())); }
74    inline const_iterator	end (void) const	{ return (const_iterator (memblock::end())); }
75    inline iterator		end (void)		{ return (iterator (memblock::end())); }
76  inline const_reverse_iterator	rbegin (void) const	{ return (const_reverse_iterator (end())); }
77    inline reverse_iterator	rbegin (void)		{ return (reverse_iterator (end())); }
78  inline const_reverse_iterator	rend (void) const	{ return (const_reverse_iterator (begin())); }
79    inline reverse_iterator	rend (void)		{ return (reverse_iterator (begin())); }
80    inline utf8_iterator	utf8_begin (void) const	{ return (utf8_iterator (begin())); }
81    inline utf8_iterator	utf8_end (void) const	{ return (utf8_iterator (end())); }
82    inline const_reference	at (uoff_t pos) const	{ assert (pos <= size() && begin()); return (begin()[pos]); }
83    inline reference		at (uoff_t pos)		{ assert (pos <= size() && begin()); return (begin()[pos]); }
84    inline const_iterator	iat (uoff_t pos) const	{ return (begin() + min (pos, size())); }
85    inline iterator		iat (uoff_t pos)	{ return (begin() + min (pos, size())); }
86    inline size_type		length (void) const	{ return (distance (utf8_begin(), utf8_end())); }
87    inline void			append (const_iterator i1, const_iterator i2)	{ append (i1, distance (i1, i2)); }
88    void	   		append (const_pointer s, size_type len);
89    void	   		append (const_pointer s);
90    void			append (size_type n, const_reference c);
91    inline void			append (size_type n, wvalue_type c)		{ insert (size(), c, n); }
92    inline void			append (const_wpointer s1, const_wpointer s2)	{ insert (size(), s1, s2); }
93    inline void			append (const_wpointer s)			{ const_wpointer se (s); for (;se&&*se;++se); append (s, se); }
94    inline void			append (const string& s)			{ append (s.begin(), s.end()); }
95    inline void			append (const string& s, uoff_t o, size_type n)	{ append (s.iat(o), s.iat(o+n)); }
96    inline void			assign (const_iterator i1, const_iterator i2)	{ assign (i1, distance (i1, i2)); }
97    void	    		assign (const_pointer s, size_type len);
98    void	    		assign (const_pointer s);
99    inline void			assign (const_wpointer s1, const_wpointer s2)	{ clear(); append (s1, s2); }
100    inline void			assign (const_wpointer s1)			{ clear(); append (s1); }
101    inline void			assign (const string& s)			{ assign (s.begin(), s.end()); }
102    inline void			assign (const string& s, uoff_t o, size_type n)	{ assign (s.iat(o), s.iat(o+n)); }
103    size_type			copyto (pointer p, size_type n, const_iterator start = NULL) const;
104    inline int			compare (const string& s) const	{ return (compare (begin(), end(), s.begin(), s.end())); }
105    inline int			compare (const_pointer s) const	{ return (compare (begin(), end(), s, s + strlen(s))); }
106    static int			compare (const_iterator first1, const_iterator last1, const_iterator first2, const_iterator last2);
107    inline			operator const value_type* (void) const;
108    inline			operator value_type* (void);
109    inline const string&	operator= (const string& s)	{ assign (s.begin(), s.end()); return (*this); }
110    inline const string&	operator= (const_reference c)	{ assign (&c, 1); return (*this); }
111    inline const string&	operator= (const_pointer s)	{ assign (s); return (*this); }
112    inline const string&	operator= (const_wpointer s)	{ assign (s); return (*this); }
113    inline const string&	operator+= (const string& s)	{ append (s.begin(), s.size()); return (*this); }
114    inline const string&	operator+= (const_reference c)	{ append (1, c); return (*this); }
115    inline const string&	operator+= (const_pointer s)	{ append (s); return (*this); }
116    inline const string&	operator+= (wvalue_type c)	{ append (1, c); return (*this); }
117    inline const string&	operator+= (const_wpointer s)	{ append (s); return (*this); }
118    inline string		operator+ (const string& s) const;
119    inline bool			operator== (const string& s) const	{ return (memblock::operator== (s)); }
120    bool			operator== (const_pointer s) const;
121    inline bool			operator== (const_reference c) const	{ return (size() == 1 && c == at(0)); }
122    inline bool			operator!= (const string& s) const	{ return (!operator== (s)); }
123    inline bool			operator!= (const_pointer s) const	{ return (!operator== (s)); }
124    inline bool			operator!= (const_reference c) const	{ return (!operator== (c)); }
125    inline bool			operator< (const string& s) const	{ return (0 > compare (s)); }
126    inline bool			operator< (const_pointer s) const	{ return (0 > compare (s)); }
127    inline bool			operator< (const_reference c) const	{ return (0 > compare (begin(), end(), &c, &c + 1)); }
128    inline bool			operator> (const_pointer s) const	{ return (0 < compare (s)); }
129    void			insert (const uoff_t ip, wvalue_type c, size_type n = 1);
130    void			insert (const uoff_t ip, const_wpointer first, const_wpointer last, const size_type n = 1);
131    iterator			insert (iterator start, const_reference c, size_type n = 1);
132    iterator			insert (iterator start, const_pointer s, size_type n = 1);
133    iterator			insert (iterator start, const_pointer first, const_iterator last, size_type n = 1);
134    inline void			insert (uoff_t ip, const_pointer s, size_type nlen)		{ insert (iat(ip), s, s + nlen); }
135    inline void			insert (uoff_t ip, size_type n, value_type c)			{ insert (iat(ip), c, n); }
136    inline void			insert (uoff_t ip, const string& s, uoff_t sp, size_type slen)	{ insert (iat(ip), s.iat(sp), s.iat(sp + slen)); }
137    iterator			erase (iterator start, size_type size = 1);
138    void			erase (uoff_t start, size_type size = 1);
139    inline iterator		erase (iterator first, const_iterator last)	{ return (erase (first, size_type(distance(first,last)))); }
140				OVERLOAD_POINTER_AND_SIZE_T_V2(erase, iterator)
141    inline void			push_back (const_reference c)	{ append (1, c); }
142    inline void			push_back (wvalue_type c)	{ append (1, c); }
143    inline void			pop_back (void)			{ resize (size() - 1); }
144    void			replace (iterator first, iterator last, const_pointer s);
145    void			replace (iterator first, iterator last, const_pointer i1, const_pointer i2, size_type n = 1);
146    inline void			replace (iterator first, iterator last, const string& s)			{ replace (first, last, s.begin(), s.end()); }
147    inline void			replace (iterator first, iterator last, const_pointer s, size_type slen)	{ replace (first, last, s, s + slen); }
148    inline void			replace (iterator first, iterator last, size_type n, value_type c)		{ replace (first, last, &c, &c + 1, n); }
149    inline void			replace (uoff_t rp, size_type n, const string& s)				{ replace (iat(rp), iat(rp + n), s); }
150    inline void			replace (uoff_t rp, size_type n, const string& s, uoff_t sp, size_type slen)	{ replace (iat(rp), iat(rp + n), s.iat(sp), s.iat(sp + slen)); }
151    inline void			replace (uoff_t rp, size_type n, const_pointer s, size_type slen)		{ replace (iat(rp), iat(rp + n), s, s + slen); }
152    inline void			replace (uoff_t rp, size_type n, const_pointer s)				{ replace (iat(rp), iat(rp + n), string(s)); }
153    inline void			replace (uoff_t rp, size_type n, size_type count, value_type c)			{ replace (iat(rp), iat(rp + n), count, c); }
154    inline string		substr (uoff_t o, size_type n) const	{ return (string (*this, o, n)); }
155    uoff_t			find (const_reference c, uoff_t pos = 0) const;
156    uoff_t			find (const string& s, uoff_t pos = 0) const;
157    uoff_t			rfind (const_reference c, uoff_t pos = npos) const;
158    uoff_t			rfind (const string& s, uoff_t pos = npos) const;
159    uoff_t			find_first_of (const string& s, uoff_t pos = 0) const;
160    uoff_t			find_first_not_of (const string& s, uoff_t pos = 0) const;
161    uoff_t			find_last_of (const string& s, uoff_t pos = npos) const;
162    uoff_t			find_last_not_of (const string& s, uoff_t pos = npos) const;
163    int				vformat (const char* fmt, va_list args);
164    int				format (const char* fmt, ...) __attribute__((__format__(__printf__, 2, 3)));
165    void			read (istream&);
166    void			write (ostream& os) const;
167    size_t			stream_size (void) const;
168    static hashvalue_t		hash (const char* f1, const char* l1);
169private:
170    DLL_LOCAL iterator		utf8_iat (uoff_t i);
171protected:
172    inline virtual size_type	minimumFreeCapacity (void) const { return (size_Terminator); }
173};
174
175//----------------------------------------------------------------------
176
177/// Assigns itself the value of string \p s
178inline string::string (const cmemlink& s)
179: memblock ()
180{
181    assign (const_iterator (s.begin()), s.size());
182}
183
184/// Assigns itself a [o,o+n) substring of \p s.
185inline string::string (const string& s, uoff_t o, size_type n)
186: memblock()
187{
188    assign (s, o, n);
189}
190
191/// Copies the value of \p s of length \p len into itself.
192inline string::string (const_pointer s, size_type len)
193: memblock ()
194{
195    assign (s, len);
196}
197
198/// Copies into itself the string data between \p s1 and \p s2
199inline string::string (const_pointer s1, const_pointer s2)
200: memblock ()
201{
202    assert (s1 <= s2 && "Negative ranges result in memory allocation errors.");
203    assign (s1, s2);
204}
205
206/// Returns the pointer to the first character.
207inline string::operator const string::value_type* (void) const
208{
209    assert ((!end() || *end() == c_Terminator) && "This string is linked to data that is not 0-terminated. This may cause serious security problems. Please assign the data instead of linking.");
210    return (begin());
211}
212
213/// Returns the pointer to the first character.
214inline string::operator string::value_type* (void)
215{
216    assert ((end() && *end() == c_Terminator) && "This string is linked to data that is not 0-terminated. This may cause serious security problems. Please assign the data instead of linking.");
217    return (begin());
218}
219
220/// Concatenates itself with \p s
221inline string string::operator+ (const string& s) const
222{
223    string result (*this);
224    result += s;
225    return (result);
226}
227
228//----------------------------------------------------------------------
229// Operators needed to avoid comparing pointer to pointer
230
231#define PTR_STRING_CMP(op, impl)	\
232inline bool op (const char* s1, const string& s2) { return impl; }
233PTR_STRING_CMP (operator==, (s2 == s1))
234PTR_STRING_CMP (operator!=, (s2 != s1))
235PTR_STRING_CMP (operator<,  (s2 >  s1))
236PTR_STRING_CMP (operator<=, (s2 >= s1))
237PTR_STRING_CMP (operator>,  (s2 <  s1))
238PTR_STRING_CMP (operator>=, (s2 <= s1))
239#undef PTR_STRING_CMP
240
241//----------------------------------------------------------------------
242
243template <typename T>
244inline hashvalue_t hash_value (const T& v)
245{ return (string::hash (v.begin(), v.end())); }
246
247template <>
248inline hashvalue_t hash_value (const string::const_pointer& v)
249{ return (string::hash (v, v + strlen(v))); }
250
251template <>
252inline hashvalue_t hash_value (const string::pointer& v)
253{ return (string::hash (v, v + strlen(v))); }
254
255//----------------------------------------------------------------------
256
257} // namespace ustl
258
// Specialization for stream alignment.
// Invokes the ALIGNOF macro (defined outside this file) for ustl::string, with
// `alignof (string::value_type())` as the alignment argument.
// NOTE(review): `string` is unqualified here although this line follows the
// close of namespace ustl; presumably ALIGNOF expands inside that namespace.
// NOTE(review): `alignof` is a keyword since C++11; this call presumably
// relies on a library-provided alignof function -- confirm before building
// with a newer language standard.
ALIGNOF (ustl::string, alignof (string::value_type()))
261
262#endif
263
264