ustring.cpp revision 9066cfe9886ac131c34d59ed0e2d287b0e3c0087
1// This file is part of the ustl library, an STL implementation.
2//
3// Copyright (C) 2005 by Mike Sharov <msharov@users.sourceforge.net>
4// This file is free software, distributed under the MIT License.
5//
6// ustring.cpp
7//
8//	STL basic_string equivalent functionality.
9//
10
11#include "uassert.h"
12#include "ustring.h"
13#include "mistream.h"
14#include "mostream.h"
15#include "ualgo.h"
16#include <stdio.h>	// for vsnprintf (in string::format)
17
18#include "uassert.h"
19
20namespace ustl {
21
22//----------------------------------------------------------------------
23
24const uoff_t string::npos;
25const string::size_type string::size_Terminator;
26const string::value_type string::c_Terminator;
27const char string::empty_string[string::size_Terminator] = "";
28
29typedef utf8in_iterator<string::const_iterator> utf8icstring_iterator;
30typedef utf8in_iterator<string::iterator> utf8istring_iterator;
31typedef utf8out_iterator<string::iterator> utf8ostring_iterator;
32
33//----------------------------------------------------------------------
34
35/// Creates an empty string.
36string::string (void)
37: memblock ()
38{
39    link (empty_string, 0U);
40}
41
42/// Assigns itself the value of string \p s
43string::string (const string& s)
44: memblock()
45{
46    if (s.is_linked())
47	link (s.c_str(), s.size());
48    else
49	assign (s);
50}
51
52/// Links to \p s
53string::string (const_pointer s)
54: memblock ()
55{
56    if (!s)
57	s = empty_string;
58    link (s, strlen(s));
59}
60
61/// Creates a string of length \p n filled with character \p c.
62string::string (size_type n, value_type c)
63: memblock ()
64{
65    resize (n);
66    fill_n (begin(), n, c);
67}
68
69/// Resize the string to \p n characters. New space contents is undefined.
70void string::resize (size_type n)
71{
72    memblock::resize (n);
73    at(n) = c_Terminator;
74}
75
76/// Assigns itself the value of string \p s
77void string::assign (const_pointer s)
78{
79    if (!s)
80	s = empty_string;
81    assign (s, strlen (s));
82}
83
84/// Assigns itself the value of string \p s of length \p len.
85void string::assign (const_pointer s, size_type len)
86{
87    while (len && s[len - 1] == c_Terminator)
88	-- len;
89    resize (len);
90    copy (s, len);
91}
92
93/// Appends to itself the value of string \p s of length \p len.
94void string::append (const_pointer s)
95{
96    if (!s)
97	s = empty_string;
98    append (s, strlen (s));
99}
100
101/// Appends to itself the value of string \p s of length \p len.
102void string::append (const_pointer s, size_type len)
103{
104    while (len && s[len - 1] == c_Terminator)
105	-- len;
106    resize (size() + len);
107    copy_n (s, len, end() - len);
108}
109
110/// Appends to itself \p n characters of value \p c.
111void string::append (size_type n, value_type c)
112{
113    resize (size() + n);
114    fill_n (end() - n, n, c);
115}
116
117/// Copies into itself at offset \p start, the value of string \p p of length \p n.
118string::size_type string::copyto (pointer p, size_type n, const_iterator start) const
119{
120    assert (p && n);
121    if (!start)
122	start = begin();
123    const size_type btc = min(n - size_Terminator, size());
124    copy_n (start, btc, p);
125    p[btc] = c_Terminator;
126    return (btc + size_Terminator);
127}
128
129/// Returns comparison value regarding string \p s.
130/// The return value is:
131/// \li 1 if this string is greater (by value, not length) than string \p s
132/// \li 0 if this string is equal to string \p s
133/// \li -1 if this string is less than string \p s
134///
135/*static*/ int string::compare (const_iterator first1, const_iterator last1, const_iterator first2, const_iterator last2)
136{
137    assert (first1 <= last1 && (first2 <= last2 || !last2) && "Negative ranges result in memory allocation errors.");
138    const size_type len1 = distance (first1, last1), len2 = distance (first2, last2);
139    const int rvbylen = sign (int(len1 - len2));
140    int rv = memcmp (first1, first2, min (len1, len2));
141    return (rv ? rv : rvbylen);
142}
143
144/// Returns true if this string is equal to string \p s.
145bool string::operator== (const_pointer s) const
146{
147    if (!s)
148	s = empty_string;
149    return (size() == strlen(s) && 0 == memcmp (c_str(), s, size()));
150}
151
152/// Returns the beginning of character \p i.
153string::iterator string::utf8_iat (uoff_t i)
154{
155    utf8istring_iterator cfinder (begin());
156    cfinder += i;
157    return (cfinder.base());
158}
159
160/// Inserts wide character \p c at \p ip \p n times as a UTF-8 string.
161///
162/// \p ip is a character position, not a byte position, and must fall in
163/// the 0 through length() range.
164/// The first argument is not an iterator because it is rather difficult
165/// to get one. You'd have to use ((utf8begin() + n).base()) as the first
166/// argument, which is rather ugly. Besides, then this insert would be
167/// ambiguous with the regular character insert.
168///
169void string::insert (const uoff_t ip, wchar_t c, size_type n)
170{
171    iterator ipp (utf8_iat (ip));
172    ipp = iterator (memblock::insert (memblock::iterator(ipp), n * Utf8Bytes(c)));
173    fill_n (utf8out (ipp), n, c);
174    *end() = c_Terminator;
175}
176
177/// Inserts sequence of wide characters at \p ip.
178void string::insert (const uoff_t ip, const wchar_t* first, const wchar_t* last, const size_type n)
179{
180    iterator ipp (utf8_iat (ip));
181    size_type nti = distance (first, last), bti = 0;
182    for (uoff_t i = 0; i < nti; ++ i)
183	bti += Utf8Bytes(first[i]);
184    ipp = iterator (memblock::insert (memblock::iterator(ipp), n * bti));
185    utf8ostring_iterator uout (utf8out (ipp));
186    for (uoff_t j = 0; j < n; ++ j)
187	for (uoff_t k = 0; k < nti; ++ k, ++ uout)
188	    *uout = first[k];
189    *end() = c_Terminator;
190}
191
192/// Inserts character \p c into this string at \p start.
193string::iterator string::insert (iterator start, const_reference c, size_type n)
194{
195    start = iterator (memblock::insert (memblock::iterator(start), n));
196    fill_n (start, n, c);
197    *end() = c_Terminator;
198    return (start);
199}
200
201/// Inserts \p count instances of string \p s at offset \p start.
202string::iterator string::insert (iterator start, const_pointer s, size_type n)
203{
204    if (!s)
205	s = empty_string;
206    return (insert (start, s, s + strlen(s), n));
207}
208
209/// Inserts [first,last] \p n times.
210string::iterator string::insert (iterator start, const_pointer first, const_pointer last, size_type n)
211{
212    assert (first <= last);
213    assert (begin() <= start && end() >= start);
214    assert ((first < begin() || first >= end() || size() + abs_distance(first,last) < capacity()) && "Insertion of self with autoresize is not supported");
215    start = iterator (memblock::insert (memblock::iterator(start), distance(first, last) * n));
216    fill (memblock::iterator(start), first, distance(first, last), n);
217    *end() = c_Terminator;
218    return (start);
219}
220
221/// Erases \p size bytes at \p start.
222string::iterator string::erase (iterator ep, size_type n)
223{
224    string::iterator rv = memblock::erase (memblock::iterator(ep), n);
225    *end() = c_Terminator;
226    return (rv);
227}
228
229/// Erases \p size characters at \p start.
230/// \p start is a character position, not a byte position, and must be
231/// in the 0..length() range.
232///
233void string::erase (uoff_t ep, size_type n)
234{
235    iterator first (utf8_iat(ep));
236    size_t nbytes (utf8_iat(ep + n) - first);
237    memblock::erase (first, nbytes);
238    *end() = c_Terminator;
239}
240
241/// Replaces range [\p start, \p start + \p len] with string \p s.
242void string::replace (iterator first, iterator last, const_pointer s)
243{
244    if (!s)
245	s = empty_string;
246    replace (first, last, s, s + strlen(s));
247}
248
249/// Replaces range [\p start, \p start + \p len] with \p count instances of string \p s.
250void string::replace (iterator first, iterator last, const_pointer i1, const_pointer i2, size_type n)
251{
252    assert (first <= last);
253    assert (n || distance(first, last));
254    assert (first >= begin() && first <= end() && last >= first && last <= end());
255    assert ((i1 < begin() || i1 >= end() || abs_distance(i1,i2) * n + size() < capacity()) && "Replacement by self can not autoresize");
256    const size_type bte = distance(first, last), bti = distance(i1, i2) * n;
257    if (bti < bte)
258	first = iterator (memblock::erase (memblock::iterator(first), bte - bti));
259    else if (bte < bti)
260	first = iterator (memblock::insert (memblock::iterator(first), bti - bte));
261    fill (memblock::iterator(first), i1, distance(i1, i2), n);
262    *end() = c_Terminator;
263}
264
265/// Returns the offset of the first occurence of \p c after \p pos.
266uoff_t string::find (const_reference c, uoff_t pos) const
267{
268    const_iterator found = ::ustl::find (iat(pos), end(), c);
269    return (found < end() ? distance(begin(),found) : npos);
270}
271
272/// Returns the offset of the first occurence of substring \p s of length \p n after \p pos.
273uoff_t string::find (const string& s, uoff_t pos) const
274{
275    if (s.empty() || s.size() > size() - pos)
276	return (npos);
277    const uoff_t endi = s.size() - 1;
278    const_reference endchar = s[endi];
279    uoff_t lastPos = endi;
280    while (lastPos-- && s[lastPos] != endchar);
281    const size_type skip = endi - lastPos;
282    const_iterator i = iat(pos) + endi;
283    for (; i < end() && (i = ::ustl::find (i, end(), endchar)) < end(); i += skip)
284	if (memcmp (i - endi, s.c_str(), s.size()) == 0)
285	    return (distance (begin(), i) - endi);
286    return (npos);
287}
288
289/// Returns the offset of the last occurence of character \p c before \p pos.
290uoff_t string::rfind (const_reference c, uoff_t pos) const
291{
292    for (int i = min(pos,size()-1); i >= 0; --i)
293	if (at(i) == c)
294	    return (i);
295    return (npos);
296}
297
298/// Returns the offset of the last occurence of substring \p s of size \p n before \p pos.
299uoff_t string::rfind (const string& s, uoff_t pos) const
300{
301    const_iterator d = iat(pos) - 1;
302    const_iterator sp = begin() + s.size() - 1;
303    const_iterator m = s.end() - 1;
304    for (uoff_t i = 0; d > sp && i < s.size(); -- d)
305	for (i = 0; i < s.size(); ++ i)
306	    if (m[-i] != d[-i])
307		break;
308    return (d > sp ? distance (begin(), d + 2 - s.size()) : npos);
309}
310
311/// Returns the offset of the first occurence of one of characters in \p s of size \p n after \p pos.
312uoff_t string::find_first_of (const string& s, uoff_t pos) const
313{
314    for (uoff_t i = min(pos,size()); i < size(); ++ i)
315	if (s.find (at(i)) != npos)
316	    return (i);
317    return (npos);
318}
319
320/// Returns the offset of the first occurence of one of characters not in \p s of size \p n after \p pos.
321uoff_t string::find_first_not_of (const string& s, uoff_t pos) const
322{
323    for (uoff_t i = min(pos,size()); i < size(); ++ i)
324	if (s.find (at(i)) == npos)
325	    return (i);
326    return (npos);
327}
328
329/// Returns the offset of the last occurence of one of characters in \p s of size \p n before \p pos.
330uoff_t string::find_last_of (const string& s, uoff_t pos) const
331{
332    for (int i = min(pos,size()-1); i >= 0; -- i)
333	if (s.find (at(i)) != npos)
334	    return (i);
335    return (npos);
336}
337
338/// Returns the offset of the last occurence of one of characters not in \p s of size \p n before \p pos.
339uoff_t string::find_last_not_of (const string& s, uoff_t pos) const
340{
341    for (int i = min(pos,size()-1); i >= 0; -- i)
342	if (s.find (at(i)) == npos)
343	    return (i);
344    return (npos);
345}
346
347/// Equivalent to a vsprintf on the string.
348int string::vformat (const char* fmt, va_list args)
349{
350#if HAVE_VA_COPY
351    va_list args2;
352#else
353    #define args2 args
354    #undef __va_copy
355    #define __va_copy(x,y)
356#endif
357    size_t rv = size();
358    do {
359	reserve (rv);
360	__va_copy (args2, args);
361	rv = vsnprintf (data(), memblock::capacity(), fmt, args2);
362	rv = min (rv, memblock::capacity());
363    } while (rv > capacity());
364    resize (min (rv, capacity()));
365    return (rv);
366}
367
368/// Equivalent to a sprintf on the string.
369int string::format (const char* fmt, ...)
370{
371    va_list args;
372    va_start (args, fmt);
373    const int rv = vformat (fmt, args);
374    va_end (args);
375    return (rv);
376}
377
378/// Returns the number of bytes required to write this object to a stream.
379size_t string::stream_size (void) const
380{
381    return (Utf8Bytes(size()) + size());
382}
383
384/// Reads the object from stream \p os
385void string::read (istream& is)
386{
387    char szbuf [8];
388    is >> szbuf[0];
389    size_t szsz (Utf8SequenceBytes (szbuf[0]) - 1), n = 0;
390    is.verify_remaining ("read", "ustl::string", szsz);
391    is.read (szbuf + 1, szsz);
392    n = *utf8in(szbuf);
393    is.verify_remaining ("read", "ustl::string", n);
394    resize (n);
395    is.read (data(), size());
396}
397
398/// Writes the object to stream \p os
399void string::write (ostream& os) const
400{
401    const written_size_type sz (size());
402    assert (sz == size() && "No support for writing strings larger than 4G");
403
404    char szbuf [8];
405    utf8out_iterator<char*> szout (szbuf);
406    *szout = sz;
407    size_t szsz = distance (szbuf, szout.base());
408
409    os.verify_remaining ("write", "ustl::string", szsz + sz);
410    os.write (szbuf, szsz);
411    os.write (cdata(), sz);
412}
413
414/// Returns a hash value for [first, last)
415/*static*/ hashvalue_t string::hash (const char* first, const char* last)
416{
417    hashvalue_t h = 0;
418    // This has the bits flowing into each other from both sides of the number
419    for (; first < last; ++ first)
420	h = *first + ((h << 7) | (h >> BitsInType(hashvalue_t) - 7));
421    return (h);
422}
423
424} // namespace ustl
425
426