// This file is part of the ustl library, an STL implementation.
//
// Copyright (C) 2005 by Mike Sharov <msharov@users.sourceforge.net>
// This file is free software, distributed under the MIT License.
//
/// \file uutility.h
///
/// \brief Utility templates.
///
/// Everything in here except min(), max(), distance(), and advance()
/// is a uSTL extension and is absent from other STL implementations.
///

#ifndef UUTILITY_H_6A58BD296269A82A4AAAA4FD19FDB3AC
#define UUTILITY_H_6A58BD296269A82A4AAAA4FD19FDB3AC

#include "uassert.h"
#include "utypes.h"

#if PLATFORM_ANDROID
#include <stdio.h>
#undef CPU_HAS_MMX
#endif

namespace ustl {

#ifdef __GNUC__
    /// Returns the number of elements in a static vector
    #define VectorSize(v)	(sizeof(v) / sizeof(*v))
#else
    // Old compilers will not be able to evaluate *v on an empty vector.
    // The tradeoff here is that VectorSize will not be able to measure arrays of local structs.
    #define VectorSize(v)	(sizeof(v) / ustl::size_of_elements(1, v))
#endif

/// Expands into a ptr,size expression for the given static vector; useful as link arguments.
#define VectorBlock(v)	(v)+0, VectorSize(v)	// +0 makes it work under gcc 2.95
/// Expands into a begin,end expression for the given static vector; useful for algorithm arguments.
#define VectorRange(v)	VectorBlock(v)+(v)
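/// Usage sketch (illustrative, not part of the original documentation; assumes
/// ustl::fill and a local static array):
/// \code
///     int arr[8];
///     size_t n = VectorSize(arr);		// n == 8
///     fill (VectorRange(arr), 0);		// same as fill (arr, arr + 8, 0)
/// \endcode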

/// Returns the number of bits in the given type
#define BitsInType(t)	(sizeof(t) * CHAR_BIT)

/// Returns the mask of type \p t with the lowest \p n bits set.
#define BitMask(t,n)	(t(~t(0)) >> ((sizeof(t) * CHAR_BIT) - (n)))
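/// Usage sketch (illustrative, not part of the original documentation):
/// \code
///     BitsInType (uint32_t)	// == 32
///     BitMask (uint8_t, 4)	// == 0x0F
///     BitMask (uint32_t, 8)	// == 0x000000FF
/// \endcode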

/// Argument that is used only in debug builds (as in an assert)
#ifndef NDEBUG
    #define DebugArg(x)	x
#else
    #define DebugArg(x)
#endif

/// Shorthand for container iteration.
#define foreach(type,i,ctr)	for (type i = (ctr).begin(); i != (ctr).end(); ++ i)
/// Shorthand for container reverse iteration.
#define eachfor(type,i,ctr)	for (type i = (ctr).rbegin(); i != (ctr).rend(); ++ i)
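/// Usage sketch (illustrative, not part of the original documentation; assumes a
/// ustl::vector<int> named v):
/// \code
///     int sum = 0;
///     foreach (vector<int>::iterator, i, v)
///         sum += *i;
/// \endcode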

/// Macro for passing template types as macro arguments.
/// \@{
#define TEMPLATE_FULL_DECL1(d1,t1)		template <d1 t1>
#define TEMPLATE_FULL_DECL2(d1,t1,d2,t2)	template <d1 t1, d2 t2>
#define TEMPLATE_FULL_DECL3(d1,t1,d2,t2,d3,t3)	template <d1 t1, d2 t2, d3 t3>
#define TEMPLATE_DECL1(t1)		TEMPLATE_FULL_DECL1(typename,t1)
#define TEMPLATE_DECL2(t1,t2)		TEMPLATE_FULL_DECL2(typename,t1,typename,t2)
#define TEMPLATE_DECL3(t1,t2,t3)	TEMPLATE_FULL_DECL3(typename,t1,typename,t2,typename,t3)
#define TEMPLATE_TYPE1(type,a1)		type<a1>
#define TEMPLATE_TYPE2(type,a1,a2)	type<a1,a2>
#define TEMPLATE_TYPE3(type,a1,a2,a3)	type<a1,a2,a3>
/// \@}

/// Returns the minimum of \p a and \p b
template <typename T1, typename T2>
inline const T1 min (const T1& a, const T2& b)
{
    return (a < b ? a : b);
}

/// Returns the maximum of \p a and \p b
template <typename T1, typename T2>
inline const T1 max (const T1& a, const T2& b)
{
    return (b < a ? a : b);
}

/// \brief Divides \p n1 by \p n2 and rounds the result up.
/// This is in contrast to regular integer division, which truncates.
/// Negative numbers are not handled specially (they are simply truncated
/// toward zero), because supporting them would require a branch, and since
/// this is frequently used in graphics code, speed is important.
///
template <typename T1, typename T2>
inline T1 DivRU (T1 n1, T2 n2)
{
    return (n1 / n2 + (n1 % n2 > 0));
}
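/// Usage sketch (illustrative, not part of the original documentation):
/// \code
///     DivRU (7, 4)	// == 2
///     DivRU (8, 4)	// == 2
///     // e.g. the number of fixed-size blocks needed to hold nBytes:
///     // size_t nBlocks = DivRU (nBytes, blockSize);
/// \endcode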

/// The alignment performed by default.
const size_t c_DefaultAlignment = __alignof__(void*);

/// \brief Rounds \p n up to be divisible by \p grain
template <typename T>
inline T Align (T n, size_t grain = c_DefaultAlignment)
{
    T a, r = n % grain;
    if (grain == 2) return (n + r);
    switch (grain) {
	case 4: case 8: case 16: a = (n & ~(grain - 1)) + grain; break;
	default:		 a = n + (grain - r);
    };
    return (r ? a : n);
}
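/// Usage sketch (illustrative, not part of the original documentation):
/// \code
///     Align (5, 4)	// == 8
///     Align (8, 4)	// == 8
///     Align (10, 16)	// == 16
/// \endcode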

/// Offsets an iterator
template <typename T>
inline T advance (T i, ssize_t offset)
{
    return (i + offset);
}

#ifndef DOXYGEN_SHOULD_SKIP_THIS
/// Offsets a void pointer
template <>
inline const void* advance (const void* p, ssize_t offset)
{
    assert (p || !offset);
    return (reinterpret_cast<const uint8_t*>(p) + offset);
}

/// Offsets a void pointer
template <>
inline void* advance (void* p, ssize_t offset)
{
    assert (p || !offset);
    return (reinterpret_cast<uint8_t*>(p) + offset);
}
#endif

/// Returns the difference \p i2 - \p i1
template <typename T1, typename T2>
inline ptrdiff_t distance (T1 i1, T2 i2)
{
    return (i2 - i1);
}

#ifndef DOXYGEN_SHOULD_SKIP_THIS
#define UNVOID_DISTANCE(T1const,T2const)				   \
template <> inline ptrdiff_t distance (T1const void* p1, T2const void* p2) \
{ return ((T2const uint8_t*)(p2) - (T1const uint8_t*)(p1)); }
UNVOID_DISTANCE(,)
UNVOID_DISTANCE(const,const)
UNVOID_DISTANCE(,const)
UNVOID_DISTANCE(const,)
#undef UNVOID_DISTANCE
#endif
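/// Usage sketch (illustrative, not part of the original documentation): byte
/// arithmetic on void pointers via the specializations above.
/// \code
///     char buf [64];
///     void* first = buf;
///     void* p = advance (first, 16);		// 16 bytes forward
///     ptrdiff_t off = distance (first, p);	// == 16
/// \endcode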

/// \brief Returns the absolute value of \p v
/// Unlike the stdlib functions, this is inline and works with all types.
template <typename T>
inline T absv (T v)
{
    return (v < 0 ? -v : v);
}

/// \brief Returns -1 for negative values, 1 for positive, and 0 for 0
template <typename T>
inline T sign (T v)
{
    return ((0 < v) - (v < 0));
}

/// Returns the absolute value of the distance between \p i1 and \p i2
template <typename T1, typename T2>
inline size_t abs_distance (T1 i1, T2 i2)
{
    return (absv (distance(i1, i2)));
}
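/// Usage sketch (illustrative, not part of the original documentation):
/// \code
///     absv (-3)			// == 3
///     sign (-7)			// == -1
///     abs_distance (p2, p1)		// == abs_distance (p1, p2)
/// \endcode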

/// Returns the size in bytes of \p n elements of type \p T
template <typename T>
inline size_t size_of_elements (size_t n, const T*)
{
    return (n * sizeof(T));
}

// Defined in byteswap.h, which is usually unusable.
#undef bswap_16
#undef bswap_32
#undef bswap_64

#if CPU_HAS_CMPXCHG8	// If it has that, it has bswap.
inline uint16_t bswap_16 (uint16_t v)	{ asm ("rorw $8, %w0" : "=r"(v) : "0"(v) : "cc"); return (v); }
inline uint32_t bswap_32 (uint32_t v)	{ asm ("bswap %0" : "=r"(v) : "0"(v)); return (v); }
#else
inline uint16_t bswap_16 (uint16_t v)	{ return (v << 8 | v >> 8); }
inline uint32_t bswap_32 (uint32_t v)	{ return (v << 24 | (v & 0xFF00) << 8 | ((v >> 8) & 0xFF00) | v >> 24); }
#endif
#if HAVE_INT64_T
inline uint64_t bswap_64 (uint64_t v)	{ return ((uint64_t(bswap_32(v)) << 32) | bswap_32(v >> 32)); }
#endif

/// \brief Swaps the byteorder of \p v.
template <typename T>
inline T bswap (const T& v)
{
    switch (BitsInType(T)) {
	default:	return (v);
	case 16:	return (T (bswap_16 (uint16_t (v))));
	case 32:	return (T (bswap_32 (uint32_t (v))));
#if HAVE_INT64_T
	case 64:	return (T (bswap_64 (uint64_t (v))));
#endif
    };
}

#if USTL_BYTE_ORDER == USTL_BIG_ENDIAN
template <typename T> inline T le_to_native (const T& v) { return (bswap (v)); }
template <typename T> inline T be_to_native (const T& v) { return (v); }
template <typename T> inline T native_to_le (const T& v) { return (bswap (v)); }
template <typename T> inline T native_to_be (const T& v) { return (v); }
#elif USTL_BYTE_ORDER == USTL_LITTLE_ENDIAN
template <typename T> inline T le_to_native (const T& v) { return (v); }
template <typename T> inline T be_to_native (const T& v) { return (bswap (v)); }
template <typename T> inline T native_to_le (const T& v) { return (v); }
template <typename T> inline T native_to_be (const T& v) { return (bswap (v)); }
#endif // USTL_BYTE_ORDER
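/// Usage sketch (illustrative, not part of the original documentation): writing
/// a 32-bit value in big-endian (network) byte order.
/// \code
///     uint32_t v = 0x12345678;
///     uint32_t vbe = native_to_be (v);	// bswapped on little-endian hosts, unchanged on big-endian
/// \endcode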

/// Deletes \p p and sets it to NULL
template <typename T>
inline void Delete (T*& p)
{
    delete p;
    p = NULL;
}

/// Deletes \p p as an array and sets it to NULL
template <typename T>
inline void DeleteVector (T*& p)
{
    delete [] p;
    p = NULL;
}

/// Template for making != from ! and ==
template <typename T>
inline bool operator!= (const T& x, const T& y)
{
    return (!(x == y));
}

/// Template for making > from <
template <typename T>
inline bool operator> (const T& x, const T& y)
{
    return (y < x);
}

/// Template for making <= from ! and <
template <typename T>
inline bool operator<= (const T& x, const T& y)
{
    return (!(y < x));
}

/// Template for making >= from ! and <
template <typename T>
inline bool operator>= (const T& x, const T& y)
{
    return (!(x < y));
}
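/// Usage sketch (illustrative, not part of the original documentation; Point is
/// a hypothetical type): defining only operator== and operator< gives a class
/// the remaining comparisons through the templates above.
/// \code
///     struct Point {
///         int x, y;
///         bool operator== (const Point& p) const { return (x == p.x && y == p.y); }
///         bool operator<  (const Point& p) const { return (x < p.x || (x == p.x && y < p.y)); }
///     };
///     // For Point a, b: a != b, a > b, a <= b, and a >= b now all work.
/// \endcode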

/// Packs \p s multiple times into \p b. Useful for loop unrolling.
template <typename TSmall, typename TBig>
inline void pack_type (TSmall s, TBig& b)
{
    const size_t n = sizeof(TBig) / sizeof(TSmall);
    b = s;
    // Calls to min are here to avoid warnings for shifts bigger than the type. min will be gone when optimized.
    if (n < 2) return;
    b = (b << min (BitsInType(TSmall), BitsInType(TBig))) | b;
    if (n < 4) return;
    b = (b << min (BitsInType(TSmall) * 2, BitsInType(TBig))) | b;
    if (n < 8) return;
    b = (b << min (BitsInType(TSmall) * 4, BitsInType(TBig))) | b;
}
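/// Usage sketch (illustrative, not part of the original documentation):
/// \code
///     uint32_t w;
///     pack_type (uint8_t(0x42), w);	// w == 0x42424242
/// \endcode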

#if __GNUC__ >= 3
inline bool TestAndSet (int* pm) __attribute__((always_inline));
#endif
/// Sets the contents of \p pm to 1 and returns true if the previous value was 0.
inline bool TestAndSet (int* pm)
{
#if CPU_HAS_CMPXCHG8
    bool rv;
    int oldVal (1);
    asm volatile ( // cmpxchg compares to %eax and swaps if equal
	"cmpxchgl %3, %1\n\t"
	"sete %0"
	: "=a" (rv), "=m" (*pm), "=r" (oldVal)
	: "2" (oldVal), "a" (0)
	: "memory");
    return (rv);
#elif __i386__ || __x86_64__
    int oldVal (1);
    asm volatile ("xchgl %0, %1" : "=r"(oldVal), "=m"(*pm) : "0"(oldVal), "m"(*pm) : "memory");
    return (!oldVal);
#elif __sparc32__	// This has not been tested
    int rv;
    asm volatile ("ldstub %1, %0" : "=r"(rv), "=m"(*pm) : "m"(pm));
    return (!rv);
#else
    const int oldVal (*pm);
    *pm = 1;
    return (!oldVal);
#endif
}
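/// Usage sketch (illustrative, not part of the original documentation; a
/// hypothetical busy-wait flag):
/// \code
///     static int s_Lock = 0;
///     while (!TestAndSet (&s_Lock)) {}	// spin until the flag was 0 and is now 1
///     // ... critical section ...
///     s_Lock = 0;				// release
/// \endcode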

/// \brief This template is to be used for dereferencing a type-punned pointer without a warning.
///
/// When casting a local variable to an unrelated type through a pointer (for
/// example, reading a float as a uint32_t without conversion), the resulting
/// memory location can be accessed through either pointer, which violates the
/// strict aliasing rule. While the -fno-strict-aliasing option can be given to
/// the compiler to eliminate the warning, it may produce inefficient code in
/// some instances, because allowing aliasing inhibits some optimizations. By
/// using this template, and by ensuring the memory is accessed in one way only,
/// efficient code can be produced without the warning. For gcc 4.1.0+.
///
template <typename DEST, typename SRC>
inline DEST noalias (DEST, SRC* s)
{
    union UPun { SRC s; DEST d; };
    return (((UPun*)(s))->d);
}
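/// Usage sketch (illustrative, not part of the original documentation): reading
/// the bit pattern of a float as a uint32_t.
/// \code
///     float f = 1.0f;
///     uint32_t bits = noalias (uint32_t(), &f);	// == 0x3F800000
/// \endcode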

namespace simd {
    /// Call after you are done using SIMD algorithms for 64 bit tuples.
#if CPU_HAS_MMX
    inline void reset_mmx (void) __attribute__((always_inline));
    #define ALL_MMX_REGS_CHANGELIST "mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7","st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)"
    #if CPU_HAS_3DNOW
	inline void reset_mmx (void) { asm ("femms":::ALL_MMX_REGS_CHANGELIST); }
    #else
	inline void reset_mmx (void) { asm ("emms":::ALL_MMX_REGS_CHANGELIST); }
    #endif
#else
    inline void reset_mmx (void) {}
#endif
} // namespace simd

/// \brief Type that is not size_t
///
/// Because size_t may be defined as either unsigned long or unsigned int on
/// different machines, this macro names whichever unsigned type size_t is not.
/// It is convenient when overloads taking size_t must also cover the other
/// integral types.
///
#if defined(SIZE_T_IS_LONG) && !defined(__ARM_EABI__)
    #define NOT_SIZE_T_I_OR_L	unsigned int
#else
    #define NOT_SIZE_T_I_OR_L	unsigned long
#endif

/// \brief Required when you want to overload size_t and a pointer.
///
/// The compiler will happily cast a number to a pointer and declare
/// that the overload is ambiguous unless you define overloads for all
/// possible integral types that a number may represent. This behaviour,
/// although braindead, is in the ANSI standard, and thus not a bug. If
/// you want to change the standard, the best solution is to disallow any
/// implicit casts to pointer from an integral type. Ironically, such an
/// implicit cast is already detected by gcc.
///
#if defined(USTL_ANDROID_X86)
#define OVERLOAD_POINTER_AND_SIZE_T_V2(name, arg1type)
#else
#define OVERLOAD_POINTER_AND_SIZE_T_V2(name, arg1type)						\
    inline void	name (arg1type a1, short a2)			{ name (a1, size_t(a2)); }	\
    inline void	name (arg1type a1, unsigned short a2)		{ name (a1, size_t(a2)); }	\
    inline void	name (arg1type a1, int a2)			{ name (a1, size_t(a2)); }	\
    inline void	name (arg1type a1, long a2)			{ name (a1, size_t(a2)); }	\
    inline void	name (arg1type a1, NOT_SIZE_T_I_OR_L a2)	{ name (a1, size_t(a2)); }
#endif
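/// Usage sketch (illustrative, not part of the original documentation; Resize
/// is a hypothetical function name):
/// \code
///     void Resize (void* p, size_t n);
///     OVERLOAD_POINTER_AND_SIZE_T_V2 (Resize, void*)
///     // Resize (buf, 5) now forwards the int literal to the size_t overload.
/// \endcode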
} // namespace ustl

#endif