1
2/********************************************************************
3 *                                                                  *
4 * THIS FILE IS PART OF THE 'ZYWRLE' VNC CODEC SOURCE CODE.         *
5 *                                                                  *
6 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
7 * GOVERNED BY A FOLLOWING BSD-STYLE SOURCE LICENSE.                *
8 * PLEASE READ THESE TERMS BEFORE DISTRIBUTING.                     *
9 *                                                                  *
10 * THE 'ZYWRLE' VNC CODEC SOURCE CODE IS (C) COPYRIGHT 2006         *
11 * BY Hitachi Systems & Services, Ltd.                              *
12 * (Noriaki Yamazaki, Research & Developement Center)               *                                                                 *
13 *                                                                  *
14 ********************************************************************
15Redistribution and use in source and binary forms, with or without
16modification, are permitted provided that the following conditions
17are met:
18
19- Redistributions of source code must retain the above copyright
20notice, this list of conditions and the following disclaimer.
21
22- Redistributions in binary form must reproduce the above copyright
23notice, this list of conditions and the following disclaimer in the
24documentation and/or other materials provided with the distribution.
25
26- Neither the name of the Hitachi Systems & Services, Ltd. nor
27the names of its contributors may be used to endorse or promote
28products derived from this software without specific prior written
29permission.
30
31THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
32``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
33LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
34A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION
35OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
37LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
38DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
39THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
41OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 ********************************************************************/
43
44/* Change Log:
45     V0.02 : 2008/02/04 : Fix mis encode/decode when width != scanline
46	                     (Thanks Johannes Schindelin, author of LibVNC
47						  Server/Client)
48     V0.01 : 2007/02/06 : Initial release
49*/
50
/*
 * Build switches.
 * ZYWRLE_ENCODE / ZYWRLE_DECODE are left commented out here; the code below
 * tests them with #ifdef, so they have to be defined before this point to
 * enable the encoder and/or decoder parts.
 * ZYWRLE_QUANTIZE selects the table driven filter (Type B) instead of the
 * bit-mask filter (Type A).
 */
/* #define ZYWRLE_ENCODE */
/* #define ZYWRLE_DECODE */
#define ZYWRLE_QUANTIZE
54
55/*
56[References]
57 PLHarr:
58   Senecal, J. G., P. Lindstrom, M. A. Duchaineau, and K. I. Joy, "An Improved N-Bit to N-Bit Reversible Haar-Like Transform," Pacific Graphics 2004, October 2004, pp. 371-380.
59 EZW:
60   Shapiro, JM: Embedded Image Coding Using Zerotrees of Wavelet Coefficients, IEEE Trans. Signal. Process., Vol.41, pp.3445-3462 (1993).
61*/
62
63
/*
 * Template macro stuffs.
 * Every per-instantiation name is built by pasting BPP (and END_FIX for the
 * functions) onto a base name, e.g. ZYWRLE_LOAD_PIXEL -> ZYWRLE_LOAD_PIXEL<BPP>.
 * BPP, END_FIX and the __RFB_CONCAT2E/__RFB_CONCAT3E helpers are not defined
 * in this file and must be in scope at this point.
 */
#undef ZYWRLE_ANALYZE
#undef ZYWRLE_SYNTHESIZE
#define ZYWRLE_ANALYZE __RFB_CONCAT3E(zywrleAnalyze,BPP,END_FIX)
#define ZYWRLE_SYNTHESIZE __RFB_CONCAT3E(zywrleSynthesize,BPP,END_FIX)

#define ZYWRLE_RGBYUV __RFB_CONCAT3E(zywrleRGBYUV,BPP,END_FIX)
#define ZYWRLE_YUVRGB __RFB_CONCAT3E(zywrleYUVRGB,BPP,END_FIX)
#define ZYWRLE_YMASK __RFB_CONCAT2E(ZYWRLE_YMASK,BPP)
#define ZYWRLE_UVMASK __RFB_CONCAT2E(ZYWRLE_UVMASK,BPP)
#define ZYWRLE_LOAD_PIXEL __RFB_CONCAT2E(ZYWRLE_LOAD_PIXEL,BPP)
#define ZYWRLE_SAVE_PIXEL __RFB_CONCAT2E(ZYWRLE_SAVE_PIXEL,BPP)
76
/*
 * Packing/Unpacking pixel stuffs.
 * Endian conversion stuffs.
 *
 * Byte indexes used by the pixel load/save macros:
 *   S_0, S_1      : low / high byte of a 2-byte pixel
 *   L_0, L_1, L_2 : the three low-order bytes of a 4-byte pixel
 * ZYWRLE_ENDIAN and ENDIAN_BIG are not defined in this file.
 * NOTE(review): if neither is defined as a macro the preprocessor evaluates
 * both as 0, the comparison succeeds and the big-endian indexes are chosen.
 */
#undef S_0
#undef S_1
#undef L_0
#undef L_1
#undef L_2
#if ZYWRLE_ENDIAN == ENDIAN_BIG
#  define S_0	1
#  define S_1	0
#  define L_0	3
#  define L_1	2
#  define L_2	1
#else
#  define S_0	0
#  define S_1	1
#  define L_0	0
#  define L_1	1
#  define L_2	2
#endif
97
/*
 * Load/Save pixel stuffs: 15 bit pixels (5 bits per channel, two bytes).
 *
 * LOAD reads the two bytes at pSrc and stores each channel in the upper
 * five bits of R, G and B (values are multiples of 8 in 0..0xF8).
 * SAVE masks R, G and B in place to their upper five bits (the caller's
 * variables are modified) and writes the two bytes at pDst.
 * S_0 / S_1 select the low / high byte and must be defined at the point of use.
 * Every macro argument is parenthesized so pointer expressions such as
 * `p + 1` are evaluated before the byte-pointer cast.
 * The macros expand to a brace block; follow them with a ';' inside a
 * braced statement list.
 */
#define ZYWRLE_YMASK15  0xFFFFFFF8
#define ZYWRLE_UVMASK15 0xFFFFFFF8
#define ZYWRLE_LOAD_PIXEL15(pSrc,R,G,B) { \
	(R) =  (((unsigned char*)(pSrc))[S_1]<< 1)& 0xF8;	\
	(G) = ((((unsigned char*)(pSrc))[S_1]<< 6)|(((unsigned char*)(pSrc))[S_0]>> 2))& 0xF8;	\
	(B) =  (((unsigned char*)(pSrc))[S_0]<< 3)& 0xF8;	\
}
#define ZYWRLE_SAVE_PIXEL15(pDst,R,G,B) { \
	(R) &= 0xF8;	\
	(G) &= 0xF8;	\
	(B) &= 0xF8;	\
	((unsigned char*)(pDst))[S_1] = (unsigned char)( ((R)>>1)|((G)>>6)       );	\
	((unsigned char*)(pDst))[S_0] = (unsigned char)((((B)>>3)|((G)<<2))& 0xFF);	\
}
/*
 * 16 bit pixels (5 bits red, 6 bits green, 5 bits blue, two bytes).
 *
 * LOAD stores red/blue in the upper five bits and green in the upper six
 * bits of R, G and B.  SAVE masks R, G and B in place (the caller's
 * variables are modified) and writes the two bytes at pDst.
 * S_0 / S_1 select the low / high byte and must be defined at the point of use.
 * Every macro argument is parenthesized so pointer expressions such as
 * `p + 1` are evaluated before the byte-pointer cast.
 */
#define ZYWRLE_YMASK16  0xFFFFFFFC
#define ZYWRLE_UVMASK16 0xFFFFFFF8
#define ZYWRLE_LOAD_PIXEL16(pSrc,R,G,B) { \
	(R) =   ((unsigned char*)(pSrc))[S_1]     & 0xF8;	\
	(G) = ((((unsigned char*)(pSrc))[S_1]<< 5)|(((unsigned char*)(pSrc))[S_0]>> 3))& 0xFC;	\
	(B) =  (((unsigned char*)(pSrc))[S_0]<< 3)& 0xF8;	\
}
#define ZYWRLE_SAVE_PIXEL16(pDst,R,G,B) { \
	(R) &= 0xF8;	\
	(G) &= 0xFC;	\
	(B) &= 0xF8;	\
	((unsigned char*)(pDst))[S_1] = (unsigned char)(  (R)    |((G)>>5)       );	\
	((unsigned char*)(pDst))[S_0] = (unsigned char)((((B)>>3)|((G)<<3))& 0xFF);	\
}
/*
 * 32 bit pixels (one byte per channel, four bytes per pixel).
 *
 * LOAD copies the bytes at indexes L_2 / L_1 / L_0 into R / G / B; SAVE
 * writes them back.  The fourth byte of the pixel is neither read nor written.
 * L_0..L_2 must be defined at the point of use.
 * Every macro argument is parenthesized so pointer expressions such as
 * `p + 1` are evaluated before the byte-pointer cast.
 */
#define ZYWRLE_YMASK32  0xFFFFFFFF
#define ZYWRLE_UVMASK32 0xFFFFFFFF
#define ZYWRLE_LOAD_PIXEL32(pSrc,R,G,B) { \
	(R) = ((unsigned char*)(pSrc))[L_2];	\
	(G) = ((unsigned char*)(pSrc))[L_1];	\
	(B) = ((unsigned char*)(pSrc))[L_0];	\
}
#define ZYWRLE_SAVE_PIXEL32(pDst,R,G,B) { \
	((unsigned char*)(pDst))[L_2] = (unsigned char)(R);	\
	((unsigned char*)(pDst))[L_1] = (unsigned char)(G);	\
	((unsigned char*)(pDst))[L_0] = (unsigned char)(B);	\
}
139
140#ifndef ZYWRLE_ONCE
141#define ZYWRLE_ONCE
142
/*
 * InlineX: inline keyword used on the static helpers of this file.
 *   WIN32                       -> __inline
 *   C99 or later, or non-strict -> inline
 *   strict pre-C99              -> (empty)
 * `inline` is part of the language from C99 on, so it is also used when a
 * strict -std=c99/-std=c11 mode defines __STRICT_ANSI__.
 */
#ifdef WIN32
#define InlineX __inline
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define InlineX inline
#else
# ifndef __STRICT_ANSI__
#  define InlineX inline
# else
#  define InlineX
# endif
#endif
152
153#ifdef ZYWRLE_ENCODE
154/* Tables for Coefficients filtering. */
155#  ifndef ZYWRLE_QUANTIZE
/*
 * Type A: lower bit omitting of EZW style.
 * zywrleParam[level-1][l] is the mask that FilterWaveletSquare() ANDs onto
 * every coefficient word at wavelet step l when the requested level is `level`.
 * The commented-out rows at the end are an alternative mask set kept for
 * reference.
 */
static const unsigned int zywrleParam[3][3]={
	{0x0000F000,0x00000000,0x00000000},
	{0x0000C000,0x00F0F0F0,0x00000000},
	{0x0000C000,0x00C0C0C0,0x00F0F0F0},
/*	{0x0000FF00,0x00000000,0x00000000},
	{0x0000FF00,0x00FFFFFF,0x00000000},
	{0x0000FF00,0x00FFFFFF,0x00FFFFFF}, */
};
165#  else
/*
 * Type B: Non-linear quantization filter.
 * Four 256-entry lookup tables.  FilterWaveletSquare() indexes a table with
 * a coefficient byte read as unsigned char (so entries 128..255 correspond
 * to negative coefficients) and stores the signed entry back.
 * Table 0 maps everything to 0; the bi/bo/r/PSNR figures on each table are
 * the original author's annotations.
 */
static const signed char zywrleConv[4][256]={
{	/* bi=5, bo=5 r=0.0:PSNR=24.849 */
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
},
{	/* bi=5, bo=5 r=2.0:PSNR=74.031 */
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 32,
	32, 32, 32, 32, 32, 32, 32, 32,
	32, 32, 32, 32, 32, 32, 32, 32,
	48, 48, 48, 48, 48, 48, 48, 48,
	48, 48, 48, 56, 56, 56, 56, 56,
	56, 56, 56, 56, 64, 64, 64, 64,
	64, 64, 64, 64, 72, 72, 72, 72,
	72, 72, 72, 72, 80, 80, 80, 80,
	80, 80, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 96, 96,
	96, 96, 96, 104, 104, 104, 104, 104,
	104, 104, 104, 104, 104, 112, 112, 112,
	112, 112, 112, 112, 112, 112, 120, 120,
	120, 120, 120, 120, 120, 120, 120, 120,
	0, -120, -120, -120, -120, -120, -120, -120,
	-120, -120, -120, -112, -112, -112, -112, -112,
	-112, -112, -112, -112, -104, -104, -104, -104,
	-104, -104, -104, -104, -104, -104, -96, -96,
	-96, -96, -96, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -80,
	-80, -80, -80, -80, -80, -72, -72, -72,
	-72, -72, -72, -72, -72, -64, -64, -64,
	-64, -64, -64, -64, -64, -56, -56, -56,
	-56, -56, -56, -56, -56, -56, -48, -48,
	-48, -48, -48, -48, -48, -48, -48, -48,
	-48, -32, -32, -32, -32, -32, -32, -32,
	-32, -32, -32, -32, -32, -32, -32, -32,
	-32, -32, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
},
{	/* bi=5, bo=4 r=2.0:PSNR=64.441 */
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	48, 48, 48, 48, 48, 48, 48, 48,
	48, 48, 48, 48, 48, 48, 48, 48,
	48, 48, 48, 48, 48, 48, 48, 48,
	64, 64, 64, 64, 64, 64, 64, 64,
	64, 64, 64, 64, 64, 64, 64, 64,
	80, 80, 80, 80, 80, 80, 80, 80,
	80, 80, 80, 80, 80, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	104, 104, 104, 104, 104, 104, 104, 104,
	104, 104, 104, 112, 112, 112, 112, 112,
	112, 112, 112, 112, 120, 120, 120, 120,
	120, 120, 120, 120, 120, 120, 120, 120,
	0, -120, -120, -120, -120, -120, -120, -120,
	-120, -120, -120, -120, -120, -112, -112, -112,
	-112, -112, -112, -112, -112, -112, -104, -104,
	-104, -104, -104, -104, -104, -104, -104, -104,
	-104, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -80, -80, -80, -80,
	-80, -80, -80, -80, -80, -80, -80, -80,
	-80, -64, -64, -64, -64, -64, -64, -64,
	-64, -64, -64, -64, -64, -64, -64, -64,
	-64, -48, -48, -48, -48, -48, -48, -48,
	-48, -48, -48, -48, -48, -48, -48, -48,
	-48, -48, -48, -48, -48, -48, -48, -48,
	-48, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
},
{	/* bi=5, bo=2 r=2.0:PSNR=43.175 */
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	88, 88, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	88, 88, 88, 88, 88, 88, 88, 88,
	0, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -88,
	-88, -88, -88, -88, -88, -88, -88, -88,
	-88, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
}
};
305const static signed char* zywrleParam[3][3][3]={
306	{{zywrleConv[0],zywrleConv[2],zywrleConv[0]},{zywrleConv[0],zywrleConv[0],zywrleConv[0]},{zywrleConv[0],zywrleConv[0],zywrleConv[0]}},
307	{{zywrleConv[0],zywrleConv[3],zywrleConv[0]},{zywrleConv[1],zywrleConv[1],zywrleConv[1]},{zywrleConv[0],zywrleConv[0],zywrleConv[0]}},
308	{{zywrleConv[0],zywrleConv[3],zywrleConv[0]},{zywrleConv[2],zywrleConv[2],zywrleConv[2]},{zywrleConv[1],zywrleConv[1],zywrleConv[1]}},
309};
310#  endif
311#endif
312
/*
 * Piecewise-Linear Harr (PLHarr) step on one pair of 8 bit samples.
 *
 * pX0, pX1 : the two samples; both are read and both are overwritten.
 * On return *pX0 holds the "L" value and *pX1 the "H" value (the names used
 * by the comments of the original implementation).
 * Sign comparisons look at bit 7 only, i.e. the sign bit of the 8 bit value.
 * Results are narrowed back to signed char when stored.
 */
static InlineX void Harr(signed char* pX0, signed char* pX1)
{
	const int a = (int)*pX0;
	const int b = (int)*pX1;
	int low = b;
	int high = a;

	if ((a ^ b) & 0x80) {
		/* differ sign */
		low = b + a;
		if (((low ^ b) & 0x80) == 0) {
			/* |b| > |a| : the sum kept the sign of b */
			high = a - low;	/* H = -B */
		}
	} else {
		/* same sign */
		high = a - b;
		if (((high ^ a) & 0x80) == 0) {
			/* |a| > |b| : the difference kept the sign of a */
			low = b + high;	/* L = A */
		}
	}
	*pX0 = (signed char)low;
	*pX1 = (signed char)high;
}
/*
 1D-Wavelet transform.

 For the coefficient array, the well-known 'pyramid' decomposition is commonly used.

 1D Model:
   |L0L0L0L0|L0L0L0L0|H0H0H0H0|H0H0H0H0| : level 0
   |L1L1L1L1|H1H1H1H1|H0H0H0H0|H0H0H0H0| : level 1

 But this method needs a line buffer because H/L end up at different positions from X0/X1.
 So, I used an 'interleave' decomposition instead.

 1D Model:
   |L0H0L0H0|L0H0L0H0|L0H0L0H0|L0H0L0H0| : level 0
   |L1H0H1H0|L1H0H1H0|L1H0H1H0|L1H0H1H0| : level 1

 In this method, H/L and X0/X1 are always at the same position.
 This gives more speed and uses less memory.
 Of course, the results of both methods are the same,
 because the only difference is the position of the coefficients.
*/
/*
 * One 1D transform pass at step l over a run of coefficients.
 *
 * data      : first coefficient word of the run (accessed byte-wise)
 * size      : number of coefficients in the run; size>>(l+1) pairs are processed
 * l         : wavelet step (0 = finest)
 * SkipPixel : distance between neighbouring coefficients of the run, in
 *             coefficient words (1 for a row, the row width for a column)
 *
 * For every pair, Harr() is applied to the first three bytes of the two
 * words; the fourth byte is not touched.
 * NOTE(review): the byte strides 8<<l and 4<<l presumably assume
 * sizeof(int) == 4 - confirm for the target platforms.
 */
static InlineX void WaveletLevel(int* data, int size, int l, int SkipPixel)
{
	signed char* p = (signed char*)data;
	const int stride = (8<<l)*SkipPixel;	/* bytes from one pair to the next */
	const int pairOfs = (4<<l)*SkipPixel;	/* bytes between the two words of a pair */
	signed char* const end = p+(size>>(l+1))*stride;
	int ch;

	for (; p < end; p += stride) {
		for (ch = 0; ch < 3; ch++)
			Harr(p+ch, p+ch+pairOfs);
	}
}
/* The inverse pass runs the same code as the forward pass. */
#define InvWaveletLevel(d,s,l,pix) WaveletLevel(d,s,l,pix)
378
379#ifdef ZYWRLE_ENCODE
380#  ifndef ZYWRLE_QUANTIZE
381/* Type A:lower bit omitting of EZW style. */
382static InlineX void FilterWaveletSquare(int* pBuf, int width, int height, int level, int l)
383{
384	int r, s;
385	int x, y;
386	int* pH;
387	const unsigned int* pM;
388
389	pM = &(zywrleParam[level-1][l]);
390	s = 2<<l;
391	for (r = 1; r < 4; r++) {
392		pH   = pBuf;
393		if (r & 0x01)
394			pH +=  s>>1;
395		if (r & 0x02)
396			pH += (s>>1)*width;
397		for (y = 0; y < height / s; y++) {
398			for (x = 0; x < width / s; x++) {
399				/*
400				 these are same following code.
401				     pH[x] = pH[x] / (~pM[x]+1) * (~pM[x]+1);
402				     ( round pH[x] with pM[x] bit )
403				 '&' operator isn't 'round' but is 'floor'.
404				 So, we must offset when pH[x] is negative.
405				*/
406				if (((signed char*)pH)[0] & 0x80)
407					((signed char*)pH)[0] += ~((signed char*)pM)[0];
408				if (((signed char*)pH)[1] & 0x80)
409					((signed char*)pH)[1] += ~((signed char*)pM)[1];
410				if (((signed char*)pH)[2] & 0x80)
411					((signed char*)pH)[2] += ~((signed char*)pM)[2];
412				*pH &= *pM;
413				pH += s;
414			}
415			pH += (s-1)*width;
416		}
417	}
418}
419#  else
/*
 Type B: Non-linear quantization filter.

 Coefficients follow a Gaussian-like curve, and the small values that make up
 the large part of the coefficients are less important than the larger values.
 So, I use a filter based on a non-linear quantize/dequantize table.
 In general, the non-linear quantization formula is as follows.

    y=f(x)   = sign(x)*round( ((abs(x)/(2^7))^ r   )* 2^(bo-1) )*2^(8-bo)
    x=f-1(y) = sign(y)*round( ((abs(y)/(2^7))^(1/r))* 2^(bi-1) )*2^(8-bi)
 ( r:power coefficient  bi:effective MSB in input  bo:effective MSB in output )

   r < 1.0 : Smaller values are more important than larger values.
   r > 1.0 : Larger values are more important than smaller values.
   r = 1.0 : Linear quantization, which is the same as the EZW style.

 r = 0.75 is the well-known non-linear quantization used in the MP3 audio codec.
 In contrast to audio data, larger values are important in wavelet coefficients.
 So, I selected the r = 2.0 table ( quantize is x^2, dequantize is sqrt(x) ).

 Compared with EZW-style linear quantization, this filter tends to give
 sharper edges and a higher compression rate, but more blocking noise and lower quality.
 In particular, the surfaces of graphic objects show noticeable noise in the middle quality mode.

 We need only the quantized-then-dequantized (filtered) value rather than the quantized value itself,
 because all values are packed or palettized in the later ZRLE section.
 This means we do not need to modify the client decoder when we change
 the filtering procedure in the future.
 The client only decodes the coefficients given by the encoder.
*/
450static InlineX void FilterWaveletSquare(int* pBuf, int width, int height, int level, int l)
451{
452	int r, s;
453	int x, y;
454	int* pH;
455	const signed char** pM;
456
457	pM = zywrleParam[level-1][l];
458	s = 2<<l;
459	for (r = 1; r < 4; r++) {
460		pH   = pBuf;
461		if (r & 0x01)
462			pH +=  s>>1;
463		if (r & 0x02)
464			pH += (s>>1)*width;
465		for (y = 0; y < height / s; y++) {
466			for (x = 0; x < width / s; x++) {
467				((signed char*)pH)[0] = pM[0][((unsigned char*)pH)[0]];
468				((signed char*)pH)[1] = pM[1][((unsigned char*)pH)[1]];
469				((signed char*)pH)[2] = pM[2][((unsigned char*)pH)[2]];
470				pH += s;
471			}
472			pH += (s-1)*width;
473		}
474	}
475}
476#  endif
477
/*
 * Forward 2D transform of a width x height coefficient buffer.
 *
 * For every step l = 0 .. level-1: a pass along the rows, a pass along the
 * columns, then FilterWaveletSquare() on the sub-bands of that step.
 * At step l only every (1<<l)-th row and column is visited.
 */
static InlineX void Wavelet(int* pBuf, int width, int height, int level)
{
	int l;
	int* p;
	int* const rowsEnd = pBuf+height*width;
	int* const colsEnd = pBuf+width;

	for (l = 0; l < level; l++) {
		/* horizontal: one call per visited row */
		for (p = pBuf; p < rowsEnd; p += width<<l)
			WaveletLevel(p, width, l, 1);
		/* vertical: one call per visited column, stepping by a full row */
		for (p = pBuf; p < colsEnd; p += 1<<l)
			WaveletLevel(p, height, l, width);
		FilterWaveletSquare(pBuf, width, height, level, l);
	}
}
502#endif
#ifdef ZYWRLE_DECODE
/*
 * Inverse 2D transform of a width x height coefficient buffer.
 *
 * Steps run from level-1 down to 0, and within a step the column pass comes
 * before the row pass - the reverse of the order used by the forward
 * transform.  No filtering is applied.
 */
static InlineX void InvWavelet(int* pBuf, int width, int height, int level)
{
	int l;
	int* p;
	int* const colsEnd = pBuf+width;
	int* const rowsEnd = pBuf+height*width;

	for (l = level - 1; l >= 0; l--) {
		/* vertical: one call per visited column */
		for (p = pBuf; p < colsEnd; p += 1<<l)
			InvWaveletLevel(p, height, l, width);
		/* horizontal: one call per visited row */
		for (p = pBuf; p < rowsEnd; p += width<<l)
			InvWaveletLevel(p, width, l, 1);
	}
}
#endif
528
/*
 * Load/Save coefficients stuffs.
 * Coefficients are managed as 24 bit little-endian pixels: the macros use
 * bytes 0, 1 and 2 of the word at pSrc / pDst as signed char values
 * (B, G, R in that order).  Byte 3 is neither read nor written.
 * Every macro argument is parenthesized so pointer expressions such as
 * `p + 1` are evaluated before the byte-pointer cast.
 */
#define ZYWRLE_LOAD_COEFF(pSrc,R,G,B) { \
	(R) = ((signed char*)(pSrc))[2];	\
	(G) = ((signed char*)(pSrc))[1];	\
	(B) = ((signed char*)(pSrc))[0];	\
}
#define ZYWRLE_SAVE_COEFF(pDst,R,G,B) { \
	((signed char*)(pDst))[2] = (signed char)(R);	\
	((signed char*)(pDst))[1] = (signed char)(G);	\
	((signed char*)(pDst))[0] = (signed char)(B);	\
}
541
/*
 RGB <=> YUV conversion stuffs.
 Strictly speaking, YUV conversion is given by the following formulas:
   Y =  0.299R + 0.587G + 0.114B (   0<=Y<=255)
   U = -0.169R - 0.331G + 0.500B (-128<=U<=127)
   V =  0.500R - 0.419G - 0.081B (-128<=V<=127)

 I use the simple RCT (reversible color transform) conversion, which is described
 in the JPEG-2000 specification.
   Y = (R + 2G + B)/4 (   0<=Y<=255)
   U = B-G (-256<=U<=255)
   V = R-G (-256<=V<=255)
*/
/* Clamp x to 0..255 (despite the name, no rounding is involved). */
#define ROUND(x) (((x)<0)?0:(((x)>255)?255:(x)))
	/* RCT is N-bit RGB to N-bit Y and N+1-bit UV.
	 To make them the same N bits, UV is lossy.
	 For a more exact PLHarr, we reduce to the odd range (-127<=x<=127). */
/*
 * ZYWRLE_RGBYUV1: R,G,B (inputs) -> Y,U,V (outputs, int lvalues).
 * Y is shifted down by 128, U and V are halved, then the low bits are
 * cleared with ymask / uvmask.  A result of exactly -128 is moved up by one
 * mask step so that -128 never appears in the output.
 * Every macro argument is parenthesized.
 */
#define ZYWRLE_RGBYUV1(R,G,B,Y,U,V,ymask,uvmask) { \
	(Y) = ((R)+((G)<<1)+(B))>>2;	\
	(U) =  (B)-(G);	\
	(V) =  (R)-(G);	\
	(Y) -= 128;	\
	(U) >>= 1;	\
	(V) >>= 1;	\
	(Y) &= (ymask);	\
	(U) &= (uvmask);	\
	(V) &= (uvmask);	\
	if ((Y) == -128)	\
		(Y) += (0xFFFFFFFF-(ymask)+1);	\
	if ((U) == -128)	\
		(U) += (0xFFFFFFFF-(uvmask)+1);	\
	if ((V) == -128)	\
		(V) += (0xFFFFFFFF-(uvmask)+1);	\
}
/*
 * ZYWRLE_YUVRGB1: inverse of ZYWRLE_RGBYUV1.  Y, U and V are modified in
 * place; R, G and B receive the result clamped to 0..255.
 * U and V are doubled with a multiplication because left-shifting a
 * negative signed value is undefined behaviour in C.
 */
#define ZYWRLE_YUVRGB1(R,G,B,Y,U,V) { \
	(Y) += 128;	\
	(U) *= 2;	\
	(V) *= 2;	\
	(G) = (Y)-(((U)+(V))>>2);	\
	(B) = (U)+(G);	\
	(R) = (V)+(G);	\
	(G) = ROUND(G);	\
	(B) = ROUND(B);	\
	(R) = ROUND(R);	\
}
587
/*
 Coefficient packing/unpacking stuffs.
 The wavelet transform makes 4 sub coefficient images from 1 original image.

 model with pyramid decomposition:
   +------+------+
   |      |      |
   |  L   |  Hx  |
   |      |      |
   +------+------+
   |      |      |
   |  Hy  |  Hxy |
   |      |      |
   +------+------+

 So, strictly speaking, we must transfer each sub image individually.
 But at least as far as ZRLE is concerned, the following single decomposition image
 is the same as the above individual sub images. I use this format.
 (Strictly speaking, the transfer order is reversed (Hxy->Hy->Hx->L)
  to simplify the procedure for any wavelet level.)

   +------+------+
   |      L      |
   +------+------+
   |      Hx     |
   +------+------+
   |      Hy     |
   +------+------+
   |      Hxy    |
   +------+------+
*/
/*
 * INC_PTR: advance a pixel pointer by one pixel; after w+uw pixels since
 * pData, skip the rest of the scanline and remember the new row start.
 * Uses pData, w, uw and scanline from the calling scope.
 * The body is wrapped in braces so it always expands to one statement.
 */
#define INC_PTR(data) {	\
	(data)++;	\
	if( (data)-pData >= (w+uw) ){	\
		(data) += scanline-(w+uw);	\
		pData = (data);	\
	}	\
}

/*
 * ZYWRLE_TRANSFER_COEFF: visit every coefficient of one sub-band and run
 * TRANS on it, advancing `data` with INC_PTR after each one.
 *   r     : sub-band selector; bit 0 adds the horizontal offset, bit 1 the
 *           vertical offset of half a step
 *   level : wavelet step; coefficients of a sub-band are 2<<level apart
 * Uses pH, s, pEnd and pLine from the calling scope as scratch variables;
 * TRANS may refer to pH (current coefficient) and to the data argument.
 * NOTE(review): pEnd (and the final value of pH) is computed past the end of
 * a w*h buffer when r has bit 1 set; it is only compared, never dereferenced.
 */
#define ZYWRLE_TRANSFER_COEFF(pBuf,data,r,w,h,scanline,level,TRANS)	\
	pH = (pBuf);	\
	s = 2<<(level);	\
	if ((r) & 0x01)	\
		pH +=  s>>1;	\
	if ((r) & 0x02)	\
		pH += (s>>1)*(w);	\
	pEnd = pH+(h)*(w);	\
	while (pH < pEnd) {	\
		pLine = pH+(w);	\
		while (pH < pLine) {	\
			TRANS	\
			INC_PTR(data)	\
			pH += s;	\
		}	\
		pH += (s-1)*(w);	\
	}
643
/*
 * ZYWRLE_PACK_COEFF: copy one sub-band from the coefficient buffer to the
 * pixel stream (coefficient bytes are stored with ZYWRLE_SAVE_PIXEL).
 * Uses R, G, B and the scratch variables of ZYWRLE_TRANSFER_COEFF from the
 * calling scope.
 */
#define ZYWRLE_PACK_COEFF(pBuf,data,r,width,height,scanline,level)	\
	ZYWRLE_TRANSFER_COEFF(pBuf,data,r,width,height,scanline,level,ZYWRLE_LOAD_COEFF(pH,R,G,B);ZYWRLE_SAVE_PIXEL(data,R,G,B);)

/*
 * ZYWRLE_UNPACK_COEFF: the reverse direction - read pixels with
 * ZYWRLE_LOAD_PIXEL and store them as coefficients of one sub-band.
 */
#define ZYWRLE_UNPACK_COEFF(pBuf,data,r,width,height,scanline,level)	\
	ZYWRLE_TRANSFER_COEFF(pBuf,data,r,width,height,scanline,level,ZYWRLE_LOAD_PIXEL(data,R,G,B);ZYWRLE_SAVE_COEFF(pH,R,G,B);)
649
/*
 * ZYWRLE_SAVE_UNALIGN: walk the tail of pBuf - the words from index w*h up
 * to (w+uw)*(h+uh) - and run TRANS for each, advancing `data` with INC_PTR.
 * Uses pBuf, pTop, pEnd, w, h, uw, uh (and what INC_PTR needs) from the
 * calling scope; TRANS may refer to pTop and to the data argument.
 */
#define ZYWRLE_SAVE_UNALIGN(data,TRANS)	\
	pTop = pBuf+w*h;	\
	pEnd = pBuf + (w+uw)*(h+uh);	\
	while (pTop < pEnd) {	\
		TRANS	\
		INC_PTR(data)	\
		pTop++;	\
	}
658
/*
 * ZYWRLE_LOAD_UNALIGN: visit the pixels of `data` that lie outside the
 * aligned w x h area and run TRANS for each, pairing them one by one with
 * the words of pBuf starting at index w*h.  Three regions are visited in
 * this order:
 *   1. right strip   : uw columns of the first h rows
 *   2. bottom strip  : first w columns of the last uh rows
 *   3. corner        : uw columns of the last uh rows
 * Uses pBuf, pTop, pData, pEnd, pLine, w, h, uw, uh and scanline from the
 * calling scope; pData is overwritten whenever uw or uh is non-zero.
 * TRANS may refer to pData (current pixel) and pTop (current buffer word).
 * NOTE(review): in the corner pass pEnd is computed a full scanline beyond
 * the last pixel row start; it is only compared, never dereferenced.
 */
#define ZYWRLE_LOAD_UNALIGN(data,TRANS)	\
	pTop = pBuf+w*h;	\
	if (uw) {	\
		pData=         (data) + w;	\
		pEnd = (int*)(pData+ h*scanline);	\
		while (pData < (PIXEL_T*)pEnd) {	\
			pLine = (int*)(pData + uw);	\
			while (pData < (PIXEL_T*)pLine) {	\
				TRANS	\
				pData++;	\
				pTop++;	\
			}	\
			pData += scanline-uw;	\
		}	\
	}	\
	if (uh) {	\
		pData=         (data) +  h*scanline;	\
		pEnd = (int*)(pData+ uh*scanline);	\
		while (pData < (PIXEL_T*)pEnd) {	\
			pLine = (int*)(pData + w);	\
			while (pData < (PIXEL_T*)pLine) {	\
				TRANS	\
				pData++;	\
				pTop++;	\
			}	\
			pData += scanline-w;	\
		}	\
	}	\
	if (uw && uh) {	\
		pData=         (data) + w+ h*scanline;	\
		pEnd = (int*)(pData+   uh*scanline);	\
		while (pData < (PIXEL_T*)pEnd) {	\
			pLine = (int*)(pData + uw);	\
			while (pData < (PIXEL_T*)pLine) {	\
				TRANS	\
				pData++;	\
				pTop++;	\
			}	\
			pData += scanline-uw;	\
		}	\
	}
700
/*
 * Round *pW and *pH down to a multiple of 1<<level (in place).
 * Either value can become 0 when it is smaller than 1<<level.
 */
static InlineX void zywrleCalcSize(int* pW, int* pH, int level)
{
	const int alignMask = ~((1<<level)-1);

	*pW &= alignMask;
	*pH &= alignMask;
}
706
707#endif /* ZYWRLE_ONCE */
708
709#ifndef CPIXEL
#ifdef ZYWRLE_ENCODE
/*
 * Convert a width x height pixel area to YUV coefficients.
 *
 * pBuf     : destination, one int per pixel, rows packed (stride = width)
 * data     : source pixels
 * scanline : source row stride in pixels
 *
 * V, Y and U are stored in coefficient bytes 2, 1 and 0 respectively.
 */
static InlineX void ZYWRLE_RGBYUV(int* pBuf, PIXEL_T* data, int width, int height, int scanline)
{
	int R, G, B;
	int Y, U, V;
	int* pRowEnd;
	int* const pBufEnd = pBuf+height*width;

	while (pBuf < pBufEnd) {
		for (pRowEnd = pBuf+width; pBuf < pRowEnd; pBuf++, data++) {
			ZYWRLE_LOAD_PIXEL(data,R,G,B);
			ZYWRLE_RGBYUV1(R,G,B,Y,U,V,ZYWRLE_YMASK,ZYWRLE_UVMASK);
			ZYWRLE_SAVE_COEFF(pBuf,V,Y,U);
		}
		/* skip the part of the source row outside the converted area */
		data += scanline-width;
	}
}
#endif
#ifdef ZYWRLE_DECODE
/*
 * Convert width x height YUV coefficients back to pixels.
 *
 * pBuf     : source, one int per pixel, rows packed (stride = width);
 *            V, Y and U are read from coefficient bytes 2, 1 and 0
 * data     : destination pixels
 * scanline : destination row stride in pixels
 */
static InlineX void ZYWRLE_YUVRGB(int* pBuf, PIXEL_T* data, int width, int height, int scanline) {
	int R, G, B;
	int Y, U, V;
	int* pRowEnd;
	int* const pBufEnd = pBuf+height*width;

	while (pBuf < pBufEnd) {
		for (pRowEnd = pBuf+width; pBuf < pRowEnd; pBuf++, data++) {
			ZYWRLE_LOAD_COEFF(pBuf,V,Y,U);
			ZYWRLE_YUVRGB1(R,G,B,Y,U,V);
			ZYWRLE_SAVE_PIXEL(data,R,G,B);
		}
		/* skip the part of the destination row outside the converted area */
		data += scanline-width;
	}
}
#endif
751
#ifdef ZYWRLE_ENCODE
/*
 * Encoder entry point: transform a w x h pixel area into packed wavelet
 * coefficients.
 *
 * dst      : output pixels (row stride = scanline)
 * src      : input pixels (row stride = scanline)
 * w, h     : area size in pixels; the transform covers the part rounded
 *            down to a multiple of 1<<level, the remaining uw columns and
 *            uh rows are copied through unchanged
 * scanline : row stride of src and dst in pixels
 * level    : number of wavelet steps
 * pBuf     : work buffer; indexes up to (w+uw)*(h+uh) are used
 *
 * Returns NULL when the rounded-down area is empty, otherwise the dst
 * pointer as advanced by the packing macros.
 *
 * pData is (re)initialised to dst after ZYWRLE_LOAD_UNALIGN because that
 * macro uses pData as its own cursor; INC_PTR needs it to mark the start of
 * the current dst row.
 */
PIXEL_T* ZYWRLE_ANALYZE(PIXEL_T* dst, PIXEL_T* src, int w, int h, int scanline, int level, int* pBuf) {
	int l;
	int uw = w;
	int uh = h;
	int* pTop;
	int* pEnd;
	int* pLine;
	PIXEL_T* pData;
	int R, G, B;
	int s;
	int* pH;

	zywrleCalcSize(&w, &h, level);
	if (w == 0 || h == 0)
		return NULL;
	uw -= w;
	uh -= h;

	/* stash the unaligned pixels in the tail of pBuf before dst is written */
	ZYWRLE_LOAD_UNALIGN(src,*(PIXEL_T*)pTop=*pData;)
	pData = dst;
	ZYWRLE_RGBYUV(pBuf, src, w, h, scanline);
	Wavelet(pBuf, w, h, level);
	for (l = 0; l < level; l++) {
		ZYWRLE_PACK_COEFF(pBuf, dst, 3, w, h, scanline, l);
		ZYWRLE_PACK_COEFF(pBuf, dst, 2, w, h, scanline, l);
		ZYWRLE_PACK_COEFF(pBuf, dst, 1, w, h, scanline, l);
		if (l == level - 1) {
			/* the low-pass band exists only once, at the last step */
			ZYWRLE_PACK_COEFF(pBuf, dst, 0, w, h, scanline, l);
		}
	}
	ZYWRLE_SAVE_UNALIGN(dst,*dst=*(PIXEL_T*)pTop;)
	return dst;
}
#endif
#ifdef ZYWRLE_DECODE
/*
 * Decoder entry point: rebuild a w x h pixel area from packed wavelet
 * coefficients.
 *
 * dst      : output pixels (row stride = scanline)
 * src      : packed coefficients in pixel format (row stride = scanline)
 * w, h     : area size in pixels; the inverse transform covers the part
 *            rounded down to a multiple of 1<<level, the remaining uw
 *            columns and uh rows are copied through unchanged
 * scanline : row stride of src and dst in pixels
 * level    : number of wavelet steps
 * pBuf     : work buffer; indexes up to (w+uw)*(h+uh) are used
 *
 * Returns NULL when the rounded-down area is empty, otherwise the src
 * pointer as advanced by the unpacking macros.
 */
PIXEL_T* ZYWRLE_SYNTHESIZE(PIXEL_T* dst, PIXEL_T* src, int w, int h, int scanline, int level, int* pBuf)
{
	int l;
	int uw = w;
	int uh = h;
	int* pTop;
	int* pEnd;
	int* pLine;
	PIXEL_T* pData;
	int R, G, B;
	int s;
	int* pH;

	zywrleCalcSize(&w, &h, level);
	if (w == 0 || h == 0)
		return NULL;
	uw -= w;
	uh -= h;

	/* INC_PTR tracks the start of the current src row through pData */
	pData = src;
	for (l = 0; l < level; l++) {
		ZYWRLE_UNPACK_COEFF(pBuf, src, 3, w, h, scanline, l);
		ZYWRLE_UNPACK_COEFF(pBuf, src, 2, w, h, scanline, l);
		ZYWRLE_UNPACK_COEFF(pBuf, src, 1, w, h, scanline, l);
		if (l == level - 1) {
			/* the low-pass band exists only once, at the last step */
			ZYWRLE_UNPACK_COEFF(pBuf, src, 0, w, h, scanline, l);
		}
	}
	/* unaligned pixels follow the coefficients; park them in the tail of pBuf */
	ZYWRLE_SAVE_UNALIGN(src,*(PIXEL_T*)pTop=*src;)
	InvWavelet(pBuf, w, h, level);
	ZYWRLE_YUVRGB(pBuf, dst, w, h, scanline);
	ZYWRLE_LOAD_UNALIGN(dst,*pData=*(PIXEL_T*)pTop;)
	return src;
}
#endif
823#endif  /* CPIXEL */
824
/* Drop the per-instantiation names defined near the top of this file
   without a preceding #undef. */
#undef ZYWRLE_RGBYUV
#undef ZYWRLE_YUVRGB
#undef ZYWRLE_LOAD_PIXEL
#undef ZYWRLE_SAVE_PIXEL
829