1/* 2 * The copyright in this software is being made available under the 2-clauses 3 * BSD License, included below. This software may be subject to other third 4 * party and contributor rights, including patent rights, and no such rights 5 * are granted under this license. 6 * 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium 8 * Copyright (c) 2002-2014, Professor Benoit Macq 9 * Copyright (c) 2001-2003, David Janssens 10 * Copyright (c) 2002-2003, Yannick Verschueren 11 * Copyright (c) 2003-2007, Francois-Olivier Devaux 12 * Copyright (c) 2003-2014, Antonin Descampe 13 * Copyright (c) 2005, Herve Drolon, FreeImage Team 14 * Copyright (c) 2007, Jonathan Ballard <dzonatas@dzonux.net> 15 * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com> 16 * All rights reserved. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions 20 * are met: 21 * 1. Redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer. 23 * 2. Redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' 28 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40#ifdef __SSE__ 41#include <xmmintrin.h> 42#endif 43 44#include "opj_includes.h" 45 46/** @defgroup DWT DWT - Implementation of a discrete wavelet transform */ 47/*@{*/ 48 49/** @name Local data structures */ 50/*@{*/ 51 52typedef struct dwt_local { 53 OPJ_INT32* mem; 54 OPJ_SIZE_T mem_count; 55 OPJ_INT32 dn; 56 OPJ_INT32 sn; 57 OPJ_INT32 cas; 58} opj_dwt_t; 59 60typedef union { 61 OPJ_FLOAT32 f[4]; 62} opj_v4_t; 63 64typedef struct v4dwt_local { 65 opj_v4_t* wavelet ; 66 OPJ_INT32 dn ; 67 OPJ_INT32 sn ; 68 OPJ_INT32 cas ; 69} opj_v4dwt_t ; 70 71static const OPJ_FLOAT32 opj_dwt_alpha = 1.586134342f; /* 12994 */ 72static const OPJ_FLOAT32 opj_dwt_beta = 0.052980118f; /* 434 */ 73static const OPJ_FLOAT32 opj_dwt_gamma = -0.882911075f; /* -7233 */ 74static const OPJ_FLOAT32 opj_dwt_delta = -0.443506852f; /* -3633 */ 75 76static const OPJ_FLOAT32 opj_K = 1.230174105f; /* 10078 */ 77static const OPJ_FLOAT32 opj_c13318 = 1.625732422f; 78 79/*@}*/ 80 81/** 82Virtual function type for wavelet transform in 1-D 83*/ 84typedef void (*DWT1DFN)(opj_dwt_t* v); 85 86/** @name Local static functions */ 87/*@{*/ 88 89/** 90Forward lazy transform (horizontal) 91*/ 92static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); 93/** 94Forward lazy transform (vertical) 95*/ 96static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas); 97/** 98Inverse lazy transform (horizontal) 99*/ 100static void opj_dwt_interleave_h(opj_dwt_t* h, OPJ_INT32 *a); 101/** 102Inverse lazy transform (vertical) 103*/ 104static void opj_dwt_interleave_v(opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x); 105/** 106Forward 5-3 wavelet transform in 1-D 107*/ 108static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); 109/** 110Inverse 5-3 wavelet transform in 1-D 111*/ 112static void opj_dwt_decode_1(opj_dwt_t *v); 113static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); 114/** 115Forward 9-7 wavelet transform in 1-D 116*/ 117static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); 118/** 119Explicit calculation of the Quantization Stepsizes 120*/ 121static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, opj_stepsize_t *bandno_stepsize); 122/** 123Inverse wavelet transform in 2-D. 124*/ 125static OPJ_BOOL opj_dwt_decode_tile(const opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i, DWT1DFN fn); 126 127static OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec, 128 void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32)); 129 130static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_UINT32 i); 131 132/* <summary> */ 133/* Inverse 9-7 wavelet transform in 1-D. */ 134/* </summary> */ 135static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt); 136 137static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size); 138 139static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); 140 141#ifdef __SSE__ 142static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c); 143 144static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c); 145 146#else 147static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c); 148 149static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c); 150 151#endif 152 153/*@}*/ 154 155/*@}*/ 156 157#define IDX_S(i) (i)*2 158#define IDX_D(i) 1 + (i)* 2 159#define UNDERFLOW_SN(i) ((i) >= sn&&sn>0) 160#define UNDERFLOW_DN(i) ((i) >= dn&&dn>0) 161#define OVERFLOW_S(i) (IDX_S(i) >= a_count) 162#define OVERFLOW_D(i) (IDX_D(i) >= a_count) 163 164#define OPJ_S(i) a[IDX_S(i)] 165#define OPJ_D(i) a[IDX_D(i)] 166#define OPJ_S_(i) ((i)<0 ? OPJ_S(0) : (UNDERFLOW_SN(i) ? OPJ_S(sn - 1) : OVERFLOW_S(i) ? OPJ_S(i - 1) : OPJ_S(i))) 167#define OPJ_D_(i) ((i)<0 ? OPJ_D(0) : (UNDERFLOW_DN(i) ? OPJ_D(dn - 1) : OVERFLOW_D(i) ? OPJ_D(i - 1) : OPJ_D(i))) 168/* new */ 169#define OPJ_SS_(i) ((i)<0 ? OPJ_S(0) : (UNDERFLOW_DN(i) ? OPJ_S(dn - 1) : OVERFLOW_S(i) ? OPJ_S(i - 1) : OPJ_S(i))) 170#define OPJ_DD_(i) ((i)<0 ? OPJ_D(0) : (UNDERFLOW_SN(i) ? OPJ_D(sn - 1) : OVERFLOW_D(i) ? OPJ_D(i - 1) : OPJ_D(i))) 171 172/* <summary> */ 173/* This table contains the norms of the 5-3 wavelets for different bands. */ 174/* </summary> */ 175static const OPJ_FLOAT64 opj_dwt_norms[4][10] = { 176 {1.000, 1.500, 2.750, 5.375, 10.68, 21.34, 42.67, 85.33, 170.7, 341.3}, 177 {1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9}, 178 {1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9}, 179 {.7186, .9218, 1.586, 3.043, 6.019, 12.01, 24.00, 47.97, 95.93} 180}; 181 182/* <summary> */ 183/* This table contains the norms of the 9-7 wavelets for different bands. */ 184/* </summary> */ 185static const OPJ_FLOAT64 opj_dwt_norms_real[4][10] = { 186 {1.000, 1.965, 4.177, 8.403, 16.90, 33.84, 67.69, 135.3, 270.6, 540.9}, 187 {2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0}, 188 {2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0}, 189 {2.080, 3.865, 8.307, 17.18, 34.71, 69.59, 139.3, 278.6, 557.2} 190}; 191 192/* 193========================================================== 194 local functions 195========================================================== 196*/ 197 198/* <summary> */ 199/* Forward lazy transform (horizontal). */ 200/* </summary> */ 201static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { 202 OPJ_INT32 i; 203 OPJ_INT32 * l_dest = b; 204 OPJ_INT32 * l_src = a+cas; 205 206 for (i=0; i<sn; ++i) { 207 *l_dest++ = *l_src; 208 l_src += 2; 209 } 210 211 l_dest = b + sn; 212 l_src = a + 1 - cas; 213 214 for (i=0; i<dn; ++i) { 215 *l_dest++=*l_src; 216 l_src += 2; 217 } 218} 219 220/* <summary> */ 221/* Forward lazy transform (vertical). */ 222/* </summary> */ 223static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas) { 224 OPJ_INT32 i = sn; 225 OPJ_INT32 * l_dest = b; 226 OPJ_INT32 * l_src = a+cas; 227 228 while (i--) { 229 *l_dest = *l_src; 230 l_dest += x; 231 l_src += 2; 232 } /* b[i*x]=a[2*i+cas]; */ 233 234 l_dest = b + sn * x; 235 l_src = a + 1 - cas; 236 237 i = dn; 238 while (i--) { 239 *l_dest = *l_src; 240 l_dest += x; 241 l_src += 2; 242 } /*b[(sn+i)*x]=a[(2*i+1-cas)];*/ 243} 244 245/* <summary> */ 246/* Inverse lazy transform (horizontal). */ 247/* </summary> */ 248static void opj_dwt_interleave_h(opj_dwt_t* h, OPJ_INT32 *a) { 249 OPJ_INT32 *ai = a; 250 OPJ_INT32 *bi = h->mem + h->cas; 251 OPJ_INT32 i = h->sn; 252 while( i-- ) { 253 *bi = *(ai++); 254 bi += 2; 255 } 256 ai = a + h->sn; 257 bi = h->mem + 1 - h->cas; 258 i = h->dn ; 259 while( i-- ) { 260 *bi = *(ai++); 261 bi += 2; 262 } 263} 264 265/* <summary> */ 266/* Inverse lazy transform (vertical). */ 267/* </summary> */ 268static void opj_dwt_interleave_v(opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) { 269 OPJ_INT32 *ai = a; 270 OPJ_INT32 *bi = v->mem + v->cas; 271 OPJ_INT32 i = v->sn; 272 while( i-- ) { 273 *bi = *ai; 274 bi += 2; 275 ai += x; 276 } 277 ai = a + (v->sn * x); 278 bi = v->mem + 1 - v->cas; 279 i = v->dn ; 280 while( i-- ) { 281 *bi = *ai; 282 bi += 2; 283 ai += x; 284 } 285} 286 287 288/* <summary> */ 289/* Forward 5-3 wavelet transform in 1-D. */ 290/* </summary> */ 291static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { 292 OPJ_INT32 i; 293 294 if (!cas) { 295 if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ 296 for (i = 0; i < dn; i++) OPJ_D(i) -= (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1; 297 for (i = 0; i < sn; i++) OPJ_S(i) += (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2; 298 } 299 } else { 300 if (!sn && dn == 1) /* NEW : CASE ONE ELEMENT */ 301 OPJ_S(0) *= 2; 302 else { 303 for (i = 0; i < dn; i++) OPJ_S(i) -= (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1; 304 for (i = 0; i < sn; i++) OPJ_D(i) += (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2; 305 } 306 } 307} 308 309/* <summary> */ 310/* Inverse 5-3 wavelet transform in 1-D. */ 311/* </summary> */ 312static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { 313 OPJ_INT32 i; 314 315 if (!cas) { 316 if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ 317 for (i = 0; i < sn; i++) OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2; 318 for (i = 0; i < dn; i++) OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1; 319 } 320 } else { 321 if (!sn && dn == 1) /* NEW : CASE ONE ELEMENT */ 322 OPJ_S(0) /= 2; 323 else { 324 for (i = 0; i < sn; i++) OPJ_D(i) -= (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2; 325 for (i = 0; i < dn; i++) OPJ_S(i) += (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1; 326 } 327 } 328} 329 330/* <summary> */ 331/* Inverse 5-3 wavelet transform in 1-D. */ 332/* </summary> */ 333static void opj_dwt_decode_1(opj_dwt_t *v) { 334 opj_dwt_decode_1_(v->mem, v->mem_count, v->dn, v->sn, v->cas); 335} 336 337/* <summary> */ 338/* Forward 9-7 wavelet transform in 1-D. */ 339/* </summary> */ 340static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { 341 OPJ_INT32 i; 342 if (!cas) { 343 if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ 344 for (i = 0; i < dn; i++) 345 OPJ_D(i) -= opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 12993); 346 for (i = 0; i < sn; i++) 347 OPJ_S(i) -= opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 434); 348 for (i = 0; i < dn; i++) 349 OPJ_D(i) += opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 7233); 350 for (i = 0; i < sn; i++) 351 OPJ_S(i) += opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 3633); 352 for (i = 0; i < dn; i++) 353 OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 5038); /*5038 */ 354 for (i = 0; i < sn; i++) 355 OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 6659); /*6660 */ 356 } 357 } else { 358 if ((sn > 0) || (dn > 1)) { /* NEW : CASE ONE ELEMENT */ 359 for (i = 0; i < dn; i++) 360 OPJ_S(i) -= opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 12993); 361 for (i = 0; i < sn; i++) 362 OPJ_D(i) -= opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 434); 363 for (i = 0; i < dn; i++) 364 OPJ_S(i) += opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 7233); 365 for (i = 0; i < sn; i++) 366 OPJ_D(i) += opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 3633); 367 for (i = 0; i < dn; i++) 368 OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 5038); /*5038 */ 369 for (i = 0; i < sn; i++) 370 OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 6659); /*6660 */ 371 } 372 } 373} 374 375static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, opj_stepsize_t *bandno_stepsize) { 376 OPJ_INT32 p, n; 377 p = opj_int_floorlog2(stepsize) - 13; 378 n = 11 - opj_int_floorlog2(stepsize); 379 bandno_stepsize->mant = (n < 0 ? stepsize >> -n : stepsize << n) & 0x7ff; 380 bandno_stepsize->expn = numbps - p; 381} 382 383/* 384========================================================== 385 DWT interface 386========================================================== 387*/ 388 389 390/* <summary> */ 391/* Forward 5-3 wavelet transform in 2-D. */ 392/* </summary> */ 393static INLINE OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec, void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32)) 394{ 395 OPJ_INT32 i, j, k; 396 OPJ_INT32 *a = 00; 397 OPJ_INT32 *aj = 00; 398 OPJ_INT32 *bj = 00; 399 OPJ_INT32 w, l; 400 401 OPJ_INT32 rw; /* width of the resolution level computed */ 402 OPJ_INT32 rh; /* height of the resolution level computed */ 403 OPJ_SIZE_T l_data_count; 404 OPJ_SIZE_T l_data_size; 405 406 opj_tcd_resolution_t * l_cur_res = 0; 407 opj_tcd_resolution_t * l_last_res = 0; 408 409 w = tilec->x1-tilec->x0; 410 l = (OPJ_INT32)tilec->numresolutions-1; 411 a = tilec->data; 412 413 l_cur_res = tilec->resolutions + l; 414 l_last_res = l_cur_res - 1; 415 416 l_data_count = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions) * (OPJ_UINT32)sizeof(OPJ_INT32); 417 l_data_size = l_data_count * (OPJ_UINT32)sizeof(OPJ_INT32); 418 bj = (OPJ_INT32*)opj_malloc(l_data_size); 419 if (! bj) { 420 return OPJ_FALSE; 421 } 422 i = l; 423 424 while (i--) { 425 OPJ_INT32 rw1; /* width of the resolution level once lower than computed one */ 426 OPJ_INT32 rh1; /* height of the resolution level once lower than computed one */ 427 OPJ_INT32 cas_col; /* 0 = non inversion on horizontal filtering 1 = inversion between low-pass and high-pass filtering */ 428 OPJ_INT32 cas_row; /* 0 = non inversion on vertical filtering 1 = inversion between low-pass and high-pass filtering */ 429 OPJ_INT32 dn, sn; 430 431 rw = l_cur_res->x1 - l_cur_res->x0; 432 rh = l_cur_res->y1 - l_cur_res->y0; 433 rw1 = l_last_res->x1 - l_last_res->x0; 434 rh1 = l_last_res->y1 - l_last_res->y0; 435 436 cas_row = l_cur_res->x0 & 1; 437 cas_col = l_cur_res->y0 & 1; 438 439 sn = rh1; 440 dn = rh - rh1; 441 for (j = 0; j < rw; ++j) { 442 aj = a + j; 443 for (k = 0; k < rh; ++k) { 444 bj[k] = aj[k*w]; 445 } 446 447 (*p_function) (bj, l_data_count, dn, sn, cas_col); 448 449 opj_dwt_deinterleave_v(bj, aj, dn, sn, w, cas_col); 450 } 451 452 sn = rw1; 453 dn = rw - rw1; 454 455 for (j = 0; j < rh; j++) { 456 aj = a + j * w; 457 for (k = 0; k < rw; k++) bj[k] = aj[k]; 458 (*p_function) (bj, l_data_count, dn, sn, cas_row); 459 opj_dwt_deinterleave_h(bj, aj, dn, sn, cas_row); 460 } 461 462 l_cur_res = l_last_res; 463 464 --l_last_res; 465 } 466 467 opj_free(bj); 468 return OPJ_TRUE; 469} 470 471/* Forward 5-3 wavelet transform in 2-D. */ 472/* </summary> */ 473OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec) 474{ 475 return opj_dwt_encode_procedure(tilec,opj_dwt_encode_1); 476} 477 478/* <summary> */ 479/* Inverse 5-3 wavelet transform in 2-D. */ 480/* </summary> */ 481OPJ_BOOL opj_dwt_decode(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres) { 482 return opj_dwt_decode_tile(tilec, numres, &opj_dwt_decode_1); 483} 484 485 486/* <summary> */ 487/* Get gain of 5-3 wavelet transform. */ 488/* </summary> */ 489OPJ_UINT32 opj_dwt_getgain(OPJ_UINT32 orient) { 490 if (orient == 0) 491 return 0; 492 if (orient == 1 || orient == 2) 493 return 1; 494 return 2; 495} 496 497/* <summary> */ 498/* Get norm of 5-3 wavelet. */ 499/* </summary> */ 500OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient) { 501 return opj_dwt_norms[orient][level]; 502} 503 504/* <summary> */ 505/* Forward 9-7 wavelet transform in 2-D. */ 506/* </summary> */ 507OPJ_BOOL opj_dwt_encode_real(opj_tcd_tilecomp_t * tilec) 508{ 509 return opj_dwt_encode_procedure(tilec,opj_dwt_encode_1_real); 510} 511 512/* <summary> */ 513/* Get gain of 9-7 wavelet transform. */ 514/* </summary> */ 515OPJ_UINT32 opj_dwt_getgain_real(OPJ_UINT32 orient) { 516 (void)orient; 517 return 0; 518} 519 520/* <summary> */ 521/* Get norm of 9-7 wavelet. */ 522/* </summary> */ 523OPJ_FLOAT64 opj_dwt_getnorm_real(OPJ_UINT32 level, OPJ_UINT32 orient) { 524 return opj_dwt_norms_real[orient][level]; 525} 526 527void opj_dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, OPJ_UINT32 prec) { 528 OPJ_UINT32 numbands, bandno; 529 numbands = 3 * tccp->numresolutions - 2; 530 for (bandno = 0; bandno < numbands; bandno++) { 531 OPJ_FLOAT64 stepsize; 532 OPJ_UINT32 resno, level, orient, gain; 533 534 resno = (bandno == 0) ? 0 : ((bandno - 1) / 3 + 1); 535 orient = (bandno == 0) ? 0 : ((bandno - 1) % 3 + 1); 536 level = tccp->numresolutions - 1 - resno; 537 gain = (tccp->qmfbid == 0) ? 0 : ((orient == 0) ? 0 : (((orient == 1) || (orient == 2)) ? 1 : 2)); 538 if (tccp->qntsty == J2K_CCP_QNTSTY_NOQNT) { 539 stepsize = 1.0; 540 } else { 541 OPJ_FLOAT64 norm = opj_dwt_norms_real[orient][level]; 542 stepsize = (1 << (gain)) / norm; 543 } 544 opj_dwt_encode_stepsize((OPJ_INT32) floor(stepsize * 8192.0), (OPJ_INT32)(prec + gain), &tccp->stepsizes[bandno]); 545 } 546} 547 548/* <summary> */ 549/* Determine maximum computed resolution level for inverse wavelet transform */ 550/* </summary> */ 551static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_UINT32 i) { 552 OPJ_UINT32 mr = 0; 553 OPJ_UINT32 w; 554 while( --i ) { 555 ++r; 556 if( mr < ( w = (OPJ_UINT32)(r->x1 - r->x0) ) ) 557 mr = w ; 558 if( mr < ( w = (OPJ_UINT32)(r->y1 - r->y0) ) ) 559 mr = w ; 560 } 561 return mr ; 562} 563 564/* <summary> */ 565/* Inverse wavelet transform in 2-D. */ 566/* </summary> */ 567static OPJ_BOOL opj_dwt_decode_tile(const opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres, DWT1DFN dwt_1D) { 568 opj_dwt_t h; 569 opj_dwt_t v; 570 571 opj_tcd_resolution_t* tr = tilec->resolutions; 572 573 OPJ_UINT32 rw = (OPJ_UINT32)(tr->x1 - tr->x0); /* width of the resolution level computed */ 574 OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0); /* height of the resolution level computed */ 575 576 OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0); 577 578 h.mem_count = opj_dwt_max_resolution(tr, numres); 579 if (((OPJ_UINT32)-1) / (OPJ_UINT32)sizeof(OPJ_INT32) < (OPJ_UINT32)h.mem_count) { 580 return OPJ_FALSE; 581 } 582 h.mem = (OPJ_INT32*)opj_aligned_malloc(h.mem_count * sizeof(OPJ_INT32)); 583 if (! h.mem){ 584 /* FIXME event manager error callback */ 585 return OPJ_FALSE; 586 } 587 588 v.mem_count = h.mem_count; 589 v.mem = h.mem; 590 591 while( --numres) { 592 OPJ_INT32 * restrict tiledp = tilec->data; 593 OPJ_UINT32 j; 594 595 ++tr; 596 h.sn = (OPJ_INT32)rw; 597 v.sn = (OPJ_INT32)rh; 598 599 rw = (OPJ_UINT32)(tr->x1 - tr->x0); 600 rh = (OPJ_UINT32)(tr->y1 - tr->y0); 601 602 h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn); 603 h.cas = tr->x0 % 2; 604 605 for(j = 0; j < rh; ++j) { 606 opj_dwt_interleave_h(&h, &tiledp[j*w]); 607 (dwt_1D)(&h); 608 memcpy(&tiledp[j*w], h.mem, rw * sizeof(OPJ_INT32)); 609 } 610 611 v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn); 612 v.cas = tr->y0 % 2; 613 614 for(j = 0; j < rw; ++j){ 615 OPJ_UINT32 k; 616 opj_dwt_interleave_v(&v, &tiledp[j], (OPJ_INT32)w); 617 (dwt_1D)(&v); 618 for(k = 0; k < rh; ++k) { 619 tiledp[k * w + j] = v.mem[k]; 620 } 621 } 622 } 623 opj_aligned_free(h.mem); 624 return OPJ_TRUE; 625} 626 627static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size){ 628 OPJ_FLOAT32* restrict bi = (OPJ_FLOAT32*) (w->wavelet + w->cas); 629 OPJ_INT32 count = w->sn; 630 OPJ_INT32 i, k; 631 632 for(k = 0; k < 2; ++k){ 633 if ( count + 3 * x < size && ((size_t) a & 0x0f) == 0 && ((size_t) bi & 0x0f) == 0 && (x & 0x0f) == 0 ) { 634 /* Fast code path */ 635 for(i = 0; i < count; ++i){ 636 OPJ_INT32 j = i; 637 bi[i*8 ] = a[j]; 638 j += x; 639 bi[i*8 + 1] = a[j]; 640 j += x; 641 bi[i*8 + 2] = a[j]; 642 j += x; 643 bi[i*8 + 3] = a[j]; 644 } 645 } 646 else { 647 /* Slow code path */ 648 for(i = 0; i < count; ++i){ 649 OPJ_INT32 j = i; 650 bi[i*8 ] = a[j]; 651 j += x; 652 if(j >= size) continue; 653 bi[i*8 + 1] = a[j]; 654 j += x; 655 if(j >= size) continue; 656 bi[i*8 + 2] = a[j]; 657 j += x; 658 if(j >= size) continue; 659 bi[i*8 + 3] = a[j]; /* This one*/ 660 } 661 } 662 663 bi = (OPJ_FLOAT32*) (w->wavelet + 1 - w->cas); 664 a += w->sn; 665 size -= w->sn; 666 count = w->dn; 667 } 668} 669 670static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read){ 671 opj_v4_t* restrict bi = v->wavelet + v->cas; 672 OPJ_INT32 i; 673 674 for(i = 0; i < v->sn; ++i){ 675 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); 676 } 677 678 a += v->sn * x; 679 bi = v->wavelet + 1 - v->cas; 680 681 for(i = 0; i < v->dn; ++i){ 682 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); 683 } 684} 685 686#ifdef __SSE__ 687 688static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){ 689 __m128* restrict vw = (__m128*) w; 690 OPJ_INT32 i; 691 /* 4x unrolled loop */ 692 for(i = 0; i < count >> 2; ++i){ 693 *vw = _mm_mul_ps(*vw, c); 694 vw += 2; 695 *vw = _mm_mul_ps(*vw, c); 696 vw += 2; 697 *vw = _mm_mul_ps(*vw, c); 698 vw += 2; 699 *vw = _mm_mul_ps(*vw, c); 700 vw += 2; 701 } 702 count &= 3; 703 for(i = 0; i < count; ++i){ 704 *vw = _mm_mul_ps(*vw, c); 705 vw += 2; 706 } 707} 708 709void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c){ 710 __m128* restrict vl = (__m128*) l; 711 __m128* restrict vw = (__m128*) w; 712 OPJ_INT32 i; 713 __m128 tmp1, tmp2, tmp3; 714 tmp1 = vl[0]; 715 for(i = 0; i < m; ++i){ 716 tmp2 = vw[-1]; 717 tmp3 = vw[ 0]; 718 vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c)); 719 tmp1 = tmp3; 720 vw += 2; 721 } 722 vl = vw - 2; 723 if(m >= k){ 724 return; 725 } 726 c = _mm_add_ps(c, c); 727 c = _mm_mul_ps(c, vl[0]); 728 for(; m < k; ++m){ 729 __m128 tmp = vw[-1]; 730 vw[-1] = _mm_add_ps(tmp, c); 731 vw += 2; 732 } 733} 734 735#else 736 737static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c) 738{ 739 OPJ_FLOAT32* restrict fw = (OPJ_FLOAT32*) w; 740 OPJ_INT32 i; 741 for(i = 0; i < count; ++i){ 742 OPJ_FLOAT32 tmp1 = fw[i*8 ]; 743 OPJ_FLOAT32 tmp2 = fw[i*8 + 1]; 744 OPJ_FLOAT32 tmp3 = fw[i*8 + 2]; 745 OPJ_FLOAT32 tmp4 = fw[i*8 + 3]; 746 fw[i*8 ] = tmp1 * c; 747 fw[i*8 + 1] = tmp2 * c; 748 fw[i*8 + 2] = tmp3 * c; 749 fw[i*8 + 3] = tmp4 * c; 750 } 751} 752 753static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c) 754{ 755 OPJ_FLOAT32* restrict fl = (OPJ_FLOAT32*) l; 756 OPJ_FLOAT32* restrict fw = (OPJ_FLOAT32*) w; 757 OPJ_INT32 i; 758 for(i = 0; i < m; ++i){ 759 OPJ_FLOAT32 tmp1_1 = fl[0]; 760 OPJ_FLOAT32 tmp1_2 = fl[1]; 761 OPJ_FLOAT32 tmp1_3 = fl[2]; 762 OPJ_FLOAT32 tmp1_4 = fl[3]; 763 OPJ_FLOAT32 tmp2_1 = fw[-4]; 764 OPJ_FLOAT32 tmp2_2 = fw[-3]; 765 OPJ_FLOAT32 tmp2_3 = fw[-2]; 766 OPJ_FLOAT32 tmp2_4 = fw[-1]; 767 OPJ_FLOAT32 tmp3_1 = fw[0]; 768 OPJ_FLOAT32 tmp3_2 = fw[1]; 769 OPJ_FLOAT32 tmp3_3 = fw[2]; 770 OPJ_FLOAT32 tmp3_4 = fw[3]; 771 fw[-4] = tmp2_1 + ((tmp1_1 + tmp3_1) * c); 772 fw[-3] = tmp2_2 + ((tmp1_2 + tmp3_2) * c); 773 fw[-2] = tmp2_3 + ((tmp1_3 + tmp3_3) * c); 774 fw[-1] = tmp2_4 + ((tmp1_4 + tmp3_4) * c); 775 fl = fw; 776 fw += 8; 777 } 778 if(m < k){ 779 OPJ_FLOAT32 c1; 780 OPJ_FLOAT32 c2; 781 OPJ_FLOAT32 c3; 782 OPJ_FLOAT32 c4; 783 c += c; 784 c1 = fl[0] * c; 785 c2 = fl[1] * c; 786 c3 = fl[2] * c; 787 c4 = fl[3] * c; 788 for(; m < k; ++m){ 789 OPJ_FLOAT32 tmp1 = fw[-4]; 790 OPJ_FLOAT32 tmp2 = fw[-3]; 791 OPJ_FLOAT32 tmp3 = fw[-2]; 792 OPJ_FLOAT32 tmp4 = fw[-1]; 793 fw[-4] = tmp1 + c1; 794 fw[-3] = tmp2 + c2; 795 fw[-2] = tmp3 + c3; 796 fw[-1] = tmp4 + c4; 797 fw += 8; 798 } 799 } 800} 801 802#endif 803 804/* <summary> */ 805/* Inverse 9-7 wavelet transform in 1-D. */ 806/* </summary> */ 807void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt) 808{ 809 OPJ_INT32 a, b; 810 if(dwt->cas == 0) { 811 if(!((dwt->dn > 0) || (dwt->sn > 1))){ 812 return; 813 } 814 a = 0; 815 b = 1; 816 }else{ 817 if(!((dwt->sn > 0) || (dwt->dn > 1))) { 818 return; 819 } 820 a = 1; 821 b = 0; 822 } 823#ifdef __SSE__ 824 opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K)); 825 opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c13318)); 826 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta)); 827 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma)); 828 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta)); 829 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha)); 830#else 831 opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K); 832 opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318); 833 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_delta); 834 opj_v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), opj_dwt_gamma); 835 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_beta); 836 opj_v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), opj_dwt_alpha); 837#endif 838} 839 840 841/* <summary> */ 842/* Inverse 9-7 wavelet transform in 2-D. */ 843/* </summary> */ 844OPJ_BOOL opj_dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, OPJ_UINT32 numres) 845{ 846 opj_v4dwt_t h; 847 opj_v4dwt_t v; 848 849 opj_tcd_resolution_t* res = tilec->resolutions; 850 851 OPJ_UINT32 rw = (OPJ_UINT32)(res->x1 - res->x0); /* width of the resolution level computed */ 852 OPJ_UINT32 rh = (OPJ_UINT32)(res->y1 - res->y0); /* height of the resolution level computed */ 853 854 OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0); 855 856 OPJ_UINT32 mr = opj_dwt_max_resolution(res, numres); 857 858 if (mr >= ((OPJ_UINT32)-5)) { 859 return OPJ_FALSE; 860 } 861 mr += 5; 862 863 if (((OPJ_UINT32)-1) / (OPJ_UINT32)sizeof(opj_v4_t) < mr) { 864 return OPJ_FALSE; 865 } 866 h.wavelet = (opj_v4_t*) opj_aligned_malloc(mr * sizeof(opj_v4_t)); 867 if (!h.wavelet) { 868 /* FIXME event manager error callback */ 869 return OPJ_FALSE; 870 } 871 v.wavelet = h.wavelet; 872 873 while( --numres) { 874 OPJ_FLOAT32 * restrict aj = (OPJ_FLOAT32*) tilec->data; 875 OPJ_UINT32 bufsize = (OPJ_UINT32)((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0)); 876 OPJ_INT32 j; 877 878 h.sn = (OPJ_INT32)rw; 879 v.sn = (OPJ_INT32)rh; 880 881 ++res; 882 883 rw = (OPJ_UINT32)(res->x1 - res->x0); /* width of the resolution level computed */ 884 rh = (OPJ_UINT32)(res->y1 - res->y0); /* height of the resolution level computed */ 885 886 h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn); 887 h.cas = res->x0 % 2; 888 889 for(j = (OPJ_INT32)rh; j > 3; j -= 4) { 890 OPJ_INT32 k; 891 opj_v4dwt_interleave_h(&h, aj, (OPJ_INT32)w, (OPJ_INT32)bufsize); 892 opj_v4dwt_decode(&h); 893 894 for(k = (OPJ_INT32)rw; --k >= 0;){ 895 aj[k ] = h.wavelet[k].f[0]; 896 aj[k+(OPJ_INT32)w ] = h.wavelet[k].f[1]; 897 aj[k+(OPJ_INT32)w*2] = h.wavelet[k].f[2]; 898 aj[k+(OPJ_INT32)w*3] = h.wavelet[k].f[3]; 899 } 900 901 aj += w*4; 902 bufsize -= w*4; 903 } 904 905 if (rh & 0x03) { 906 OPJ_INT32 k; 907 j = rh & 0x03; 908 opj_v4dwt_interleave_h(&h, aj, (OPJ_INT32)w, (OPJ_INT32)bufsize); 909 opj_v4dwt_decode(&h); 910 for(k = (OPJ_INT32)rw; --k >= 0;){ 911 switch(j) { 912 case 3: aj[k+(OPJ_INT32)w*2] = h.wavelet[k].f[2]; 913 case 2: aj[k+(OPJ_INT32)w ] = h.wavelet[k].f[1]; 914 case 1: aj[k ] = h.wavelet[k].f[0]; 915 } 916 } 917 } 918 919 v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn); 920 v.cas = res->y0 % 2; 921 922 aj = (OPJ_FLOAT32*) tilec->data; 923 for(j = (OPJ_INT32)rw; j > 3; j -= 4){ 924 OPJ_UINT32 k; 925 926 opj_v4dwt_interleave_v(&v, aj, (OPJ_INT32)w, 4); 927 opj_v4dwt_decode(&v); 928 929 for(k = 0; k < rh; ++k){ 930 memcpy(&aj[k*w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32)); 931 } 932 aj += 4; 933 } 934 935 if (rw & 0x03){ 936 OPJ_UINT32 k; 937 938 j = rw & 0x03; 939 940 opj_v4dwt_interleave_v(&v, aj, (OPJ_INT32)w, j); 941 opj_v4dwt_decode(&v); 942 943 for(k = 0; k < rh; ++k){ 944 memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32)); 945 } 946 } 947 } 948 949 opj_aligned_free(h.wavelet); 950 return OPJ_TRUE; 951} 952