/*
 * jfdctflt.c
 *
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains a floating-point implementation of the
 * forward DCT (Discrete Cosine Transform).
 *
 * This implementation should be more accurate than either of the integer
 * DCT implementations.  However, it may not give the same results on all
 * machines because of differences in roundoff behavior.  Speed will depend
 * on the hardware's floating point capacity.
 *
 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
 * on each column.  Direct algorithms are also available, but they are
 * much more complex and seem not to be any faster when reduced to code.
 *
 * This implementation is based on Arai, Agui, and Nakajima's algorithm for
 * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
 * Japanese, but the algorithm is described in the Pennebaker & Mitchell
 * JPEG textbook (see REFERENCES section in file README).  The following code
 * is based directly on figure 4-8 in P&M.
 * While an 8-point DCT cannot be done in fewer than 11 multiplies, it is
 * possible to arrange the computation so that many of the multiplies are
 * simple scalings of the final outputs.  These multiplies can then be
 * folded into the multiplications or divisions by the JPEG quantization
 * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
 * to be done in the DCT itself.
 * The primary disadvantage of this method is that with a fixed-point
 * implementation, accuracy is lost due to imprecise representation of the
 * scaled quantization values.  However, that problem does not arise if
 * we use floating point arithmetic.
35e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek */ 365ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner 37d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek#define JPEG_INTERNALS 38da9d61c96c412f6babc7f824152609562f302388Chris Lattner#include "jinclude.h" 39da9d61c96c412f6babc7f824152609562f302388Chris Lattner#include "jpeglib.h" 40da9d61c96c412f6babc7f824152609562f302388Chris Lattner#include "jdct.h" /* Private declarations for DCT subsystem */ 41da9d61c96c412f6babc7f824152609562f302388Chris Lattner 42da9d61c96c412f6babc7f824152609562f302388Chris Lattner#ifdef DCT_FLOAT_SUPPORTED 43d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek 44d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek 45d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek/* 46d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek * This module is specialized to the case DCTSIZE = 8. 47d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek */ 48d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek 49d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek#if DCTSIZE != 8 50d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ 51d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek#endif 525ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner 53fbc33388c199d6f731170bf55719d57373a09c1fChris Lattner 54fbc33388c199d6f731170bf55719d57373a09c1fChris Lattner/* 55f15674c680730c652a37a16a5d3f3ff429b0c308Chris Lattner * Perform the forward DCT on one block of samples. 
566f78c3b8b9343e7e9fbf2d457cccf00df6da5d47Chris Lattner */ 576f78c3b8b9343e7e9fbf2d457cccf00df6da5d47Chris Lattner 58da9d61c96c412f6babc7f824152609562f302388Chris LattnerGLOBAL(void) 59e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenekjpeg_fdct_float (FAST_FLOAT * data) 60e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek{ 61e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 627e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek FAST_FLOAT tmp10, tmp11, tmp12, tmp13; 637e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek FAST_FLOAT z1, z2, z3, z4, z5, z11, z13; 647e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek FAST_FLOAT *dataptr; 657e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek int ctr; 667e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek 677e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek /* Pass 1: process rows. */ 687e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek 697e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek dataptr = data; 707e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 717e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek tmp0 = dataptr[0] + dataptr[7]; 727e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek tmp7 = dataptr[0] - dataptr[7]; 737e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek tmp1 = dataptr[1] + dataptr[6]; 747e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek tmp6 = dataptr[1] - dataptr[6]; 757e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek tmp2 = dataptr[2] + dataptr[5]; 767e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek tmp5 = dataptr[2] - dataptr[5]; 77da9d61c96c412f6babc7f824152609562f302388Chris Lattner tmp3 = dataptr[3] + dataptr[4]; 78e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek tmp4 = dataptr[3] - dataptr[4]; 79e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek 80e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek /* Even part */ 
81e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek 82da9d61c96c412f6babc7f824152609562f302388Chris Lattner tmp10 = tmp0 + tmp3; /* phase 2 */ 83277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek tmp13 = tmp0 - tmp3; 842b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner tmp11 = tmp1 + tmp2; 85277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek tmp12 = tmp1 - tmp2; 862b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner 872b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner dataptr[0] = tmp10 + tmp11; /* phase 3 */ 885f074266cc59563036c40516c814d63825723e20Ted Kremenek dataptr[4] = tmp10 - tmp11; 89274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek 90e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */ 91e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek dataptr[2] = tmp13 + z1; /* phase 5 */ 92866bdf74547efe32c320554837ffce00fcc084feTed Kremenek dataptr[6] = tmp13 - z1; 93866bdf74547efe32c320554837ffce00fcc084feTed Kremenek 94866bdf74547efe32c320554837ffce00fcc084feTed Kremenek /* Odd part */ 95866bdf74547efe32c320554837ffce00fcc084feTed Kremenek 96e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek tmp10 = tmp4 + tmp5; /* phase 2 */ 97866bdf74547efe32c320554837ffce00fcc084feTed Kremenek tmp11 = tmp5 + tmp6; 98aff6ef8e7bc3c3739f984c390e0af693e60be064Chris Lattner tmp12 = tmp6 + tmp7; 99866bdf74547efe32c320554837ffce00fcc084feTed Kremenek 1001b5285e1ba31975864da356b2ed927e87670e654Chris Lattner /* The rotator is modified from fig 4-8 to avoid extra negations. 
*/ 1015ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */ 1025ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */ 1035ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */ 1047b78b7c6d96deb1e63f8d0655ee6fa53de0b65efTed Kremenek z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */ 1057b78b7c6d96deb1e63f8d0655ee6fa53de0b65efTed Kremenek 1067b78b7c6d96deb1e63f8d0655ee6fa53de0b65efTed Kremenek z11 = tmp7 + z3; /* phase 5 */ 107aff6ef8e7bc3c3739f984c390e0af693e60be064Chris Lattner z13 = tmp7 - z3; 1087b78b7c6d96deb1e63f8d0655ee6fa53de0b65efTed Kremenek 109aff6ef8e7bc3c3739f984c390e0af693e60be064Chris Lattner dataptr[5] = z13 + z2; /* phase 6 */ 110e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek dataptr[3] = z13 - z2; 111866bdf74547efe32c320554837ffce00fcc084feTed Kremenek dataptr[1] = z11 + z4; 112866bdf74547efe32c320554837ffce00fcc084feTed Kremenek dataptr[7] = z11 - z4; 113866bdf74547efe32c320554837ffce00fcc084feTed Kremenek 114e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek dataptr += DCTSIZE; /* advance pointer to next row */ 115866bdf74547efe32c320554837ffce00fcc084feTed Kremenek } 116898a0bb1972efb6e03cb1151412ec7392cef07deChris Lattner 117898a0bb1972efb6e03cb1151412ec7392cef07deChris Lattner /* Pass 2: process columns. 
*/ 11859d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek 1192b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner dataptr = data; 120866bdf74547efe32c320554837ffce00fcc084feTed Kremenek for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 12189d7ee9619d2dbdfa8d956a695c612a104a92cadTed Kremenek tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; 122d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; 123277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; 124277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; 125277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; 126277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; 127d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; 128d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; 129863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner 130d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner /* Even part */ 131863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner 132863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner tmp10 = tmp0 + tmp3; /* phase 2 */ 133863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner tmp13 = tmp0 - tmp3; 134863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner tmp11 = tmp1 + tmp2; 135863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner tmp12 = tmp1 - tmp2; 136d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner 137d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ 138d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner dataptr[DCTSIZE*4] = tmp10 - tmp11; 139d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner 140d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); 
/* c4 */ 141866bdf74547efe32c320554837ffce00fcc084feTed Kremenek dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ 142866bdf74547efe32c320554837ffce00fcc084feTed Kremenek dataptr[DCTSIZE*6] = tmp13 - z1; 143866bdf74547efe32c320554837ffce00fcc084feTed Kremenek 1445f074266cc59563036c40516c814d63825723e20Ted Kremenek /* Odd part */ 1455f074266cc59563036c40516c814d63825723e20Ted Kremenek 1465f074266cc59563036c40516c814d63825723e20Ted Kremenek tmp10 = tmp4 + tmp5; /* phase 2 */ 1475f074266cc59563036c40516c814d63825723e20Ted Kremenek tmp11 = tmp5 + tmp6; 1485f074266cc59563036c40516c814d63825723e20Ted Kremenek tmp12 = tmp6 + tmp7; 1495f074266cc59563036c40516c814d63825723e20Ted Kremenek 1505f074266cc59563036c40516c814d63825723e20Ted Kremenek /* The rotator is modified from fig 4-8 to avoid extra negations. */ 15189d7ee9619d2dbdfa8d956a695c612a104a92cadTed Kremenek z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */ 152898a0bb1972efb6e03cb1151412ec7392cef07deChris Lattner z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */ 153e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */ 154e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */ 155e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek 156cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek z11 = tmp7 + z3; /* phase 5 */ 15759d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek z13 = tmp7 - z3; 15859d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek 15959d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ 16059d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek dataptr[DCTSIZE*3] = z13 - z2; 16159d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek dataptr[DCTSIZE*1] = z11 + z4; 16259d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek dataptr[DCTSIZE*7] = z11 - z4; 163d6f53dc4951aace69014619761760addac9e59ecTed Kremenek 16459d08cb672136322375e5400578ee1fbd0947de2Ted 
Kremenek dataptr++; /* advance pointer to next column */ 165cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek } 166cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek} 167cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek 168d6f53dc4951aace69014619761760addac9e59ecTed Kremenek#endif /* DCT_FLOAT_SUPPORTED */ 169898a0bb1972efb6e03cb1151412ec7392cef07deChris Lattner