/*
 * jfdctflt.c
 *
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains a floating-point implementation of the
 * forward DCT (Discrete Cosine Transform).
 *
 * This implementation should be more accurate than either of the integer
 * DCT implementations.  However, it may not give the same results on all
 * machines because of differences in roundoff behavior.  Speed will depend
 * on the hardware's floating point capacity.
 *
 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
 * on each column.  Direct algorithms are also available, but they are
 * much more complex and seem not to be any faster when reduced to code.
 *
 * This implementation is based on Arai, Agui, and Nakajima's algorithm for
 * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
 * Japanese, but the algorithm is described in the Pennebaker & Mitchell
 * JPEG textbook (see REFERENCES section in file README).  The following code
 * is based directly on figure 4-8 in P&M.
 * While an 8-point DCT cannot be done in less than 11 multiplies, it is
 * possible to arrange the computation so that many of the multiplies are
 * simple scalings of the final outputs.  These multiplies can then be
 * folded into the multiplications or divisions by the JPEG quantization
 * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
 * to be done in the DCT itself.
 * The primary disadvantage of this method is that with a fixed-point
 * implementation, accuracy is lost due to imprecise representation of the
 * scaled quantization values.  However, that problem does not arise if
 * we use floating point arithmetic.
 */
365ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner
37d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek#define JPEG_INTERNALS
38da9d61c96c412f6babc7f824152609562f302388Chris Lattner#include "jinclude.h"
39da9d61c96c412f6babc7f824152609562f302388Chris Lattner#include "jpeglib.h"
40da9d61c96c412f6babc7f824152609562f302388Chris Lattner#include "jdct.h"		/* Private declarations for DCT subsystem */
41da9d61c96c412f6babc7f824152609562f302388Chris Lattner
42da9d61c96c412f6babc7f824152609562f302388Chris Lattner#ifdef DCT_FLOAT_SUPPORTED
43d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
44d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
/*
 * This module is specialized to the case DCTSIZE = 8.
 */
48d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
49d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek#if DCTSIZE != 8
50d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
51d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek#endif
525ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner
53fbc33388c199d6f731170bf55719d57373a09c1fChris Lattner
54fbc33388c199d6f731170bf55719d57373a09c1fChris Lattner/*
55f15674c680730c652a37a16a5d3f3ff429b0c308Chris Lattner * Perform the forward DCT on one block of samples.
566f78c3b8b9343e7e9fbf2d457cccf00df6da5d47Chris Lattner */
576f78c3b8b9343e7e9fbf2d457cccf00df6da5d47Chris Lattner
58da9d61c96c412f6babc7f824152609562f302388Chris LattnerGLOBAL(void)
59e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenekjpeg_fdct_float (FAST_FLOAT * data)
60e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek{
61e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
627e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
637e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;
647e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  FAST_FLOAT *dataptr;
657e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  int ctr;
667e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek
677e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  /* Pass 1: process rows. */
687e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek
697e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  dataptr = data;
707e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
717e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek    tmp0 = dataptr[0] + dataptr[7];
727e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek    tmp7 = dataptr[0] - dataptr[7];
737e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek    tmp1 = dataptr[1] + dataptr[6];
747e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek    tmp6 = dataptr[1] - dataptr[6];
757e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek    tmp2 = dataptr[2] + dataptr[5];
767e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek    tmp5 = dataptr[2] - dataptr[5];
77da9d61c96c412f6babc7f824152609562f302388Chris Lattner    tmp3 = dataptr[3] + dataptr[4];
78e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek    tmp4 = dataptr[3] - dataptr[4];
79e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
80e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek    /* Even part */
81e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
82da9d61c96c412f6babc7f824152609562f302388Chris Lattner    tmp10 = tmp0 + tmp3;	/* phase 2 */
83277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek    tmp13 = tmp0 - tmp3;
842b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner    tmp11 = tmp1 + tmp2;
85277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek    tmp12 = tmp1 - tmp2;
862b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner
872b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner    dataptr[0] = tmp10 + tmp11; /* phase 3 */
885f074266cc59563036c40516c814d63825723e20Ted Kremenek    dataptr[4] = tmp10 - tmp11;
89274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek
90e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
91e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek    dataptr[2] = tmp13 + z1;	/* phase 5 */
92866bdf74547efe32c320554837ffce00fcc084feTed Kremenek    dataptr[6] = tmp13 - z1;
93866bdf74547efe32c320554837ffce00fcc084feTed Kremenek
94866bdf74547efe32c320554837ffce00fcc084feTed Kremenek    /* Odd part */
95866bdf74547efe32c320554837ffce00fcc084feTed Kremenek
96e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek    tmp10 = tmp4 + tmp5;	/* phase 2 */
97866bdf74547efe32c320554837ffce00fcc084feTed Kremenek    tmp11 = tmp5 + tmp6;
98aff6ef8e7bc3c3739f984c390e0af693e60be064Chris Lattner    tmp12 = tmp6 + tmp7;
99866bdf74547efe32c320554837ffce00fcc084feTed Kremenek
1001b5285e1ba31975864da356b2ed927e87670e654Chris Lattner    /* The rotator is modified from fig 4-8 to avoid extra negations. */
1015ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
1025ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
1035ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
1047b78b7c6d96deb1e63f8d0655ee6fa53de0b65efTed Kremenek    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
1057b78b7c6d96deb1e63f8d0655ee6fa53de0b65efTed Kremenek
1067b78b7c6d96deb1e63f8d0655ee6fa53de0b65efTed Kremenek    z11 = tmp7 + z3;		/* phase 5 */
107aff6ef8e7bc3c3739f984c390e0af693e60be064Chris Lattner    z13 = tmp7 - z3;
1087b78b7c6d96deb1e63f8d0655ee6fa53de0b65efTed Kremenek
109aff6ef8e7bc3c3739f984c390e0af693e60be064Chris Lattner    dataptr[5] = z13 + z2;	/* phase 6 */
110e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek    dataptr[3] = z13 - z2;
111866bdf74547efe32c320554837ffce00fcc084feTed Kremenek    dataptr[1] = z11 + z4;
112866bdf74547efe32c320554837ffce00fcc084feTed Kremenek    dataptr[7] = z11 - z4;
113866bdf74547efe32c320554837ffce00fcc084feTed Kremenek
114e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek    dataptr += DCTSIZE;		/* advance pointer to next row */
115866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  }
116898a0bb1972efb6e03cb1151412ec7392cef07deChris Lattner
117898a0bb1972efb6e03cb1151412ec7392cef07deChris Lattner  /* Pass 2: process columns. */
11859d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek
1192b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner  dataptr = data;
120866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
12189d7ee9619d2dbdfa8d956a695c612a104a92cadTed Kremenek    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
122d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
123277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
124277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
125277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
126277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
127d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
128d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
129863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner
130d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner    /* Even part */
131863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner
132863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner    tmp10 = tmp0 + tmp3;	/* phase 2 */
133863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner    tmp13 = tmp0 - tmp3;
134863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner    tmp11 = tmp1 + tmp2;
135863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner    tmp12 = tmp1 - tmp2;
136d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner
137d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
138d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner    dataptr[DCTSIZE*4] = tmp10 - tmp11;
139d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner
140d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
141866bdf74547efe32c320554837ffce00fcc084feTed Kremenek    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
142866bdf74547efe32c320554837ffce00fcc084feTed Kremenek    dataptr[DCTSIZE*6] = tmp13 - z1;
143866bdf74547efe32c320554837ffce00fcc084feTed Kremenek
1445f074266cc59563036c40516c814d63825723e20Ted Kremenek    /* Odd part */
1455f074266cc59563036c40516c814d63825723e20Ted Kremenek
1465f074266cc59563036c40516c814d63825723e20Ted Kremenek    tmp10 = tmp4 + tmp5;	/* phase 2 */
1475f074266cc59563036c40516c814d63825723e20Ted Kremenek    tmp11 = tmp5 + tmp6;
1485f074266cc59563036c40516c814d63825723e20Ted Kremenek    tmp12 = tmp6 + tmp7;
1495f074266cc59563036c40516c814d63825723e20Ted Kremenek
1505f074266cc59563036c40516c814d63825723e20Ted Kremenek    /* The rotator is modified from fig 4-8 to avoid extra negations. */
15189d7ee9619d2dbdfa8d956a695c612a104a92cadTed Kremenek    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
152898a0bb1972efb6e03cb1151412ec7392cef07deChris Lattner    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
153e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
154e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
155e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
156cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek    z11 = tmp7 + z3;		/* phase 5 */
15759d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    z13 = tmp7 - z3;
15859d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek
15959d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
16059d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    dataptr[DCTSIZE*3] = z13 - z2;
16159d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    dataptr[DCTSIZE*1] = z11 + z4;
16259d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    dataptr[DCTSIZE*7] = z11 - z4;
163d6f53dc4951aace69014619761760addac9e59ecTed Kremenek
16459d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    dataptr++;			/* advance pointer to next column */
165cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek  }
166cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek}
167cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek
168d6f53dc4951aace69014619761760addac9e59ecTed Kremenek#endif /* DCT_FLOAT_SUPPORTED */
169898a0bb1972efb6e03cb1151412ec7392cef07deChris Lattner