1// Copyright 2012 Google Inc. All Rights Reserved.
2//
3// Use of this source code is governed by a BSD-style license
4// that can be found in the COPYING file in the root of the source
5// tree. An additional intellectual property rights grant can be found
6// in the file PATENTS. All contributing project authors may
7// be found in the AUTHORS file in the root of the source tree.
8// -----------------------------------------------------------------------------
9//
10// Image transforms and color space conversion methods for lossless decoder.
11//
12// Authors: Vikas Arora (vikaas.arora@gmail.com)
13//          Jyrki Alakuijala (jyrki@google.com)
14//          Urvang Joshi (urvang@google.com)
15
16#include "./dsp.h"
17
18#include <math.h>
19#include <stdlib.h>
20#include "../dec/vp8li.h"
21#include "../utils/endian_inl.h"
22#include "./lossless.h"
23#include "./yuv.h"
24
25#define MAX_DIFF_COST (1e30f)
26
27// lookup table for small values of log2(int)
28const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
29  0.0000000000000000f, 0.0000000000000000f,
30  1.0000000000000000f, 1.5849625007211560f,
31  2.0000000000000000f, 2.3219280948873621f,
32  2.5849625007211560f, 2.8073549220576041f,
33  3.0000000000000000f, 3.1699250014423121f,
34  3.3219280948873621f, 3.4594316186372973f,
35  3.5849625007211560f, 3.7004397181410921f,
36  3.8073549220576041f, 3.9068905956085187f,
37  4.0000000000000000f, 4.0874628412503390f,
38  4.1699250014423121f, 4.2479275134435852f,
39  4.3219280948873626f, 4.3923174227787606f,
40  4.4594316186372973f, 4.5235619560570130f,
41  4.5849625007211560f, 4.6438561897747243f,
42  4.7004397181410917f, 4.7548875021634682f,
43  4.8073549220576037f, 4.8579809951275718f,
44  4.9068905956085187f, 4.9541963103868749f,
45  5.0000000000000000f, 5.0443941193584533f,
46  5.0874628412503390f, 5.1292830169449663f,
47  5.1699250014423121f, 5.2094533656289501f,
48  5.2479275134435852f, 5.2854022188622487f,
49  5.3219280948873626f, 5.3575520046180837f,
50  5.3923174227787606f, 5.4262647547020979f,
51  5.4594316186372973f, 5.4918530963296747f,
52  5.5235619560570130f, 5.5545888516776376f,
53  5.5849625007211560f, 5.6147098441152083f,
54  5.6438561897747243f, 5.6724253419714951f,
55  5.7004397181410917f, 5.7279204545631987f,
56  5.7548875021634682f, 5.7813597135246599f,
57  5.8073549220576037f, 5.8328900141647412f,
58  5.8579809951275718f, 5.8826430493618415f,
59  5.9068905956085187f, 5.9307373375628866f,
60  5.9541963103868749f, 5.9772799234999167f,
61  6.0000000000000000f, 6.0223678130284543f,
62  6.0443941193584533f, 6.0660891904577720f,
63  6.0874628412503390f, 6.1085244567781691f,
64  6.1292830169449663f, 6.1497471195046822f,
65  6.1699250014423121f, 6.1898245588800175f,
66  6.2094533656289501f, 6.2288186904958804f,
67  6.2479275134435852f, 6.2667865406949010f,
68  6.2854022188622487f, 6.3037807481771030f,
69  6.3219280948873626f, 6.3398500028846243f,
70  6.3575520046180837f, 6.3750394313469245f,
71  6.3923174227787606f, 6.4093909361377017f,
72  6.4262647547020979f, 6.4429434958487279f,
73  6.4594316186372973f, 6.4757334309663976f,
74  6.4918530963296747f, 6.5077946401986963f,
75  6.5235619560570130f, 6.5391588111080309f,
76  6.5545888516776376f, 6.5698556083309478f,
77  6.5849625007211560f, 6.5999128421871278f,
78  6.6147098441152083f, 6.6293566200796094f,
79  6.6438561897747243f, 6.6582114827517946f,
80  6.6724253419714951f, 6.6865005271832185f,
81  6.7004397181410917f, 6.7142455176661224f,
82  6.7279204545631987f, 6.7414669864011464f,
83  6.7548875021634682f, 6.7681843247769259f,
84  6.7813597135246599f, 6.7944158663501061f,
85  6.8073549220576037f, 6.8201789624151878f,
86  6.8328900141647412f, 6.8454900509443747f,
87  6.8579809951275718f, 6.8703647195834047f,
88  6.8826430493618415f, 6.8948177633079437f,
89  6.9068905956085187f, 6.9188632372745946f,
90  6.9307373375628866f, 6.9425145053392398f,
91  6.9541963103868749f, 6.9657842846620869f,
92  6.9772799234999167f, 6.9886846867721654f,
93  7.0000000000000000f, 7.0112272554232539f,
94  7.0223678130284543f, 7.0334230015374501f,
95  7.0443941193584533f, 7.0552824355011898f,
96  7.0660891904577720f, 7.0768155970508308f,
97  7.0874628412503390f, 7.0980320829605263f,
98  7.1085244567781691f, 7.1189410727235076f,
99  7.1292830169449663f, 7.1395513523987936f,
100  7.1497471195046822f, 7.1598713367783890f,
101  7.1699250014423121f, 7.1799090900149344f,
102  7.1898245588800175f, 7.1996723448363644f,
103  7.2094533656289501f, 7.2191685204621611f,
104  7.2288186904958804f, 7.2384047393250785f,
105  7.2479275134435852f, 7.2573878426926521f,
106  7.2667865406949010f, 7.2761244052742375f,
107  7.2854022188622487f, 7.2946207488916270f,
108  7.3037807481771030f, 7.3128829552843557f,
109  7.3219280948873626f, 7.3309168781146167f,
110  7.3398500028846243f, 7.3487281542310771f,
111  7.3575520046180837f, 7.3663222142458160f,
112  7.3750394313469245f, 7.3837042924740519f,
113  7.3923174227787606f, 7.4008794362821843f,
114  7.4093909361377017f, 7.4178525148858982f,
115  7.4262647547020979f, 7.4346282276367245f,
116  7.4429434958487279f, 7.4512111118323289f,
117  7.4594316186372973f, 7.4676055500829976f,
118  7.4757334309663976f, 7.4838157772642563f,
119  7.4918530963296747f, 7.4998458870832056f,
120  7.5077946401986963f, 7.5156998382840427f,
121  7.5235619560570130f, 7.5313814605163118f,
122  7.5391588111080309f, 7.5468944598876364f,
123  7.5545888516776376f, 7.5622424242210728f,
124  7.5698556083309478f, 7.5774288280357486f,
125  7.5849625007211560f, 7.5924570372680806f,
126  7.5999128421871278f, 7.6073303137496104f,
127  7.6147098441152083f, 7.6220518194563764f,
128  7.6293566200796094f, 7.6366246205436487f,
129  7.6438561897747243f, 7.6510516911789281f,
130  7.6582114827517946f, 7.6653359171851764f,
131  7.6724253419714951f, 7.6794800995054464f,
132  7.6865005271832185f, 7.6934869574993252f,
133  7.7004397181410917f, 7.7073591320808825f,
134  7.7142455176661224f, 7.7210991887071855f,
135  7.7279204545631987f, 7.7347096202258383f,
136  7.7414669864011464f, 7.7481928495894605f,
137  7.7548875021634682f, 7.7615512324444795f,
138  7.7681843247769259f, 7.7747870596011736f,
139  7.7813597135246599f, 7.7879025593914317f,
140  7.7944158663501061f, 7.8008998999203047f,
141  7.8073549220576037f, 7.8137811912170374f,
142  7.8201789624151878f, 7.8265484872909150f,
143  7.8328900141647412f, 7.8392037880969436f,
144  7.8454900509443747f, 7.8517490414160571f,
145  7.8579809951275718f, 7.8641861446542797f,
146  7.8703647195834047f, 7.8765169465649993f,
147  7.8826430493618415f, 7.8887432488982591f,
148  7.8948177633079437f, 7.9008668079807486f,
149  7.9068905956085187f, 7.9128893362299619f,
150  7.9188632372745946f, 7.9248125036057812f,
151  7.9307373375628866f, 7.9366379390025709f,
152  7.9425145053392398f, 7.9483672315846778f,
153  7.9541963103868749f, 7.9600019320680805f,
154  7.9657842846620869f, 7.9715435539507719f,
155  7.9772799234999167f, 7.9829935746943103f,
156  7.9886846867721654f, 7.9943534368588577f
157};
158
159const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
160  0.00000000f,    0.00000000f,  2.00000000f,   4.75488750f,
161  8.00000000f,   11.60964047f,  15.50977500f,  19.65148445f,
162  24.00000000f,  28.52932501f,  33.21928095f,  38.05374781f,
163  43.01955001f,  48.10571634f,  53.30296891f,  58.60335893f,
164  64.00000000f,  69.48686830f,  75.05865003f,  80.71062276f,
165  86.43856190f,  92.23866588f,  98.10749561f,  104.04192499f,
166  110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
167  134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
168  160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
169  186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
170  212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
171  240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
172  268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
173  296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
174  325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
175  354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
176  384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
177  413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
178  444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
179  474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
180  505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
181  536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
182  568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
183  600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
184  632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
185  664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
186  696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
187  729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
188  762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
189  795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
190  828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
191  862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
192  896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
193  929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
194  963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
195  998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
196  1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
197  1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
198  1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
199  1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
200  1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
201  1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
202  1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
203  1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
204  1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
205  1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
206  1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
207  1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
208  1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
209  1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
210  1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
211  1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
212  1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
213  1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
214  1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
215  1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
216  1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
217  1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
218  1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
219  1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
220  1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
221  1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
222  1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
223  2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
224};
225
226const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
227  { 0, 0}, { 0, 0}, { 1, 0}, { 2, 0}, { 3, 0}, { 4, 1}, { 4, 1}, { 5, 1},
228  { 5, 1}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 7, 2}, { 7, 2}, { 7, 2},
229  { 7, 2}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3},
230  { 8, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3},
231  { 9, 3}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
232  {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
233  {10, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
234  {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
235  {11, 4}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
236  {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
237  {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
238  {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
239  {12, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
240  {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
241  {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
242  {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
243  {13, 5}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
244  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
245  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
246  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
247  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
248  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
249  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
250  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
251  {14, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
252  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
253  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
254  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
255  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
256  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
257  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
258  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
259  {15, 6}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
260  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
261  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
262  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
263  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
264  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
265  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
266  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
267  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
268  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
269  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
270  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
271  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
272  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
273  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
274  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
275  {16, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
276  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
277  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
278  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
279  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
280  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
281  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
282  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
283  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
284  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
285  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
286  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
287  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
288  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
289  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
290  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
291};
292
293const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
294   0,  0,  0,  0,  0,  0,  1,  0,  1,  0,  1,  2,  3,  0,  1,  2,  3,
295   0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  4,  5,  6,  7,
296   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
297   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
298   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
299  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
300   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
301  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
302   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
303  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
304  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
305  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
306   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
307  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
308  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
309  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
310   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
311  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
312  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
313  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
314  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
315  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
316  96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
317  112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
318  127,
319   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
320  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
321  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
322  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
323  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
324  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
325  96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
326  112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
327};
328
329// The threshold till approximate version of log_2 can be used.
330// Practically, we can get rid of the call to log() as the two values match to
331// very high degree (the ratio of these two is 0.99999x).
332// Keeping a high threshold for now.
333#define APPROX_LOG_WITH_CORRECTION_MAX  65536
334#define APPROX_LOG_MAX                   4096
335#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
336static float FastSLog2Slow(uint32_t v) {
337  assert(v >= LOG_LOOKUP_IDX_MAX);
338  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
339    int log_cnt = 0;
340    uint32_t y = 1;
341    int correction = 0;
342    const float v_f = (float)v;
343    const uint32_t orig_v = v;
344    do {
345      ++log_cnt;
346      v = v >> 1;
347      y = y << 1;
348    } while (v >= LOG_LOOKUP_IDX_MAX);
349    // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
350    // Xf = floor(Xf) * (1 + (v % y) / v)
351    // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
352    // The correction factor: log(1 + d) ~ d; for very small d values, so
353    // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
354    // LOG_2_RECIPROCAL ~ 23/16
355    correction = (23 * (orig_v & (y - 1))) >> 4;
356    return v_f * (kLog2Table[v] + log_cnt) + correction;
357  } else {
358    return (float)(LOG_2_RECIPROCAL * v * log((double)v));
359  }
360}
361
362static float FastLog2Slow(uint32_t v) {
363  assert(v >= LOG_LOOKUP_IDX_MAX);
364  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
365    int log_cnt = 0;
366    uint32_t y = 1;
367    const uint32_t orig_v = v;
368    double log_2;
369    do {
370      ++log_cnt;
371      v = v >> 1;
372      y = y << 1;
373    } while (v >= LOG_LOOKUP_IDX_MAX);
374    log_2 = kLog2Table[v] + log_cnt;
375    if (orig_v >= APPROX_LOG_MAX) {
376      // Since the division is still expensive, add this correction factor only
377      // for large values of 'v'.
378      const int correction = (23 * (orig_v & (y - 1))) >> 4;
379      log_2 += (double)correction / orig_v;
380    }
381    return (float)log_2;
382  } else {
383    return (float)(LOG_2_RECIPROCAL * log((double)v));
384  }
385}
386
387//------------------------------------------------------------------------------
388// Image transforms.
389
390// Mostly used to reduce code size + readability
391static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
392
393// In-place sum of each component with mod 256.
394static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
395  const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
396  const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
397  *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
398}
399
400static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
401  return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
402}
403
404static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
405  return Average2(Average2(a0, a2), a1);
406}
407
408static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
409                                     uint32_t a2, uint32_t a3) {
410  return Average2(Average2(a0, a1), Average2(a2, a3));
411}
412
413static WEBP_INLINE uint32_t Clip255(uint32_t a) {
414  if (a < 256) {
415    return a;
416  }
417  // return 0, when a is a negative integer.
418  // return 255, when a is positive.
419  return ~a >> 24;
420}
421
422static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
423  return Clip255(a + b - c);
424}
425
426static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
427                                                   uint32_t c2) {
428  const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
429  const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
430                                         (c1 >> 16) & 0xff,
431                                         (c2 >> 16) & 0xff);
432  const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
433                                         (c1 >> 8) & 0xff,
434                                         (c2 >> 8) & 0xff);
435  const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
436  return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
437}
438
439static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
440  return Clip255(a + (a - b) / 2);
441}
442
443static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
444                                                   uint32_t c2) {
445  const uint32_t ave = Average2(c0, c1);
446  const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
447  const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
448  const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
449  const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
450  return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
451}
452
453// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
454#if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
455# define LOCAL_INLINE __attribute__ ((noinline))
456#else
457# define LOCAL_INLINE WEBP_INLINE
458#endif
459
460static LOCAL_INLINE int Sub3(int a, int b, int c) {
461  const int pb = b - c;
462  const int pa = a - c;
463  return abs(pb) - abs(pa);
464}
465
466#undef LOCAL_INLINE
467
468static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
469  const int pa_minus_pb =
470      Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
471      Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
472      Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
473      Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
474  return (pa_minus_pb <= 0) ? a : b;
475}
476
477//------------------------------------------------------------------------------
478// Predictors
479
480static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
481  (void)top;
482  (void)left;
483  return ARGB_BLACK;
484}
485static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
486  (void)top;
487  return left;
488}
489static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
490  (void)left;
491  return top[0];
492}
493static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
494  (void)left;
495  return top[1];
496}
497static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
498  (void)left;
499  return top[-1];
500}
501static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
502  const uint32_t pred = Average3(left, top[0], top[1]);
503  return pred;
504}
505static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
506  const uint32_t pred = Average2(left, top[-1]);
507  return pred;
508}
509static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
510  const uint32_t pred = Average2(left, top[0]);
511  return pred;
512}
513static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
514  const uint32_t pred = Average2(top[-1], top[0]);
515  (void)left;
516  return pred;
517}
518static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
519  const uint32_t pred = Average2(top[0], top[1]);
520  (void)left;
521  return pred;
522}
523static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
524  const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
525  return pred;
526}
527static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
528  const uint32_t pred = Select(top[0], left, top[-1]);
529  return pred;
530}
531static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
532  const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
533  return pred;
534}
535static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
536  const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
537  return pred;
538}
539
540static const VP8LPredictorFunc kPredictorsC[16] = {
541  Predictor0, Predictor1, Predictor2, Predictor3,
542  Predictor4, Predictor5, Predictor6, Predictor7,
543  Predictor8, Predictor9, Predictor10, Predictor11,
544  Predictor12, Predictor13,
545  Predictor0, Predictor0    // <- padding security sentinels
546};
547
548static float PredictionCostSpatial(const int counts[256], int weight_0,
549                                   double exp_val) {
550  const int significant_symbols = 256 >> 4;
551  const double exp_decay_factor = 0.6;
552  double bits = weight_0 * counts[0];
553  int i;
554  for (i = 1; i < significant_symbols; ++i) {
555    bits += exp_val * (counts[i] + counts[256 - i]);
556    exp_val *= exp_decay_factor;
557  }
558  return (float)(-0.1 * bits);
559}
560
561// Compute the combined Shanon's entropy for distribution {X} and {X+Y}
562static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
563  int i;
564  double retval = 0.;
565  int sumX = 0, sumXY = 0;
566  for (i = 0; i < 256; ++i) {
567    const int x = X[i];
568    const int xy = x + Y[i];
569    if (x != 0) {
570      sumX += x;
571      retval -= VP8LFastSLog2(x);
572      sumXY += xy;
573      retval -= VP8LFastSLog2(xy);
574    } else if (xy != 0) {
575      sumXY += xy;
576      retval -= VP8LFastSLog2(xy);
577    }
578  }
579  retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
580  return (float)retval;
581}
582
583static float PredictionCostSpatialHistogram(const int accumulated[4][256],
584                                            const int tile[4][256]) {
585  int i;
586  double retval = 0;
587  for (i = 0; i < 4; ++i) {
588    const double kExpValue = 0.94;
589    retval += PredictionCostSpatial(tile[i], 1, kExpValue);
590    retval += CombinedShannonEntropy(tile[i], accumulated[i]);
591  }
592  return (float)retval;
593}
594
595static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
596  ++histo_argb[0][argb >> 24];
597  ++histo_argb[1][(argb >> 16) & 0xff];
598  ++histo_argb[2][(argb >> 8) & 0xff];
599  ++histo_argb[3][argb & 0xff];
600}
601
602static int GetBestPredictorForTile(int width, int height,
603                                   int tile_x, int tile_y, int bits,
604                                   const int accumulated[4][256],
605                                   const uint32_t* const argb_scratch) {
606  const int kNumPredModes = 14;
607  const int col_start = tile_x << bits;
608  const int row_start = tile_y << bits;
609  const int tile_size = 1 << bits;
610  const int max_y = GetMin(tile_size, height - row_start);
611  const int max_x = GetMin(tile_size, width - col_start);
612  float best_diff = MAX_DIFF_COST;
613  int best_mode = 0;
614  int mode;
615  for (mode = 0; mode < kNumPredModes; ++mode) {
616    const uint32_t* current_row = argb_scratch;
617    const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
618    float cur_diff;
619    int y;
620    int histo_argb[4][256];
621    memset(histo_argb, 0, sizeof(histo_argb));
622    for (y = 0; y < max_y; ++y) {
623      int x;
624      const int row = row_start + y;
625      const uint32_t* const upper_row = current_row;
626      current_row = upper_row + width;
627      for (x = 0; x < max_x; ++x) {
628        const int col = col_start + x;
629        uint32_t predict;
630        if (row == 0) {
631          predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
632        } else if (col == 0) {
633          predict = upper_row[col];  // Top.
634        } else {
635          predict = pred_func(current_row[col - 1], upper_row + col);
636        }
637        UpdateHisto(histo_argb, VP8LSubPixels(current_row[col], predict));
638      }
639    }
640    cur_diff = PredictionCostSpatialHistogram(
641        accumulated, (const int (*)[256])histo_argb);
642    if (cur_diff < best_diff) {
643      best_diff = cur_diff;
644      best_mode = mode;
645    }
646  }
647
648  return best_mode;
649}
650
651static void CopyTileWithPrediction(int width, int height,
652                                   int tile_x, int tile_y, int bits, int mode,
653                                   const uint32_t* const argb_scratch,
654                                   uint32_t* const argb) {
655  const int col_start = tile_x << bits;
656  const int row_start = tile_y << bits;
657  const int tile_size = 1 << bits;
658  const int max_y = GetMin(tile_size, height - row_start);
659  const int max_x = GetMin(tile_size, width - col_start);
660  const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
661  const uint32_t* current_row = argb_scratch;
662
663  int y;
664  for (y = 0; y < max_y; ++y) {
665    int x;
666    const int row = row_start + y;
667    const uint32_t* const upper_row = current_row;
668    current_row = upper_row + width;
669    for (x = 0; x < max_x; ++x) {
670      const int col = col_start + x;
671      const int pix = row * width + col;
672      uint32_t predict;
673      if (row == 0) {
674        predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
675      } else if (col == 0) {
676        predict = upper_row[col];  // Top.
677      } else {
678        predict = pred_func(current_row[col - 1], upper_row + col);
679      }
680      argb[pix] = VP8LSubPixels(current_row[col], predict);
681    }
682  }
683}
684
685void VP8LResidualImage(int width, int height, int bits,
686                       uint32_t* const argb, uint32_t* const argb_scratch,
687                       uint32_t* const image) {
688  const int max_tile_size = 1 << bits;
689  const int tiles_per_row = VP8LSubSampleSize(width, bits);
690  const int tiles_per_col = VP8LSubSampleSize(height, bits);
691  uint32_t* const upper_row = argb_scratch;
692  uint32_t* const current_tile_rows = argb_scratch + width;
693  int tile_y;
694  int histo[4][256];
695  memset(histo, 0, sizeof(histo));
696  for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
697    const int tile_y_offset = tile_y * max_tile_size;
698    const int this_tile_height =
699        (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
700    int tile_x;
701    if (tile_y > 0) {
702      memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
703             width * sizeof(*upper_row));
704    }
705    memcpy(current_tile_rows, &argb[tile_y_offset * width],
706           this_tile_height * width * sizeof(*current_tile_rows));
707    for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
708      int pred;
709      int y;
710      const int tile_x_offset = tile_x * max_tile_size;
711      int all_x_max = tile_x_offset + max_tile_size;
712      if (all_x_max > width) {
713        all_x_max = width;
714      }
715      pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits,
716                                     (const int (*)[256])histo,
717                                     argb_scratch);
718      image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
719      CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
720                             argb_scratch, argb);
721      for (y = 0; y < max_tile_size; ++y) {
722        int ix;
723        int all_x;
724        int all_y = tile_y_offset + y;
725        if (all_y >= height) {
726          break;
727        }
728        ix = all_y * width + tile_x_offset;
729        for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
730          UpdateHisto(histo, argb[ix]);
731        }
732      }
733    }
734  }
735}
736
737// Inverse prediction.
738static void PredictorInverseTransform(const VP8LTransform* const transform,
739                                      int y_start, int y_end, uint32_t* data) {
740  const int width = transform->xsize_;
741  if (y_start == 0) {  // First Row follows the L (mode=1) mode.
742    int x;
743    const uint32_t pred0 = Predictor0(data[-1], NULL);
744    AddPixelsEq(data, pred0);
745    for (x = 1; x < width; ++x) {
746      const uint32_t pred1 = Predictor1(data[x - 1], NULL);
747      AddPixelsEq(data + x, pred1);
748    }
749    data += width;
750    ++y_start;
751  }
752
753  {
754    int y = y_start;
755    const int tile_width = 1 << transform->bits_;
756    const int mask = tile_width - 1;
757    const int safe_width = width & ~mask;
758    const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
759    const uint32_t* pred_mode_base =
760        transform->data_ + (y >> transform->bits_) * tiles_per_row;
761
762    while (y < y_end) {
763      const uint32_t pred2 = Predictor2(data[-1], data - width);
764      const uint32_t* pred_mode_src = pred_mode_base;
765      VP8LPredictorFunc pred_func;
766      int x = 1;
767      int t = 1;
768      // First pixel follows the T (mode=2) mode.
769      AddPixelsEq(data, pred2);
770      // .. the rest:
771      while (x < safe_width) {
772        pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
773        for (; t < tile_width; ++t, ++x) {
774          const uint32_t pred = pred_func(data[x - 1], data + x - width);
775          AddPixelsEq(data + x, pred);
776        }
777        t = 0;
778      }
779      if (x < width) {
780        pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
781        for (; x < width; ++x) {
782          const uint32_t pred = pred_func(data[x - 1], data + x - width);
783          AddPixelsEq(data + x, pred);
784        }
785      }
786      data += width;
787      ++y;
788      if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
789        pred_mode_base += tiles_per_row;
790      }
791    }
792  }
793}
794
795void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
796  int i;
797  for (i = 0; i < num_pixels; ++i) {
798    const uint32_t argb = argb_data[i];
799    const uint32_t green = (argb >> 8) & 0xff;
800    const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
801    const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
802    argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
803  }
804}
805
806// Add green to blue and red channels (i.e. perform the inverse transform of
807// 'subtract green').
808void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) {
809  int i;
810  for (i = 0; i < num_pixels; ++i) {
811    const uint32_t argb = data[i];
812    const uint32_t green = ((argb >> 8) & 0xff);
813    uint32_t red_blue = (argb & 0x00ff00ffu);
814    red_blue += (green << 16) | green;
815    red_blue &= 0x00ff00ffu;
816    data[i] = (argb & 0xff00ff00u) | red_blue;
817  }
818}
819
820static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
821  m->green_to_red_ = 0;
822  m->green_to_blue_ = 0;
823  m->red_to_blue_ = 0;
824}
825
826static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
827                                                int8_t color) {
828  return (uint32_t)((int)(color_pred) * color) >> 5;
829}
830
831static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
832                                               VP8LMultipliers* const m) {
833  m->green_to_red_  = (color_code >>  0) & 0xff;
834  m->green_to_blue_ = (color_code >>  8) & 0xff;
835  m->red_to_blue_   = (color_code >> 16) & 0xff;
836}
837
838static WEBP_INLINE uint32_t MultipliersToColorCode(
839    const VP8LMultipliers* const m) {
840  return 0xff000000u |
841         ((uint32_t)(m->red_to_blue_) << 16) |
842         ((uint32_t)(m->green_to_blue_) << 8) |
843         m->green_to_red_;
844}
845
846void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
847                          int num_pixels) {
848  int i;
849  for (i = 0; i < num_pixels; ++i) {
850    const uint32_t argb = data[i];
851    const uint32_t green = argb >> 8;
852    const uint32_t red = argb >> 16;
853    uint32_t new_red = red;
854    uint32_t new_blue = argb;
855    new_red -= ColorTransformDelta(m->green_to_red_, green);
856    new_red &= 0xff;
857    new_blue -= ColorTransformDelta(m->green_to_blue_, green);
858    new_blue -= ColorTransformDelta(m->red_to_blue_, red);
859    new_blue &= 0xff;
860    data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
861  }
862}
863
864void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
865                                 int num_pixels) {
866  int i;
867  for (i = 0; i < num_pixels; ++i) {
868    const uint32_t argb = data[i];
869    const uint32_t green = argb >> 8;
870    const uint32_t red = argb >> 16;
871    uint32_t new_red = red;
872    uint32_t new_blue = argb;
873    new_red += ColorTransformDelta(m->green_to_red_, green);
874    new_red &= 0xff;
875    new_blue += ColorTransformDelta(m->green_to_blue_, green);
876    new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
877    new_blue &= 0xff;
878    data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
879  }
880}
881
882static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
883                                             uint32_t argb) {
884  const uint32_t green = argb >> 8;
885  uint32_t new_red = argb >> 16;
886  new_red -= ColorTransformDelta(green_to_red, green);
887  return (new_red & 0xff);
888}
889
890static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
891                                              uint8_t red_to_blue,
892                                              uint32_t argb) {
893  const uint32_t green = argb >> 8;
894  const uint32_t red = argb >> 16;
895  uint8_t new_blue = argb;
896  new_blue -= ColorTransformDelta(green_to_blue, green);
897  new_blue -= ColorTransformDelta(red_to_blue, red);
898  return (new_blue & 0xff);
899}
900
901static float PredictionCostCrossColor(const int accumulated[256],
902                                      const int counts[256]) {
903  // Favor low entropy, locally and globally.
904  // Favor small absolute values for PredictionCostSpatial
905  static const double kExpValue = 2.4;
906  return CombinedShannonEntropy(counts, accumulated) +
907         PredictionCostSpatial(counts, 3, kExpValue);
908}
909
910static float GetPredictionCostCrossColorRed(
911    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
912    int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
913    const int accumulated_red_histo[256], const uint32_t* const argb) {
914  int all_y;
915  int histo[256] = { 0 };
916  float cur_diff;
917  for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
918    int ix = all_y * xsize + tile_x_offset;
919    int all_x;
920    for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
921      ++histo[TransformColorRed(green_to_red, argb[ix])];  // red.
922    }
923  }
924  cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
925  if ((uint8_t)green_to_red == prev_x.green_to_red_) {
926    cur_diff -= 3;  // favor keeping the areas locally similar
927  }
928  if ((uint8_t)green_to_red == prev_y.green_to_red_) {
929    cur_diff -= 3;  // favor keeping the areas locally similar
930  }
931  if (green_to_red == 0) {
932    cur_diff -= 3;
933  }
934  return cur_diff;
935}
936
937static void GetBestGreenToRed(
938    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
939    int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
940    const int accumulated_red_histo[256], const uint32_t* const argb,
941    VP8LMultipliers* const best_tx) {
942  int min_green_to_red = -64;
943  int max_green_to_red = 64;
944  int green_to_red = 0;
945  int eval_min = 1;
946  int eval_max = 1;
947  float cur_diff_min = MAX_DIFF_COST;
948  float cur_diff_max = MAX_DIFF_COST;
949  // Do a binary search to find the optimal green_to_red color transform.
950  while (max_green_to_red - min_green_to_red > 2) {
951    if (eval_min) {
952      cur_diff_min = GetPredictionCostCrossColorRed(
953          tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
954          prev_x, prev_y, min_green_to_red, accumulated_red_histo, argb);
955      eval_min = 0;
956    }
957    if (eval_max) {
958      cur_diff_max = GetPredictionCostCrossColorRed(
959          tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
960          prev_x, prev_y, max_green_to_red, accumulated_red_histo, argb);
961      eval_max = 0;
962    }
963    if (cur_diff_min < cur_diff_max) {
964      green_to_red = min_green_to_red;
965      max_green_to_red = (max_green_to_red + min_green_to_red) / 2;
966      eval_max = 1;
967    } else {
968      green_to_red = max_green_to_red;
969      min_green_to_red = (max_green_to_red + min_green_to_red) / 2;
970      eval_min = 1;
971    }
972  }
973  best_tx->green_to_red_ = green_to_red;
974}
975
976static float GetPredictionCostCrossColorBlue(
977    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
978    int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
979    int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256],
980    const uint32_t* const argb) {
981  int all_y;
982  int histo[256] = { 0 };
983  float cur_diff;
984  for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
985    int all_x;
986    int ix = all_y * xsize + tile_x_offset;
987    for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
988      ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
989    }
990  }
991  cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
992  if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
993    cur_diff -= 3;  // favor keeping the areas locally similar
994  }
995  if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {
996    cur_diff -= 3;  // favor keeping the areas locally similar
997  }
998  if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {
999    cur_diff -= 3;  // favor keeping the areas locally similar
1000  }
1001  if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {
1002    cur_diff -= 3;  // favor keeping the areas locally similar
1003  }
1004  if (green_to_blue == 0) {
1005    cur_diff -= 3;
1006  }
1007  if (red_to_blue == 0) {
1008    cur_diff -= 3;
1009  }
1010  return cur_diff;
1011}
1012
1013static void GetBestGreenRedToBlue(
1014    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
1015    int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
1016    const int accumulated_blue_histo[256], const uint32_t* const argb,
1017    VP8LMultipliers* const best_tx) {
1018  float best_diff = MAX_DIFF_COST;
1019  float cur_diff;
1020  const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16;
1021  const int min_green_to_blue = -32;
1022  const int max_green_to_blue = 32;
1023  const int min_red_to_blue = -32;
1024  const int max_red_to_blue = 32;
1025  const int num_iters =
1026      (1 + (max_green_to_blue - min_green_to_blue) / step) *
1027      (1 + (max_red_to_blue - min_red_to_blue) / step);
1028  // Number of tries to get optimal green_to_blue & red_to_blue color transforms
1029  // after finding a local minima.
1030  const int max_tries_after_min = 4 + (num_iters >> 2);
1031  int num_tries_after_min = 0;
1032  int green_to_blue;
1033  for (green_to_blue = min_green_to_blue;
1034       green_to_blue <= max_green_to_blue &&
1035       num_tries_after_min < max_tries_after_min;
1036       green_to_blue += step) {
1037    int red_to_blue;
1038    for (red_to_blue = min_red_to_blue;
1039         red_to_blue <= max_red_to_blue &&
1040         num_tries_after_min < max_tries_after_min;
1041         red_to_blue += step) {
1042      cur_diff = GetPredictionCostCrossColorBlue(
1043          tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, prev_x,
1044          prev_y, green_to_blue, red_to_blue, accumulated_blue_histo, argb);
1045      if (cur_diff < best_diff) {
1046        best_diff = cur_diff;
1047        best_tx->green_to_blue_ = green_to_blue;
1048        best_tx->red_to_blue_ = red_to_blue;
1049        num_tries_after_min = 0;
1050      } else {
1051        ++num_tries_after_min;
1052      }
1053    }
1054  }
1055}
1056
1057static VP8LMultipliers GetBestColorTransformForTile(
1058    int tile_x, int tile_y, int bits,
1059    VP8LMultipliers prev_x,
1060    VP8LMultipliers prev_y,
1061    int quality, int xsize, int ysize,
1062    const int accumulated_red_histo[256],
1063    const int accumulated_blue_histo[256],
1064    const uint32_t* const argb) {
1065  const int max_tile_size = 1 << bits;
1066  const int tile_y_offset = tile_y * max_tile_size;
1067  const int tile_x_offset = tile_x * max_tile_size;
1068  const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);
1069  const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);
1070  VP8LMultipliers best_tx;
1071  MultipliersClear(&best_tx);
1072
1073  GetBestGreenToRed(tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
1074                    prev_x, prev_y, accumulated_red_histo, argb, &best_tx);
1075  GetBestGreenRedToBlue(tile_x_offset, tile_y_offset, all_x_max, all_y_max,
1076                        xsize, prev_x, prev_y, quality, accumulated_blue_histo,
1077                        argb, &best_tx);
1078  return best_tx;
1079}
1080
1081static void CopyTileWithColorTransform(int xsize, int ysize,
1082                                       int tile_x, int tile_y,
1083                                       int max_tile_size,
1084                                       VP8LMultipliers color_transform,
1085                                       uint32_t* argb) {
1086  const int xscan = GetMin(max_tile_size, xsize - tile_x);
1087  int yscan = GetMin(max_tile_size, ysize - tile_y);
1088  argb += tile_y * xsize + tile_x;
1089  while (yscan-- > 0) {
1090    VP8LTransformColor(&color_transform, argb, xscan);
1091    argb += xsize;
1092  }
1093}
1094
1095void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
1096                             uint32_t* const argb, uint32_t* image) {
1097  const int max_tile_size = 1 << bits;
1098  const int tile_xsize = VP8LSubSampleSize(width, bits);
1099  const int tile_ysize = VP8LSubSampleSize(height, bits);
1100  int accumulated_red_histo[256] = { 0 };
1101  int accumulated_blue_histo[256] = { 0 };
1102  int tile_x, tile_y;
1103  VP8LMultipliers prev_x, prev_y;
1104  MultipliersClear(&prev_y);
1105  MultipliersClear(&prev_x);
1106  for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
1107    for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
1108      int y;
1109      const int tile_x_offset = tile_x * max_tile_size;
1110      const int tile_y_offset = tile_y * max_tile_size;
1111      const int all_x_max = GetMin(tile_x_offset + max_tile_size, width);
1112      const int all_y_max = GetMin(tile_y_offset + max_tile_size, height);
1113      const int offset = tile_y * tile_xsize + tile_x;
1114      if (tile_y != 0) {
1115        ColorCodeToMultipliers(image[offset - tile_xsize], &prev_y);
1116      }
1117      prev_x = GetBestColorTransformForTile(tile_x, tile_y, bits,
1118                                            prev_x, prev_y,
1119                                            quality, width, height,
1120                                            accumulated_red_histo,
1121                                            accumulated_blue_histo,
1122                                            argb);
1123      image[offset] = MultipliersToColorCode(&prev_x);
1124      CopyTileWithColorTransform(width, height, tile_x_offset, tile_y_offset,
1125                                 max_tile_size, prev_x, argb);
1126
1127      // Gather accumulated histogram data.
1128      for (y = tile_y_offset; y < all_y_max; ++y) {
1129        int ix = y * width + tile_x_offset;
1130        const int ix_end = ix + all_x_max - tile_x_offset;
1131        for (; ix < ix_end; ++ix) {
1132          const uint32_t pix = argb[ix];
1133          if (ix >= 2 &&
1134              pix == argb[ix - 2] &&
1135              pix == argb[ix - 1]) {
1136            continue;  // repeated pixels are handled by backward references
1137          }
1138          if (ix >= width + 2 &&
1139              argb[ix - 2] == argb[ix - width - 2] &&
1140              argb[ix - 1] == argb[ix - width - 1] &&
1141              pix == argb[ix - width]) {
1142            continue;  // repeated pixels are handled by backward references
1143          }
1144          ++accumulated_red_histo[(pix >> 16) & 0xff];
1145          ++accumulated_blue_histo[(pix >> 0) & 0xff];
1146        }
1147      }
1148    }
1149  }
1150}
1151
1152// Color space inverse transform.
1153static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
1154                                       int y_start, int y_end, uint32_t* data) {
1155  const int width = transform->xsize_;
1156  const int tile_width = 1 << transform->bits_;
1157  const int mask = tile_width - 1;
1158  const int safe_width = width & ~mask;
1159  const int remaining_width = width - safe_width;
1160  const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
1161  int y = y_start;
1162  const uint32_t* pred_row =
1163      transform->data_ + (y >> transform->bits_) * tiles_per_row;
1164
1165  while (y < y_end) {
1166    const uint32_t* pred = pred_row;
1167    VP8LMultipliers m = { 0, 0, 0 };
1168    const uint32_t* const data_safe_end = data + safe_width;
1169    const uint32_t* const data_end = data + width;
1170    while (data < data_safe_end) {
1171      ColorCodeToMultipliers(*pred++, &m);
1172      VP8LTransformColorInverse(&m, data, tile_width);
1173      data += tile_width;
1174    }
1175    if (data < data_end) {  // Left-overs using C-version.
1176      ColorCodeToMultipliers(*pred++, &m);
1177      VP8LTransformColorInverse(&m, data, remaining_width);
1178      data += remaining_width;
1179    }
1180    ++y;
1181    if ((y & mask) == 0) pred_row += tiles_per_row;;
1182  }
1183}
1184
1185// Separate out pixels packed together using pixel-bundling.
1186// We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
1187#define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)             \
1188void FUNC_NAME(const VP8LTransform* const transform,                           \
1189               int y_start, int y_end, const TYPE* src, TYPE* dst) {           \
1190  int y;                                                                       \
1191  const int bits_per_pixel = 8 >> transform->bits_;                            \
1192  const int width = transform->xsize_;                                         \
1193  const uint32_t* const color_map = transform->data_;                          \
1194  if (bits_per_pixel < 8) {                                                    \
1195    const int pixels_per_byte = 1 << transform->bits_;                         \
1196    const int count_mask = pixels_per_byte - 1;                                \
1197    const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
1198    for (y = y_start; y < y_end; ++y) {                                        \
1199      uint32_t packed_pixels = 0;                                              \
1200      int x;                                                                   \
1201      for (x = 0; x < width; ++x) {                                            \
1202        /* We need to load fresh 'packed_pixels' once every                */  \
1203        /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
1204        /* is a power of 2, so can just use a mask for that, instead of    */  \
1205        /* decrementing a counter.                                         */  \
1206        if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
1207        *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
1208        packed_pixels >>= bits_per_pixel;                                      \
1209      }                                                                        \
1210    }                                                                          \
1211  } else {                                                                     \
1212    for (y = y_start; y < y_end; ++y) {                                        \
1213      int x;                                                                   \
1214      for (x = 0; x < width; ++x) {                                            \
1215        *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                      \
1216      }                                                                        \
1217    }                                                                          \
1218  }                                                                            \
1219}
1220
1221static WEBP_INLINE uint32_t GetARGBIndex(uint32_t idx) {
1222  return (idx >> 8) & 0xff;
1223}
1224
1225static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t idx) {
1226  return idx;
1227}
1228
1229static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) {
1230  return val;
1231}
1232
1233static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) {
1234  return (val >> 8) & 0xff;
1235}
1236
1237static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex,
1238                           GetARGBValue)
1239COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex,
1240                    GetAlphaValue)
1241
1242#undef COLOR_INDEX_INVERSE
1243
1244void VP8LInverseTransform(const VP8LTransform* const transform,
1245                          int row_start, int row_end,
1246                          const uint32_t* const in, uint32_t* const out) {
1247  const int width = transform->xsize_;
1248  assert(row_start < row_end);
1249  assert(row_end <= transform->ysize_);
1250  switch (transform->type_) {
1251    case SUBTRACT_GREEN:
1252      VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width);
1253      break;
1254    case PREDICTOR_TRANSFORM:
1255      PredictorInverseTransform(transform, row_start, row_end, out);
1256      if (row_end != transform->ysize_) {
1257        // The last predicted row in this iteration will be the top-pred row
1258        // for the first row in next iteration.
1259        memcpy(out - width, out + (row_end - row_start - 1) * width,
1260               width * sizeof(*out));
1261      }
1262      break;
1263    case CROSS_COLOR_TRANSFORM:
1264      ColorSpaceInverseTransform(transform, row_start, row_end, out);
1265      break;
1266    case COLOR_INDEXING_TRANSFORM:
1267      if (in == out && transform->bits_ > 0) {
1268        // Move packed pixels to the end of unpacked region, so that unpacking
1269        // can occur seamlessly.
1270        // Also, note that this is the only transform that applies on
1271        // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
1272        // transforms work on effective width of xsize_.
1273        const int out_stride = (row_end - row_start) * width;
1274        const int in_stride = (row_end - row_start) *
1275            VP8LSubSampleSize(transform->xsize_, transform->bits_);
1276        uint32_t* const src = out + out_stride - in_stride;
1277        memmove(src, out, in_stride * sizeof(*src));
1278        ColorIndexInverseTransform(transform, row_start, row_end, src, out);
1279      } else {
1280        ColorIndexInverseTransform(transform, row_start, row_end, in, out);
1281      }
1282      break;
1283  }
1284}
1285
1286//------------------------------------------------------------------------------
1287// Color space conversion.
1288
1289static int is_big_endian(void) {
1290  static const union {
1291    uint16_t w;
1292    uint8_t b[2];
1293  } tmp = { 1 };
1294  return (tmp.b[0] != 1);
1295}
1296
1297void VP8LConvertBGRAToRGB_C(const uint32_t* src,
1298                            int num_pixels, uint8_t* dst) {
1299  const uint32_t* const src_end = src + num_pixels;
1300  while (src < src_end) {
1301    const uint32_t argb = *src++;
1302    *dst++ = (argb >> 16) & 0xff;
1303    *dst++ = (argb >>  8) & 0xff;
1304    *dst++ = (argb >>  0) & 0xff;
1305  }
1306}
1307
1308void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
1309                             int num_pixels, uint8_t* dst) {
1310  const uint32_t* const src_end = src + num_pixels;
1311  while (src < src_end) {
1312    const uint32_t argb = *src++;
1313    *dst++ = (argb >> 16) & 0xff;
1314    *dst++ = (argb >>  8) & 0xff;
1315    *dst++ = (argb >>  0) & 0xff;
1316    *dst++ = (argb >> 24) & 0xff;
1317  }
1318}
1319
1320void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
1321                                 int num_pixels, uint8_t* dst) {
1322  const uint32_t* const src_end = src + num_pixels;
1323  while (src < src_end) {
1324    const uint32_t argb = *src++;
1325    const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
1326    const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
1327#ifdef WEBP_SWAP_16BIT_CSP
1328    *dst++ = ba;
1329    *dst++ = rg;
1330#else
1331    *dst++ = rg;
1332    *dst++ = ba;
1333#endif
1334  }
1335}
1336
1337void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
1338                               int num_pixels, uint8_t* dst) {
1339  const uint32_t* const src_end = src + num_pixels;
1340  while (src < src_end) {
1341    const uint32_t argb = *src++;
1342    const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
1343    const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
1344#ifdef WEBP_SWAP_16BIT_CSP
1345    *dst++ = gb;
1346    *dst++ = rg;
1347#else
1348    *dst++ = rg;
1349    *dst++ = gb;
1350#endif
1351  }
1352}
1353
1354void VP8LConvertBGRAToBGR_C(const uint32_t* src,
1355                            int num_pixels, uint8_t* dst) {
1356  const uint32_t* const src_end = src + num_pixels;
1357  while (src < src_end) {
1358    const uint32_t argb = *src++;
1359    *dst++ = (argb >>  0) & 0xff;
1360    *dst++ = (argb >>  8) & 0xff;
1361    *dst++ = (argb >> 16) & 0xff;
1362  }
1363}
1364
1365static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
1366                       int swap_on_big_endian) {
1367  if (is_big_endian() == swap_on_big_endian) {
1368    const uint32_t* const src_end = src + num_pixels;
1369    while (src < src_end) {
1370      const uint32_t argb = *src++;
1371
1372#if !defined(WORDS_BIGENDIAN)
1373#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
1374      *(uint32_t*)dst = BSwap32(argb);
1375#else  // WEBP_REFERENCE_IMPLEMENTATION
1376      dst[0] = (argb >> 24) & 0xff;
1377      dst[1] = (argb >> 16) & 0xff;
1378      dst[2] = (argb >>  8) & 0xff;
1379      dst[3] = (argb >>  0) & 0xff;
1380#endif
1381#else  // WORDS_BIGENDIAN
1382      dst[0] = (argb >>  0) & 0xff;
1383      dst[1] = (argb >>  8) & 0xff;
1384      dst[2] = (argb >> 16) & 0xff;
1385      dst[3] = (argb >> 24) & 0xff;
1386#endif
1387      dst += sizeof(argb);
1388    }
1389  } else {
1390    memcpy(dst, src, num_pixels * sizeof(*src));
1391  }
1392}
1393
1394void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
1395                         WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
1396  switch (out_colorspace) {
1397    case MODE_RGB:
1398      VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
1399      break;
1400    case MODE_RGBA:
1401      VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
1402      break;
1403    case MODE_rgbA:
1404      VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
1405      WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1406      break;
1407    case MODE_BGR:
1408      VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
1409      break;
1410    case MODE_BGRA:
1411      CopyOrSwap(in_data, num_pixels, rgba, 1);
1412      break;
1413    case MODE_bgrA:
1414      CopyOrSwap(in_data, num_pixels, rgba, 1);
1415      WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1416      break;
1417    case MODE_ARGB:
1418      CopyOrSwap(in_data, num_pixels, rgba, 0);
1419      break;
1420    case MODE_Argb:
1421      CopyOrSwap(in_data, num_pixels, rgba, 0);
1422      WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
1423      break;
1424    case MODE_RGBA_4444:
1425      VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1426      break;
1427    case MODE_rgbA_4444:
1428      VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1429      WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
1430      break;
1431    case MODE_RGB_565:
1432      VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
1433      break;
1434    default:
1435      assert(0);          // Code flow should not reach here.
1436  }
1437}
1438
1439//------------------------------------------------------------------------------
1440// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
1441void VP8LBundleColorMap(const uint8_t* const row, int width,
1442                        int xbits, uint32_t* const dst) {
1443  int x;
1444  if (xbits > 0) {
1445    const int bit_depth = 1 << (3 - xbits);
1446    const int mask = (1 << xbits) - 1;
1447    uint32_t code = 0xff000000;
1448    for (x = 0; x < width; ++x) {
1449      const int xsub = x & mask;
1450      if (xsub == 0) {
1451        code = 0xff000000;
1452      }
1453      code |= row[x] << (8 + bit_depth * xsub);
1454      dst[x >> xbits] = code;
1455    }
1456  } else {
1457    for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8);
1458  }
1459}
1460
1461//------------------------------------------------------------------------------
1462
1463static double ExtraCost(const uint32_t* population, int length) {
1464  int i;
1465  double cost = 0.;
1466  for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
1467  return cost;
1468}
1469
1470static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y,
1471                                int length) {
1472  int i;
1473  double cost = 0.;
1474  for (i = 2; i < length - 2; ++i) {
1475    const int xy = X[i + 2] + Y[i + 2];
1476    cost += (i >> 1) * xy;
1477  }
1478  return cost;
1479}
1480
1481// Returns the various RLE counts
1482static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) {
1483  int i;
1484  int streak = 0;
1485  VP8LStreaks stats;
1486  memset(&stats, 0, sizeof(stats));
1487  for (i = 0; i < length - 1; ++i) {
1488    ++streak;
1489    if (population[i] == population[i + 1]) {
1490      continue;
1491    }
1492    stats.counts[population[i] != 0] += (streak > 3);
1493    stats.streaks[population[i] != 0][(streak > 3)] += streak;
1494    streak = 0;
1495  }
1496  ++streak;
1497  stats.counts[population[i] != 0] += (streak > 3);
1498  stats.streaks[population[i] != 0][(streak > 3)] += streak;
1499  return stats;
1500}
1501
1502static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
1503                                            const uint32_t* Y, int length) {
1504  int i;
1505  int streak = 0;
1506  VP8LStreaks stats;
1507  memset(&stats, 0, sizeof(stats));
1508  for (i = 0; i < length - 1; ++i) {
1509    const int xy = X[i] + Y[i];
1510    const int xy_next = X[i + 1] + Y[i + 1];
1511    ++streak;
1512    if (xy == xy_next) {
1513      continue;
1514    }
1515    stats.counts[xy != 0] += (streak > 3);
1516    stats.streaks[xy != 0][(streak > 3)] += streak;
1517    streak = 0;
1518  }
1519  {
1520    const int xy = X[i] + Y[i];
1521    ++streak;
1522    stats.counts[xy != 0] += (streak > 3);
1523    stats.streaks[xy != 0][(streak > 3)] += streak;
1524  }
1525  return stats;
1526}
1527
1528//------------------------------------------------------------------------------
1529
1530static void HistogramAdd(const VP8LHistogram* const a,
1531                         const VP8LHistogram* const b,
1532                         VP8LHistogram* const out) {
1533  int i;
1534  const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
1535  assert(a->palette_code_bits_ == b->palette_code_bits_);
1536  if (b != out) {
1537    for (i = 0; i < literal_size; ++i) {
1538      out->literal_[i] = a->literal_[i] + b->literal_[i];
1539    }
1540    for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
1541      out->distance_[i] = a->distance_[i] + b->distance_[i];
1542    }
1543    for (i = 0; i < NUM_LITERAL_CODES; ++i) {
1544      out->red_[i] = a->red_[i] + b->red_[i];
1545      out->blue_[i] = a->blue_[i] + b->blue_[i];
1546      out->alpha_[i] = a->alpha_[i] + b->alpha_[i];
1547    }
1548  } else {
1549    for (i = 0; i < literal_size; ++i) {
1550      out->literal_[i] += a->literal_[i];
1551    }
1552    for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
1553      out->distance_[i] += a->distance_[i];
1554    }
1555    for (i = 0; i < NUM_LITERAL_CODES; ++i) {
1556      out->red_[i] += a->red_[i];
1557      out->blue_[i] += a->blue_[i];
1558      out->alpha_[i] += a->alpha_[i];
1559    }
1560  }
1561}
1562
1563//------------------------------------------------------------------------------
1564
1565VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
1566VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
1567VP8LPredictorFunc VP8LPredictors[16];
1568
1569VP8LTransformColorFunc VP8LTransformColor;
1570VP8LTransformColorFunc VP8LTransformColorInverse;
1571
1572VP8LConvertFunc VP8LConvertBGRAToRGB;
1573VP8LConvertFunc VP8LConvertBGRAToRGBA;
1574VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
1575VP8LConvertFunc VP8LConvertBGRAToRGB565;
1576VP8LConvertFunc VP8LConvertBGRAToBGR;
1577
1578VP8LFastLog2SlowFunc VP8LFastLog2Slow;
1579VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
1580
1581VP8LCostFunc VP8LExtraCost;
1582VP8LCostCombinedFunc VP8LExtraCostCombined;
1583
1584VP8LCostCountFunc VP8LHuffmanCostCount;
1585VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
1586
1587VP8LHistogramAddFunc VP8LHistogramAdd;
1588
1589extern void VP8LDspInitSSE2(void);
1590extern void VP8LDspInitNEON(void);
1591extern void VP8LDspInitMIPS32(void);
1592
1593void VP8LDspInit(void) {
1594  memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors));
1595
1596  VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
1597  VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
1598
1599  VP8LTransformColor = VP8LTransformColor_C;
1600  VP8LTransformColorInverse = VP8LTransformColorInverse_C;
1601
1602  VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
1603  VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
1604  VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
1605  VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
1606  VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
1607
1608  VP8LFastLog2Slow = FastLog2Slow;
1609  VP8LFastSLog2Slow = FastSLog2Slow;
1610
1611  VP8LExtraCost = ExtraCost;
1612  VP8LExtraCostCombined = ExtraCostCombined;
1613
1614  VP8LHuffmanCostCount = HuffmanCostCount;
1615  VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount;
1616
1617  VP8LHistogramAdd = HistogramAdd;
1618
1619  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
1620  if (VP8GetCPUInfo != NULL) {
1621#if defined(WEBP_USE_SSE2)
1622    if (VP8GetCPUInfo(kSSE2)) {
1623      VP8LDspInitSSE2();
1624    }
1625#endif
1626#if defined(WEBP_USE_NEON)
1627    if (VP8GetCPUInfo(kNEON)) {
1628      VP8LDspInitNEON();
1629    }
1630#endif
1631#if defined(WEBP_USE_MIPS32)
1632    if (VP8GetCPUInfo(kMIPS32)) {
1633      VP8LDspInitMIPS32();
1634    }
1635#endif
1636  }
1637}
1638
1639//------------------------------------------------------------------------------
1640