1// Copyright 2012 Google Inc. All Rights Reserved.
2//
3// Use of this source code is governed by a BSD-style license
4// that can be found in the COPYING file in the root of the source
5// tree. An additional intellectual property rights grant can be found
6// in the file PATENTS. All contributing project authors may
7// be found in the AUTHORS file in the root of the source tree.
8// -----------------------------------------------------------------------------
9//
10// Image transforms and color space conversion methods for lossless decoder.
11//
12// Authors: Vikas Arora (vikaas.arora@gmail.com)
13//          Jyrki Alakuijala (jyrki@google.com)
14//          Urvang Joshi (urvang@google.com)
15
16#include "./dsp.h"
17
18#include <math.h>
19#include <stdlib.h>
20#include "../dec/vp8li.h"
21#include "../utils/endian_inl.h"
22#include "./lossless.h"
23#include "./yuv.h"
24
25#define MAX_DIFF_COST (1e30f)
26
27// lookup table for small values of log2(int)
28const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
29  0.0000000000000000f, 0.0000000000000000f,
30  1.0000000000000000f, 1.5849625007211560f,
31  2.0000000000000000f, 2.3219280948873621f,
32  2.5849625007211560f, 2.8073549220576041f,
33  3.0000000000000000f, 3.1699250014423121f,
34  3.3219280948873621f, 3.4594316186372973f,
35  3.5849625007211560f, 3.7004397181410921f,
36  3.8073549220576041f, 3.9068905956085187f,
37  4.0000000000000000f, 4.0874628412503390f,
38  4.1699250014423121f, 4.2479275134435852f,
39  4.3219280948873626f, 4.3923174227787606f,
40  4.4594316186372973f, 4.5235619560570130f,
41  4.5849625007211560f, 4.6438561897747243f,
42  4.7004397181410917f, 4.7548875021634682f,
43  4.8073549220576037f, 4.8579809951275718f,
44  4.9068905956085187f, 4.9541963103868749f,
45  5.0000000000000000f, 5.0443941193584533f,
46  5.0874628412503390f, 5.1292830169449663f,
47  5.1699250014423121f, 5.2094533656289501f,
48  5.2479275134435852f, 5.2854022188622487f,
49  5.3219280948873626f, 5.3575520046180837f,
50  5.3923174227787606f, 5.4262647547020979f,
51  5.4594316186372973f, 5.4918530963296747f,
52  5.5235619560570130f, 5.5545888516776376f,
53  5.5849625007211560f, 5.6147098441152083f,
54  5.6438561897747243f, 5.6724253419714951f,
55  5.7004397181410917f, 5.7279204545631987f,
56  5.7548875021634682f, 5.7813597135246599f,
57  5.8073549220576037f, 5.8328900141647412f,
58  5.8579809951275718f, 5.8826430493618415f,
59  5.9068905956085187f, 5.9307373375628866f,
60  5.9541963103868749f, 5.9772799234999167f,
61  6.0000000000000000f, 6.0223678130284543f,
62  6.0443941193584533f, 6.0660891904577720f,
63  6.0874628412503390f, 6.1085244567781691f,
64  6.1292830169449663f, 6.1497471195046822f,
65  6.1699250014423121f, 6.1898245588800175f,
66  6.2094533656289501f, 6.2288186904958804f,
67  6.2479275134435852f, 6.2667865406949010f,
68  6.2854022188622487f, 6.3037807481771030f,
69  6.3219280948873626f, 6.3398500028846243f,
70  6.3575520046180837f, 6.3750394313469245f,
71  6.3923174227787606f, 6.4093909361377017f,
72  6.4262647547020979f, 6.4429434958487279f,
73  6.4594316186372973f, 6.4757334309663976f,
74  6.4918530963296747f, 6.5077946401986963f,
75  6.5235619560570130f, 6.5391588111080309f,
76  6.5545888516776376f, 6.5698556083309478f,
77  6.5849625007211560f, 6.5999128421871278f,
78  6.6147098441152083f, 6.6293566200796094f,
79  6.6438561897747243f, 6.6582114827517946f,
80  6.6724253419714951f, 6.6865005271832185f,
81  6.7004397181410917f, 6.7142455176661224f,
82  6.7279204545631987f, 6.7414669864011464f,
83  6.7548875021634682f, 6.7681843247769259f,
84  6.7813597135246599f, 6.7944158663501061f,
85  6.8073549220576037f, 6.8201789624151878f,
86  6.8328900141647412f, 6.8454900509443747f,
87  6.8579809951275718f, 6.8703647195834047f,
88  6.8826430493618415f, 6.8948177633079437f,
89  6.9068905956085187f, 6.9188632372745946f,
90  6.9307373375628866f, 6.9425145053392398f,
91  6.9541963103868749f, 6.9657842846620869f,
92  6.9772799234999167f, 6.9886846867721654f,
93  7.0000000000000000f, 7.0112272554232539f,
94  7.0223678130284543f, 7.0334230015374501f,
95  7.0443941193584533f, 7.0552824355011898f,
96  7.0660891904577720f, 7.0768155970508308f,
97  7.0874628412503390f, 7.0980320829605263f,
98  7.1085244567781691f, 7.1189410727235076f,
99  7.1292830169449663f, 7.1395513523987936f,
100  7.1497471195046822f, 7.1598713367783890f,
101  7.1699250014423121f, 7.1799090900149344f,
102  7.1898245588800175f, 7.1996723448363644f,
103  7.2094533656289501f, 7.2191685204621611f,
104  7.2288186904958804f, 7.2384047393250785f,
105  7.2479275134435852f, 7.2573878426926521f,
106  7.2667865406949010f, 7.2761244052742375f,
107  7.2854022188622487f, 7.2946207488916270f,
108  7.3037807481771030f, 7.3128829552843557f,
109  7.3219280948873626f, 7.3309168781146167f,
110  7.3398500028846243f, 7.3487281542310771f,
111  7.3575520046180837f, 7.3663222142458160f,
112  7.3750394313469245f, 7.3837042924740519f,
113  7.3923174227787606f, 7.4008794362821843f,
114  7.4093909361377017f, 7.4178525148858982f,
115  7.4262647547020979f, 7.4346282276367245f,
116  7.4429434958487279f, 7.4512111118323289f,
117  7.4594316186372973f, 7.4676055500829976f,
118  7.4757334309663976f, 7.4838157772642563f,
119  7.4918530963296747f, 7.4998458870832056f,
120  7.5077946401986963f, 7.5156998382840427f,
121  7.5235619560570130f, 7.5313814605163118f,
122  7.5391588111080309f, 7.5468944598876364f,
123  7.5545888516776376f, 7.5622424242210728f,
124  7.5698556083309478f, 7.5774288280357486f,
125  7.5849625007211560f, 7.5924570372680806f,
126  7.5999128421871278f, 7.6073303137496104f,
127  7.6147098441152083f, 7.6220518194563764f,
128  7.6293566200796094f, 7.6366246205436487f,
129  7.6438561897747243f, 7.6510516911789281f,
130  7.6582114827517946f, 7.6653359171851764f,
131  7.6724253419714951f, 7.6794800995054464f,
132  7.6865005271832185f, 7.6934869574993252f,
133  7.7004397181410917f, 7.7073591320808825f,
134  7.7142455176661224f, 7.7210991887071855f,
135  7.7279204545631987f, 7.7347096202258383f,
136  7.7414669864011464f, 7.7481928495894605f,
137  7.7548875021634682f, 7.7615512324444795f,
138  7.7681843247769259f, 7.7747870596011736f,
139  7.7813597135246599f, 7.7879025593914317f,
140  7.7944158663501061f, 7.8008998999203047f,
141  7.8073549220576037f, 7.8137811912170374f,
142  7.8201789624151878f, 7.8265484872909150f,
143  7.8328900141647412f, 7.8392037880969436f,
144  7.8454900509443747f, 7.8517490414160571f,
145  7.8579809951275718f, 7.8641861446542797f,
146  7.8703647195834047f, 7.8765169465649993f,
147  7.8826430493618415f, 7.8887432488982591f,
148  7.8948177633079437f, 7.9008668079807486f,
149  7.9068905956085187f, 7.9128893362299619f,
150  7.9188632372745946f, 7.9248125036057812f,
151  7.9307373375628866f, 7.9366379390025709f,
152  7.9425145053392398f, 7.9483672315846778f,
153  7.9541963103868749f, 7.9600019320680805f,
154  7.9657842846620869f, 7.9715435539507719f,
155  7.9772799234999167f, 7.9829935746943103f,
156  7.9886846867721654f, 7.9943534368588577f
157};
158
159const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
160  0.00000000f,    0.00000000f,  2.00000000f,   4.75488750f,
161  8.00000000f,   11.60964047f,  15.50977500f,  19.65148445f,
162  24.00000000f,  28.52932501f,  33.21928095f,  38.05374781f,
163  43.01955001f,  48.10571634f,  53.30296891f,  58.60335893f,
164  64.00000000f,  69.48686830f,  75.05865003f,  80.71062276f,
165  86.43856190f,  92.23866588f,  98.10749561f,  104.04192499f,
166  110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
167  134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
168  160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
169  186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
170  212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
171  240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
172  268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
173  296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
174  325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
175  354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
176  384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
177  413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
178  444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
179  474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
180  505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
181  536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
182  568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
183  600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
184  632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
185  664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
186  696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
187  729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
188  762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
189  795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
190  828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
191  862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
192  896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
193  929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
194  963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
195  998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
196  1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
197  1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
198  1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
199  1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
200  1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
201  1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
202  1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
203  1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
204  1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
205  1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
206  1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
207  1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
208  1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
209  1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
210  1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
211  1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
212  1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
213  1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
214  1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
215  1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
216  1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
217  1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
218  1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
219  1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
220  1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
221  1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
222  1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
223  2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
224};
225
226const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
227  { 0, 0}, { 0, 0}, { 1, 0}, { 2, 0}, { 3, 0}, { 4, 1}, { 4, 1}, { 5, 1},
228  { 5, 1}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 7, 2}, { 7, 2}, { 7, 2},
229  { 7, 2}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3},
230  { 8, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3},
231  { 9, 3}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
232  {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
233  {10, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
234  {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
235  {11, 4}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
236  {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
237  {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
238  {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
239  {12, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
240  {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
241  {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
242  {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
243  {13, 5}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
244  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
245  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
246  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
247  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
248  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
249  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
250  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
251  {14, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
252  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
253  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
254  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
255  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
256  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
257  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
258  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
259  {15, 6}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
260  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
261  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
262  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
263  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
264  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
265  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
266  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
267  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
268  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
269  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
270  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
271  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
272  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
273  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
274  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
275  {16, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
276  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
277  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
278  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
279  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
280  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
281  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
282  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
283  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
284  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
285  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
286  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
287  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
288  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
289  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
290  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
291};
292
293const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
294   0,  0,  0,  0,  0,  0,  1,  0,  1,  0,  1,  2,  3,  0,  1,  2,  3,
295   0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  4,  5,  6,  7,
296   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
297   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
298   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
299  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
300   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
301  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
302   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
303  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
304  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
305  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
306   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
307  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
308  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
309  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
310   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
311  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
312  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
313  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
314  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
315  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
316  96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
317  112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
318  127,
319   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
320  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
321  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
322  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
323  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
324  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
325  96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
326  112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
327};
328
329// The threshold till approximate version of log_2 can be used.
330// Practically, we can get rid of the call to log() as the two values match to
331// very high degree (the ratio of these two is 0.99999x).
332// Keeping a high threshold for now.
333#define APPROX_LOG_WITH_CORRECTION_MAX  65536
334#define APPROX_LOG_MAX                   4096
335#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
336static float FastSLog2Slow(uint32_t v) {
337  assert(v >= LOG_LOOKUP_IDX_MAX);
338  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
339    int log_cnt = 0;
340    uint32_t y = 1;
341    int correction = 0;
342    const float v_f = (float)v;
343    const uint32_t orig_v = v;
344    do {
345      ++log_cnt;
346      v = v >> 1;
347      y = y << 1;
348    } while (v >= LOG_LOOKUP_IDX_MAX);
349    // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
350    // Xf = floor(Xf) * (1 + (v % y) / v)
351    // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
352    // The correction factor: log(1 + d) ~ d; for very small d values, so
353    // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
354    // LOG_2_RECIPROCAL ~ 23/16
355    correction = (23 * (orig_v & (y - 1))) >> 4;
356    return v_f * (kLog2Table[v] + log_cnt) + correction;
357  } else {
358    return (float)(LOG_2_RECIPROCAL * v * log((double)v));
359  }
360}
361
362static float FastLog2Slow(uint32_t v) {
363  assert(v >= LOG_LOOKUP_IDX_MAX);
364  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
365    int log_cnt = 0;
366    uint32_t y = 1;
367    const uint32_t orig_v = v;
368    double log_2;
369    do {
370      ++log_cnt;
371      v = v >> 1;
372      y = y << 1;
373    } while (v >= LOG_LOOKUP_IDX_MAX);
374    log_2 = kLog2Table[v] + log_cnt;
375    if (orig_v >= APPROX_LOG_MAX) {
376      // Since the division is still expensive, add this correction factor only
377      // for large values of 'v'.
378      const int correction = (23 * (orig_v & (y - 1))) >> 4;
379      log_2 += (double)correction / orig_v;
380    }
381    return (float)log_2;
382  } else {
383    return (float)(LOG_2_RECIPROCAL * log((double)v));
384  }
385}
386
387//------------------------------------------------------------------------------
388// Image transforms.
389
390// Mostly used to reduce code size + readability
391static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
392
393// In-place sum of each component with mod 256.
394static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
395  const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
396  const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
397  *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
398}
399
400static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
401  return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
402}
403
404static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
405  return Average2(Average2(a0, a2), a1);
406}
407
408static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
409                                     uint32_t a2, uint32_t a3) {
410  return Average2(Average2(a0, a1), Average2(a2, a3));
411}
412
413static WEBP_INLINE uint32_t Clip255(uint32_t a) {
414  if (a < 256) {
415    return a;
416  }
417  // return 0, when a is a negative integer.
418  // return 255, when a is positive.
419  return ~a >> 24;
420}
421
422static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
423  return Clip255(a + b - c);
424}
425
426static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
427                                                   uint32_t c2) {
428  const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
429  const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
430                                         (c1 >> 16) & 0xff,
431                                         (c2 >> 16) & 0xff);
432  const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
433                                         (c1 >> 8) & 0xff,
434                                         (c2 >> 8) & 0xff);
435  const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
436  return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
437}
438
439static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
440  return Clip255(a + (a - b) / 2);
441}
442
443static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
444                                                   uint32_t c2) {
445  const uint32_t ave = Average2(c0, c1);
446  const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
447  const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
448  const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
449  const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
450  return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
451}
452
453static WEBP_INLINE int Sub3(int a, int b, int c) {
454  const int pb = b - c;
455  const int pa = a - c;
456  return abs(pb) - abs(pa);
457}
458
459static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
460  const int pa_minus_pb =
461      Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
462      Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
463      Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
464      Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
465  return (pa_minus_pb <= 0) ? a : b;
466}
467
468//------------------------------------------------------------------------------
469// Predictors
470
471static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
472  (void)top;
473  (void)left;
474  return ARGB_BLACK;
475}
476static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
477  (void)top;
478  return left;
479}
480static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
481  (void)left;
482  return top[0];
483}
484static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
485  (void)left;
486  return top[1];
487}
488static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
489  (void)left;
490  return top[-1];
491}
492static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
493  const uint32_t pred = Average3(left, top[0], top[1]);
494  return pred;
495}
496static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
497  const uint32_t pred = Average2(left, top[-1]);
498  return pred;
499}
500static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
501  const uint32_t pred = Average2(left, top[0]);
502  return pred;
503}
504static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
505  const uint32_t pred = Average2(top[-1], top[0]);
506  (void)left;
507  return pred;
508}
509static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
510  const uint32_t pred = Average2(top[0], top[1]);
511  (void)left;
512  return pred;
513}
514static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
515  const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
516  return pred;
517}
518static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
519  const uint32_t pred = Select(top[0], left, top[-1]);
520  return pred;
521}
522static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
523  const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
524  return pred;
525}
526static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
527  const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
528  return pred;
529}
530
531static const VP8LPredictorFunc kPredictorsC[16] = {
532  Predictor0, Predictor1, Predictor2, Predictor3,
533  Predictor4, Predictor5, Predictor6, Predictor7,
534  Predictor8, Predictor9, Predictor10, Predictor11,
535  Predictor12, Predictor13,
536  Predictor0, Predictor0    // <- padding security sentinels
537};
538
539static float PredictionCostSpatial(const int counts[256], int weight_0,
540                                   double exp_val) {
541  const int significant_symbols = 256 >> 4;
542  const double exp_decay_factor = 0.6;
543  double bits = weight_0 * counts[0];
544  int i;
545  for (i = 1; i < significant_symbols; ++i) {
546    bits += exp_val * (counts[i] + counts[256 - i]);
547    exp_val *= exp_decay_factor;
548  }
549  return (float)(-0.1 * bits);
550}
551
552// Compute the combined Shanon's entropy for distribution {X} and {X+Y}
553static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
554  int i;
555  double retval = 0.;
556  int sumX = 0, sumXY = 0;
557  for (i = 0; i < 256; ++i) {
558    const int x = X[i];
559    const int xy = x + Y[i];
560    if (x != 0) {
561      sumX += x;
562      retval -= VP8LFastSLog2(x);
563      sumXY += xy;
564      retval -= VP8LFastSLog2(xy);
565    } else if (xy != 0) {
566      sumXY += xy;
567      retval -= VP8LFastSLog2(xy);
568    }
569  }
570  retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
571  return (float)retval;
572}
573
574static float PredictionCostSpatialHistogram(const int accumulated[4][256],
575                                            const int tile[4][256]) {
576  int i;
577  double retval = 0;
578  for (i = 0; i < 4; ++i) {
579    const double kExpValue = 0.94;
580    retval += PredictionCostSpatial(tile[i], 1, kExpValue);
581    retval += CombinedShannonEntropy(tile[i], accumulated[i]);
582  }
583  return (float)retval;
584}
585
586static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
587  ++histo_argb[0][argb >> 24];
588  ++histo_argb[1][(argb >> 16) & 0xff];
589  ++histo_argb[2][(argb >> 8) & 0xff];
590  ++histo_argb[3][argb & 0xff];
591}
592
593static int GetBestPredictorForTile(int width, int height,
594                                   int tile_x, int tile_y, int bits,
595                                   const int accumulated[4][256],
596                                   const uint32_t* const argb_scratch) {
597  const int kNumPredModes = 14;
598  const int col_start = tile_x << bits;
599  const int row_start = tile_y << bits;
600  const int tile_size = 1 << bits;
601  const int max_y = GetMin(tile_size, height - row_start);
602  const int max_x = GetMin(tile_size, width - col_start);
603  float best_diff = MAX_DIFF_COST;
604  int best_mode = 0;
605  int mode;
606  for (mode = 0; mode < kNumPredModes; ++mode) {
607    const uint32_t* current_row = argb_scratch;
608    const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
609    float cur_diff;
610    int y;
611    int histo_argb[4][256];
612    memset(histo_argb, 0, sizeof(histo_argb));
613    for (y = 0; y < max_y; ++y) {
614      int x;
615      const int row = row_start + y;
616      const uint32_t* const upper_row = current_row;
617      current_row = upper_row + width;
618      for (x = 0; x < max_x; ++x) {
619        const int col = col_start + x;
620        uint32_t predict;
621        if (row == 0) {
622          predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
623        } else if (col == 0) {
624          predict = upper_row[col];  // Top.
625        } else {
626          predict = pred_func(current_row[col - 1], upper_row + col);
627        }
628        UpdateHisto(histo_argb, VP8LSubPixels(current_row[col], predict));
629      }
630    }
631    cur_diff = PredictionCostSpatialHistogram(
632        accumulated, (const int (*)[256])histo_argb);
633    if (cur_diff < best_diff) {
634      best_diff = cur_diff;
635      best_mode = mode;
636    }
637  }
638
639  return best_mode;
640}
641
642static void CopyTileWithPrediction(int width, int height,
643                                   int tile_x, int tile_y, int bits, int mode,
644                                   const uint32_t* const argb_scratch,
645                                   uint32_t* const argb) {
646  const int col_start = tile_x << bits;
647  const int row_start = tile_y << bits;
648  const int tile_size = 1 << bits;
649  const int max_y = GetMin(tile_size, height - row_start);
650  const int max_x = GetMin(tile_size, width - col_start);
651  const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
652  const uint32_t* current_row = argb_scratch;
653
654  int y;
655  for (y = 0; y < max_y; ++y) {
656    int x;
657    const int row = row_start + y;
658    const uint32_t* const upper_row = current_row;
659    current_row = upper_row + width;
660    for (x = 0; x < max_x; ++x) {
661      const int col = col_start + x;
662      const int pix = row * width + col;
663      uint32_t predict;
664      if (row == 0) {
665        predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
666      } else if (col == 0) {
667        predict = upper_row[col];  // Top.
668      } else {
669        predict = pred_func(current_row[col - 1], upper_row + col);
670      }
671      argb[pix] = VP8LSubPixels(current_row[col], predict);
672    }
673  }
674}
675
676void VP8LResidualImage(int width, int height, int bits,
677                       uint32_t* const argb, uint32_t* const argb_scratch,
678                       uint32_t* const image) {
679  const int max_tile_size = 1 << bits;
680  const int tiles_per_row = VP8LSubSampleSize(width, bits);
681  const int tiles_per_col = VP8LSubSampleSize(height, bits);
682  uint32_t* const upper_row = argb_scratch;
683  uint32_t* const current_tile_rows = argb_scratch + width;
684  int tile_y;
685  int histo[4][256];
686  memset(histo, 0, sizeof(histo));
687  for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
688    const int tile_y_offset = tile_y * max_tile_size;
689    const int this_tile_height =
690        (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
691    int tile_x;
692    if (tile_y > 0) {
693      memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
694             width * sizeof(*upper_row));
695    }
696    memcpy(current_tile_rows, &argb[tile_y_offset * width],
697           this_tile_height * width * sizeof(*current_tile_rows));
698    for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
699      int pred;
700      int y;
701      const int tile_x_offset = tile_x * max_tile_size;
702      int all_x_max = tile_x_offset + max_tile_size;
703      if (all_x_max > width) {
704        all_x_max = width;
705      }
706      pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits,
707                                     (const int (*)[256])histo,
708                                     argb_scratch);
709      image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
710      CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
711                             argb_scratch, argb);
712      for (y = 0; y < max_tile_size; ++y) {
713        int ix;
714        int all_x;
715        int all_y = tile_y_offset + y;
716        if (all_y >= height) {
717          break;
718        }
719        ix = all_y * width + tile_x_offset;
720        for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
721          UpdateHisto(histo, argb[ix]);
722        }
723      }
724    }
725  }
726}
727
728// Inverse prediction.
729static void PredictorInverseTransform(const VP8LTransform* const transform,
730                                      int y_start, int y_end, uint32_t* data) {
731  const int width = transform->xsize_;
732  if (y_start == 0) {  // First Row follows the L (mode=1) mode.
733    int x;
734    const uint32_t pred0 = Predictor0(data[-1], NULL);
735    AddPixelsEq(data, pred0);
736    for (x = 1; x < width; ++x) {
737      const uint32_t pred1 = Predictor1(data[x - 1], NULL);
738      AddPixelsEq(data + x, pred1);
739    }
740    data += width;
741    ++y_start;
742  }
743
744  {
745    int y = y_start;
746    const int tile_width = 1 << transform->bits_;
747    const int mask = tile_width - 1;
748    const int safe_width = width & ~mask;
749    const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
750    const uint32_t* pred_mode_base =
751        transform->data_ + (y >> transform->bits_) * tiles_per_row;
752
753    while (y < y_end) {
754      const uint32_t pred2 = Predictor2(data[-1], data - width);
755      const uint32_t* pred_mode_src = pred_mode_base;
756      VP8LPredictorFunc pred_func;
757      int x = 1;
758      int t = 1;
759      // First pixel follows the T (mode=2) mode.
760      AddPixelsEq(data, pred2);
761      // .. the rest:
762      while (x < safe_width) {
763        pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
764        for (; t < tile_width; ++t, ++x) {
765          const uint32_t pred = pred_func(data[x - 1], data + x - width);
766          AddPixelsEq(data + x, pred);
767        }
768        t = 0;
769      }
770      if (x < width) {
771        pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
772        for (; x < width; ++x) {
773          const uint32_t pred = pred_func(data[x - 1], data + x - width);
774          AddPixelsEq(data + x, pred);
775        }
776      }
777      data += width;
778      ++y;
779      if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
780        pred_mode_base += tiles_per_row;
781      }
782    }
783  }
784}
785
786void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
787  int i;
788  for (i = 0; i < num_pixels; ++i) {
789    const uint32_t argb = argb_data[i];
790    const uint32_t green = (argb >> 8) & 0xff;
791    const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
792    const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
793    argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
794  }
795}
796
797// Add green to blue and red channels (i.e. perform the inverse transform of
798// 'subtract green').
799void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) {
800  int i;
801  for (i = 0; i < num_pixels; ++i) {
802    const uint32_t argb = data[i];
803    const uint32_t green = ((argb >> 8) & 0xff);
804    uint32_t red_blue = (argb & 0x00ff00ffu);
805    red_blue += (green << 16) | green;
806    red_blue &= 0x00ff00ffu;
807    data[i] = (argb & 0xff00ff00u) | red_blue;
808  }
809}
810
811static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
812  m->green_to_red_ = 0;
813  m->green_to_blue_ = 0;
814  m->red_to_blue_ = 0;
815}
816
817static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
818                                                int8_t color) {
819  return (uint32_t)((int)(color_pred) * color) >> 5;
820}
821
822static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
823                                               VP8LMultipliers* const m) {
824  m->green_to_red_  = (color_code >>  0) & 0xff;
825  m->green_to_blue_ = (color_code >>  8) & 0xff;
826  m->red_to_blue_   = (color_code >> 16) & 0xff;
827}
828
829static WEBP_INLINE uint32_t MultipliersToColorCode(
830    const VP8LMultipliers* const m) {
831  return 0xff000000u |
832         ((uint32_t)(m->red_to_blue_) << 16) |
833         ((uint32_t)(m->green_to_blue_) << 8) |
834         m->green_to_red_;
835}
836
837void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
838                          int num_pixels) {
839  int i;
840  for (i = 0; i < num_pixels; ++i) {
841    const uint32_t argb = data[i];
842    const uint32_t green = argb >> 8;
843    const uint32_t red = argb >> 16;
844    uint32_t new_red = red;
845    uint32_t new_blue = argb;
846    new_red -= ColorTransformDelta(m->green_to_red_, green);
847    new_red &= 0xff;
848    new_blue -= ColorTransformDelta(m->green_to_blue_, green);
849    new_blue -= ColorTransformDelta(m->red_to_blue_, red);
850    new_blue &= 0xff;
851    data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
852  }
853}
854
855void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
856                                 int num_pixels) {
857  int i;
858  for (i = 0; i < num_pixels; ++i) {
859    const uint32_t argb = data[i];
860    const uint32_t green = argb >> 8;
861    const uint32_t red = argb >> 16;
862    uint32_t new_red = red;
863    uint32_t new_blue = argb;
864    new_red += ColorTransformDelta(m->green_to_red_, green);
865    new_red &= 0xff;
866    new_blue += ColorTransformDelta(m->green_to_blue_, green);
867    new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
868    new_blue &= 0xff;
869    data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
870  }
871}
872
873static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
874                                             uint32_t argb) {
875  const uint32_t green = argb >> 8;
876  uint32_t new_red = argb >> 16;
877  new_red -= ColorTransformDelta(green_to_red, green);
878  return (new_red & 0xff);
879}
880
881static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
882                                              uint8_t red_to_blue,
883                                              uint32_t argb) {
884  const uint32_t green = argb >> 8;
885  const uint32_t red = argb >> 16;
886  uint8_t new_blue = argb;
887  new_blue -= ColorTransformDelta(green_to_blue, green);
888  new_blue -= ColorTransformDelta(red_to_blue, red);
889  return (new_blue & 0xff);
890}
891
892static float PredictionCostCrossColor(const int accumulated[256],
893                                      const int counts[256]) {
894  // Favor low entropy, locally and globally.
895  // Favor small absolute values for PredictionCostSpatial
896  static const double kExpValue = 2.4;
897  return CombinedShannonEntropy(counts, accumulated) +
898         PredictionCostSpatial(counts, 3, kExpValue);
899}
900
901static float GetPredictionCostCrossColorRed(
902    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
903    int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
904    const int accumulated_red_histo[256], const uint32_t* const argb) {
905  int all_y;
906  int histo[256] = { 0 };
907  float cur_diff;
908  for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
909    int ix = all_y * xsize + tile_x_offset;
910    int all_x;
911    for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
912      ++histo[TransformColorRed(green_to_red, argb[ix])];  // red.
913    }
914  }
915  cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
916  if ((uint8_t)green_to_red == prev_x.green_to_red_) {
917    cur_diff -= 3;  // favor keeping the areas locally similar
918  }
919  if ((uint8_t)green_to_red == prev_y.green_to_red_) {
920    cur_diff -= 3;  // favor keeping the areas locally similar
921  }
922  if (green_to_red == 0) {
923    cur_diff -= 3;
924  }
925  return cur_diff;
926}
927
928static void GetBestGreenToRed(
929    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
930    int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
931    const int accumulated_red_histo[256], const uint32_t* const argb,
932    VP8LMultipliers* const best_tx) {
933  int min_green_to_red = -64;
934  int max_green_to_red = 64;
935  int green_to_red = 0;
936  int eval_min = 1;
937  int eval_max = 1;
938  float cur_diff_min = MAX_DIFF_COST;
939  float cur_diff_max = MAX_DIFF_COST;
940  // Do a binary search to find the optimal green_to_red color transform.
941  while (max_green_to_red - min_green_to_red > 2) {
942    if (eval_min) {
943      cur_diff_min = GetPredictionCostCrossColorRed(
944          tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
945          prev_x, prev_y, min_green_to_red, accumulated_red_histo, argb);
946      eval_min = 0;
947    }
948    if (eval_max) {
949      cur_diff_max = GetPredictionCostCrossColorRed(
950          tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
951          prev_x, prev_y, max_green_to_red, accumulated_red_histo, argb);
952      eval_max = 0;
953    }
954    if (cur_diff_min < cur_diff_max) {
955      green_to_red = min_green_to_red;
956      max_green_to_red = (max_green_to_red + min_green_to_red) / 2;
957      eval_max = 1;
958    } else {
959      green_to_red = max_green_to_red;
960      min_green_to_red = (max_green_to_red + min_green_to_red) / 2;
961      eval_min = 1;
962    }
963  }
964  best_tx->green_to_red_ = green_to_red;
965}
966
967static float GetPredictionCostCrossColorBlue(
968    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
969    int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
970    int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256],
971    const uint32_t* const argb) {
972  int all_y;
973  int histo[256] = { 0 };
974  float cur_diff;
975  for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
976    int all_x;
977    int ix = all_y * xsize + tile_x_offset;
978    for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
979      ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
980    }
981  }
982  cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
983  if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
984    cur_diff -= 3;  // favor keeping the areas locally similar
985  }
986  if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {
987    cur_diff -= 3;  // favor keeping the areas locally similar
988  }
989  if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {
990    cur_diff -= 3;  // favor keeping the areas locally similar
991  }
992  if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {
993    cur_diff -= 3;  // favor keeping the areas locally similar
994  }
995  if (green_to_blue == 0) {
996    cur_diff -= 3;
997  }
998  if (red_to_blue == 0) {
999    cur_diff -= 3;
1000  }
1001  return cur_diff;
1002}
1003
1004static void GetBestGreenRedToBlue(
1005    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
1006    int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
1007    const int accumulated_blue_histo[256], const uint32_t* const argb,
1008    VP8LMultipliers* const best_tx) {
1009  float best_diff = MAX_DIFF_COST;
1010  float cur_diff;
1011  const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16;
1012  const int min_green_to_blue = -32;
1013  const int max_green_to_blue = 32;
1014  const int min_red_to_blue = -32;
1015  const int max_red_to_blue = 32;
1016  const int num_iters =
1017      (1 + (max_green_to_blue - min_green_to_blue) / step) *
1018      (1 + (max_red_to_blue - min_red_to_blue) / step);
1019  // Number of tries to get optimal green_to_blue & red_to_blue color transforms
1020  // after finding a local minima.
1021  const int max_tries_after_min = 4 + (num_iters >> 2);
1022  int num_tries_after_min = 0;
1023  int green_to_blue;
1024  for (green_to_blue = min_green_to_blue;
1025       green_to_blue <= max_green_to_blue &&
1026       num_tries_after_min < max_tries_after_min;
1027       green_to_blue += step) {
1028    int red_to_blue;
1029    for (red_to_blue = min_red_to_blue;
1030         red_to_blue <= max_red_to_blue &&
1031         num_tries_after_min < max_tries_after_min;
1032         red_to_blue += step) {
1033      cur_diff = GetPredictionCostCrossColorBlue(
1034          tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, prev_x,
1035          prev_y, green_to_blue, red_to_blue, accumulated_blue_histo, argb);
1036      if (cur_diff < best_diff) {
1037        best_diff = cur_diff;
1038        best_tx->green_to_blue_ = green_to_blue;
1039        best_tx->red_to_blue_ = red_to_blue;
1040        num_tries_after_min = 0;
1041      } else {
1042        ++num_tries_after_min;
1043      }
1044    }
1045  }
1046}
1047
1048static VP8LMultipliers GetBestColorTransformForTile(
1049    int tile_x, int tile_y, int bits,
1050    VP8LMultipliers prev_x,
1051    VP8LMultipliers prev_y,
1052    int quality, int xsize, int ysize,
1053    const int accumulated_red_histo[256],
1054    const int accumulated_blue_histo[256],
1055    const uint32_t* const argb) {
1056  const int max_tile_size = 1 << bits;
1057  const int tile_y_offset = tile_y * max_tile_size;
1058  const int tile_x_offset = tile_x * max_tile_size;
1059  const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);
1060  const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);
1061  VP8LMultipliers best_tx;
1062  MultipliersClear(&best_tx);
1063
1064  GetBestGreenToRed(tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
1065                    prev_x, prev_y, accumulated_red_histo, argb, &best_tx);
1066  GetBestGreenRedToBlue(tile_x_offset, tile_y_offset, all_x_max, all_y_max,
1067                        xsize, prev_x, prev_y, quality, accumulated_blue_histo,
1068                        argb, &best_tx);
1069  return best_tx;
1070}
1071
1072static void CopyTileWithColorTransform(int xsize, int ysize,
1073                                       int tile_x, int tile_y,
1074                                       int max_tile_size,
1075                                       VP8LMultipliers color_transform,
1076                                       uint32_t* argb) {
1077  const int xscan = GetMin(max_tile_size, xsize - tile_x);
1078  int yscan = GetMin(max_tile_size, ysize - tile_y);
1079  argb += tile_y * xsize + tile_x;
1080  while (yscan-- > 0) {
1081    VP8LTransformColor(&color_transform, argb, xscan);
1082    argb += xsize;
1083  }
1084}
1085
1086void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
1087                             uint32_t* const argb, uint32_t* image) {
1088  const int max_tile_size = 1 << bits;
1089  const int tile_xsize = VP8LSubSampleSize(width, bits);
1090  const int tile_ysize = VP8LSubSampleSize(height, bits);
1091  int accumulated_red_histo[256] = { 0 };
1092  int accumulated_blue_histo[256] = { 0 };
1093  int tile_x, tile_y;
1094  VP8LMultipliers prev_x, prev_y;
1095  MultipliersClear(&prev_y);
1096  MultipliersClear(&prev_x);
1097  for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
1098    for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
1099      int y;
1100      const int tile_x_offset = tile_x * max_tile_size;
1101      const int tile_y_offset = tile_y * max_tile_size;
1102      const int all_x_max = GetMin(tile_x_offset + max_tile_size, width);
1103      const int all_y_max = GetMin(tile_y_offset + max_tile_size, height);
1104      const int offset = tile_y * tile_xsize + tile_x;
1105      if (tile_y != 0) {
1106        ColorCodeToMultipliers(image[offset - tile_xsize], &prev_y);
1107      }
1108      prev_x = GetBestColorTransformForTile(tile_x, tile_y, bits,
1109                                            prev_x, prev_y,
1110                                            quality, width, height,
1111                                            accumulated_red_histo,
1112                                            accumulated_blue_histo,
1113                                            argb);
1114      image[offset] = MultipliersToColorCode(&prev_x);
1115      CopyTileWithColorTransform(width, height, tile_x_offset, tile_y_offset,
1116                                 max_tile_size, prev_x, argb);
1117
1118      // Gather accumulated histogram data.
1119      for (y = tile_y_offset; y < all_y_max; ++y) {
1120        int ix = y * width + tile_x_offset;
1121        const int ix_end = ix + all_x_max - tile_x_offset;
1122        for (; ix < ix_end; ++ix) {
1123          const uint32_t pix = argb[ix];
1124          if (ix >= 2 &&
1125              pix == argb[ix - 2] &&
1126              pix == argb[ix - 1]) {
1127            continue;  // repeated pixels are handled by backward references
1128          }
1129          if (ix >= width + 2 &&
1130              argb[ix - 2] == argb[ix - width - 2] &&
1131              argb[ix - 1] == argb[ix - width - 1] &&
1132              pix == argb[ix - width]) {
1133            continue;  // repeated pixels are handled by backward references
1134          }
1135          ++accumulated_red_histo[(pix >> 16) & 0xff];
1136          ++accumulated_blue_histo[(pix >> 0) & 0xff];
1137        }
1138      }
1139    }
1140  }
1141}
1142
1143// Color space inverse transform.
1144static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
1145                                       int y_start, int y_end, uint32_t* data) {
1146  const int width = transform->xsize_;
1147  const int tile_width = 1 << transform->bits_;
1148  const int mask = tile_width - 1;
1149  const int safe_width = width & ~mask;
1150  const int remaining_width = width - safe_width;
1151  const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
1152  int y = y_start;
1153  const uint32_t* pred_row =
1154      transform->data_ + (y >> transform->bits_) * tiles_per_row;
1155
1156  while (y < y_end) {
1157    const uint32_t* pred = pred_row;
1158    VP8LMultipliers m = { 0, 0, 0 };
1159    const uint32_t* const data_safe_end = data + safe_width;
1160    const uint32_t* const data_end = data + width;
1161    while (data < data_safe_end) {
1162      ColorCodeToMultipliers(*pred++, &m);
1163      VP8LTransformColorInverse(&m, data, tile_width);
1164      data += tile_width;
1165    }
1166    if (data < data_end) {  // Left-overs using C-version.
1167      ColorCodeToMultipliers(*pred++, &m);
1168      VP8LTransformColorInverse(&m, data, remaining_width);
1169      data += remaining_width;
1170    }
1171    ++y;
1172    if ((y & mask) == 0) pred_row += tiles_per_row;;
1173  }
1174}
1175
1176// Separate out pixels packed together using pixel-bundling.
1177// We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
1178#define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)             \
1179void FUNC_NAME(const VP8LTransform* const transform,                           \
1180               int y_start, int y_end, const TYPE* src, TYPE* dst) {           \
1181  int y;                                                                       \
1182  const int bits_per_pixel = 8 >> transform->bits_;                            \
1183  const int width = transform->xsize_;                                         \
1184  const uint32_t* const color_map = transform->data_;                          \
1185  if (bits_per_pixel < 8) {                                                    \
1186    const int pixels_per_byte = 1 << transform->bits_;                         \
1187    const int count_mask = pixels_per_byte - 1;                                \
1188    const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
1189    for (y = y_start; y < y_end; ++y) {                                        \
1190      uint32_t packed_pixels = 0;                                              \
1191      int x;                                                                   \
1192      for (x = 0; x < width; ++x) {                                            \
1193        /* We need to load fresh 'packed_pixels' once every                */  \
1194        /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
1195        /* is a power of 2, so can just use a mask for that, instead of    */  \
1196        /* decrementing a counter.                                         */  \
1197        if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
1198        *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
1199        packed_pixels >>= bits_per_pixel;                                      \
1200      }                                                                        \
1201    }                                                                          \
1202  } else {                                                                     \
1203    for (y = y_start; y < y_end; ++y) {                                        \
1204      int x;                                                                   \
1205      for (x = 0; x < width; ++x) {                                            \
1206        *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                      \
1207      }                                                                        \
1208    }                                                                          \
1209  }                                                                            \
1210}
1211
1212static WEBP_INLINE uint32_t GetARGBIndex(uint32_t idx) {
1213  return (idx >> 8) & 0xff;
1214}
1215
1216static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t idx) {
1217  return idx;
1218}
1219
1220static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) {
1221  return val;
1222}
1223
1224static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) {
1225  return (val >> 8) & 0xff;
1226}
1227
1228static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex,
1229                           GetARGBValue)
1230COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex,
1231                    GetAlphaValue)
1232
1233#undef COLOR_INDEX_INVERSE
1234
1235void VP8LInverseTransform(const VP8LTransform* const transform,
1236                          int row_start, int row_end,
1237                          const uint32_t* const in, uint32_t* const out) {
1238  const int width = transform->xsize_;
1239  assert(row_start < row_end);
1240  assert(row_end <= transform->ysize_);
1241  switch (transform->type_) {
1242    case SUBTRACT_GREEN:
1243      VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width);
1244      break;
1245    case PREDICTOR_TRANSFORM:
1246      PredictorInverseTransform(transform, row_start, row_end, out);
1247      if (row_end != transform->ysize_) {
1248        // The last predicted row in this iteration will be the top-pred row
1249        // for the first row in next iteration.
1250        memcpy(out - width, out + (row_end - row_start - 1) * width,
1251               width * sizeof(*out));
1252      }
1253      break;
1254    case CROSS_COLOR_TRANSFORM:
1255      ColorSpaceInverseTransform(transform, row_start, row_end, out);
1256      break;
1257    case COLOR_INDEXING_TRANSFORM:
1258      if (in == out && transform->bits_ > 0) {
1259        // Move packed pixels to the end of unpacked region, so that unpacking
1260        // can occur seamlessly.
1261        // Also, note that this is the only transform that applies on
1262        // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
1263        // transforms work on effective width of xsize_.
1264        const int out_stride = (row_end - row_start) * width;
1265        const int in_stride = (row_end - row_start) *
1266            VP8LSubSampleSize(transform->xsize_, transform->bits_);
1267        uint32_t* const src = out + out_stride - in_stride;
1268        memmove(src, out, in_stride * sizeof(*src));
1269        ColorIndexInverseTransform(transform, row_start, row_end, src, out);
1270      } else {
1271        ColorIndexInverseTransform(transform, row_start, row_end, in, out);
1272      }
1273      break;
1274  }
1275}
1276
1277//------------------------------------------------------------------------------
1278// Color space conversion.
1279
1280static int is_big_endian(void) {
1281  static const union {
1282    uint16_t w;
1283    uint8_t b[2];
1284  } tmp = { 1 };
1285  return (tmp.b[0] != 1);
1286}
1287
1288void VP8LConvertBGRAToRGB_C(const uint32_t* src,
1289                            int num_pixels, uint8_t* dst) {
1290  const uint32_t* const src_end = src + num_pixels;
1291  while (src < src_end) {
1292    const uint32_t argb = *src++;
1293    *dst++ = (argb >> 16) & 0xff;
1294    *dst++ = (argb >>  8) & 0xff;
1295    *dst++ = (argb >>  0) & 0xff;
1296  }
1297}
1298
1299void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
1300                             int num_pixels, uint8_t* dst) {
1301  const uint32_t* const src_end = src + num_pixels;
1302  while (src < src_end) {
1303    const uint32_t argb = *src++;
1304    *dst++ = (argb >> 16) & 0xff;
1305    *dst++ = (argb >>  8) & 0xff;
1306    *dst++ = (argb >>  0) & 0xff;
1307    *dst++ = (argb >> 24) & 0xff;
1308  }
1309}
1310
1311void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
1312                                 int num_pixels, uint8_t* dst) {
1313  const uint32_t* const src_end = src + num_pixels;
1314  while (src < src_end) {
1315    const uint32_t argb = *src++;
1316    const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
1317    const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
1318#ifdef WEBP_SWAP_16BIT_CSP
1319    *dst++ = ba;
1320    *dst++ = rg;
1321#else
1322    *dst++ = rg;
1323    *dst++ = ba;
1324#endif
1325  }
1326}
1327
1328void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
1329                               int num_pixels, uint8_t* dst) {
1330  const uint32_t* const src_end = src + num_pixels;
1331  while (src < src_end) {
1332    const uint32_t argb = *src++;
1333    const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
1334    const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
1335#ifdef WEBP_SWAP_16BIT_CSP
1336    *dst++ = gb;
1337    *dst++ = rg;
1338#else
1339    *dst++ = rg;
1340    *dst++ = gb;
1341#endif
1342  }
1343}
1344
1345void VP8LConvertBGRAToBGR_C(const uint32_t* src,
1346                            int num_pixels, uint8_t* dst) {
1347  const uint32_t* const src_end = src + num_pixels;
1348  while (src < src_end) {
1349    const uint32_t argb = *src++;
1350    *dst++ = (argb >>  0) & 0xff;
1351    *dst++ = (argb >>  8) & 0xff;
1352    *dst++ = (argb >> 16) & 0xff;
1353  }
1354}
1355
1356static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
1357                       int swap_on_big_endian) {
1358  if (is_big_endian() == swap_on_big_endian) {
1359    const uint32_t* const src_end = src + num_pixels;
1360    while (src < src_end) {
1361      const uint32_t argb = *src++;
1362
1363#if !defined(WORDS_BIGENDIAN)
1364#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
1365      *(uint32_t*)dst = BSwap32(argb);
1366#else  // WEBP_REFERENCE_IMPLEMENTATION
1367      dst[0] = (argb >> 24) & 0xff;
1368      dst[1] = (argb >> 16) & 0xff;
1369      dst[2] = (argb >>  8) & 0xff;
1370      dst[3] = (argb >>  0) & 0xff;
1371#endif
1372#else  // WORDS_BIGENDIAN
1373      dst[0] = (argb >>  0) & 0xff;
1374      dst[1] = (argb >>  8) & 0xff;
1375      dst[2] = (argb >> 16) & 0xff;
1376      dst[3] = (argb >> 24) & 0xff;
1377#endif
1378      dst += sizeof(argb);
1379    }
1380  } else {
1381    memcpy(dst, src, num_pixels * sizeof(*src));
1382  }
1383}
1384
1385void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
1386                         WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
1387  switch (out_colorspace) {
1388    case MODE_RGB:
1389      VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
1390      break;
1391    case MODE_RGBA:
1392      VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
1393      break;
1394    case MODE_rgbA:
1395      VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
1396      WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1397      break;
1398    case MODE_BGR:
1399      VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
1400      break;
1401    case MODE_BGRA:
1402      CopyOrSwap(in_data, num_pixels, rgba, 1);
1403      break;
1404    case MODE_bgrA:
1405      CopyOrSwap(in_data, num_pixels, rgba, 1);
1406      WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1407      break;
1408    case MODE_ARGB:
1409      CopyOrSwap(in_data, num_pixels, rgba, 0);
1410      break;
1411    case MODE_Argb:
1412      CopyOrSwap(in_data, num_pixels, rgba, 0);
1413      WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
1414      break;
1415    case MODE_RGBA_4444:
1416      VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1417      break;
1418    case MODE_rgbA_4444:
1419      VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1420      WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
1421      break;
1422    case MODE_RGB_565:
1423      VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
1424      break;
1425    default:
1426      assert(0);          // Code flow should not reach here.
1427  }
1428}
1429
1430//------------------------------------------------------------------------------
1431// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
1432void VP8LBundleColorMap(const uint8_t* const row, int width,
1433                        int xbits, uint32_t* const dst) {
1434  int x;
1435  if (xbits > 0) {
1436    const int bit_depth = 1 << (3 - xbits);
1437    const int mask = (1 << xbits) - 1;
1438    uint32_t code = 0xff000000;
1439    for (x = 0; x < width; ++x) {
1440      const int xsub = x & mask;
1441      if (xsub == 0) {
1442        code = 0xff000000;
1443      }
1444      code |= row[x] << (8 + bit_depth * xsub);
1445      dst[x >> xbits] = code;
1446    }
1447  } else {
1448    for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8);
1449  }
1450}
1451
1452//------------------------------------------------------------------------------
1453
1454static double ExtraCost(const uint32_t* population, int length) {
1455  int i;
1456  double cost = 0.;
1457  for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
1458  return cost;
1459}
1460
1461static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y,
1462                                int length) {
1463  int i;
1464  double cost = 0.;
1465  for (i = 2; i < length - 2; ++i) {
1466    const int xy = X[i + 2] + Y[i + 2];
1467    cost += (i >> 1) * xy;
1468  }
1469  return cost;
1470}
1471
1472// Returns the various RLE counts
1473static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) {
1474  int i;
1475  int streak = 0;
1476  VP8LStreaks stats;
1477  memset(&stats, 0, sizeof(stats));
1478  for (i = 0; i < length - 1; ++i) {
1479    ++streak;
1480    if (population[i] == population[i + 1]) {
1481      continue;
1482    }
1483    stats.counts[population[i] != 0] += (streak > 3);
1484    stats.streaks[population[i] != 0][(streak > 3)] += streak;
1485    streak = 0;
1486  }
1487  ++streak;
1488  stats.counts[population[i] != 0] += (streak > 3);
1489  stats.streaks[population[i] != 0][(streak > 3)] += streak;
1490  return stats;
1491}
1492
1493static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
1494                                            const uint32_t* Y, int length) {
1495  int i;
1496  int streak = 0;
1497  VP8LStreaks stats;
1498  memset(&stats, 0, sizeof(stats));
1499  for (i = 0; i < length - 1; ++i) {
1500    const int xy = X[i] + Y[i];
1501    const int xy_next = X[i + 1] + Y[i + 1];
1502    ++streak;
1503    if (xy == xy_next) {
1504      continue;
1505    }
1506    stats.counts[xy != 0] += (streak > 3);
1507    stats.streaks[xy != 0][(streak > 3)] += streak;
1508    streak = 0;
1509  }
1510  {
1511    const int xy = X[i] + Y[i];
1512    ++streak;
1513    stats.counts[xy != 0] += (streak > 3);
1514    stats.streaks[xy != 0][(streak > 3)] += streak;
1515  }
1516  return stats;
1517}
1518
1519//------------------------------------------------------------------------------
1520
1521static void HistogramAdd(const VP8LHistogram* const a,
1522                         const VP8LHistogram* const b,
1523                         VP8LHistogram* const out) {
1524  int i;
1525  const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
1526  assert(a->palette_code_bits_ == b->palette_code_bits_);
1527  if (b != out) {
1528    for (i = 0; i < literal_size; ++i) {
1529      out->literal_[i] = a->literal_[i] + b->literal_[i];
1530    }
1531    for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
1532      out->distance_[i] = a->distance_[i] + b->distance_[i];
1533    }
1534    for (i = 0; i < NUM_LITERAL_CODES; ++i) {
1535      out->red_[i] = a->red_[i] + b->red_[i];
1536      out->blue_[i] = a->blue_[i] + b->blue_[i];
1537      out->alpha_[i] = a->alpha_[i] + b->alpha_[i];
1538    }
1539  } else {
1540    for (i = 0; i < literal_size; ++i) {
1541      out->literal_[i] += a->literal_[i];
1542    }
1543    for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
1544      out->distance_[i] += a->distance_[i];
1545    }
1546    for (i = 0; i < NUM_LITERAL_CODES; ++i) {
1547      out->red_[i] += a->red_[i];
1548      out->blue_[i] += a->blue_[i];
1549      out->alpha_[i] += a->alpha_[i];
1550    }
1551  }
1552}
1553
1554//------------------------------------------------------------------------------
1555
1556VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
1557VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
1558VP8LPredictorFunc VP8LPredictors[16];
1559
1560VP8LTransformColorFunc VP8LTransformColor;
1561VP8LTransformColorFunc VP8LTransformColorInverse;
1562
1563VP8LConvertFunc VP8LConvertBGRAToRGB;
1564VP8LConvertFunc VP8LConvertBGRAToRGBA;
1565VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
1566VP8LConvertFunc VP8LConvertBGRAToRGB565;
1567VP8LConvertFunc VP8LConvertBGRAToBGR;
1568
1569VP8LFastLog2SlowFunc VP8LFastLog2Slow;
1570VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
1571
1572VP8LCostFunc VP8LExtraCost;
1573VP8LCostCombinedFunc VP8LExtraCostCombined;
1574
1575VP8LCostCountFunc VP8LHuffmanCostCount;
1576VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
1577
1578VP8LHistogramAddFunc VP8LHistogramAdd;
1579
1580extern void VP8LDspInitSSE2(void);
1581extern void VP8LDspInitNEON(void);
1582extern void VP8LDspInitMIPS32(void);
1583
1584void VP8LDspInit(void) {
1585  memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors));
1586
1587  VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
1588  VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
1589
1590  VP8LTransformColor = VP8LTransformColor_C;
1591  VP8LTransformColorInverse = VP8LTransformColorInverse_C;
1592
1593  VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
1594  VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
1595  VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
1596  VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
1597  VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
1598
1599  VP8LFastLog2Slow = FastLog2Slow;
1600  VP8LFastSLog2Slow = FastSLog2Slow;
1601
1602  VP8LExtraCost = ExtraCost;
1603  VP8LExtraCostCombined = ExtraCostCombined;
1604
1605  VP8LHuffmanCostCount = HuffmanCostCount;
1606  VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount;
1607
1608  VP8LHistogramAdd = HistogramAdd;
1609
1610  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
1611  if (VP8GetCPUInfo != NULL) {
1612#if defined(WEBP_USE_SSE2)
1613    if (VP8GetCPUInfo(kSSE2)) {
1614      VP8LDspInitSSE2();
1615    }
1616#endif
1617#if defined(WEBP_USE_NEON)
1618    if (VP8GetCPUInfo(kNEON)) {
1619      VP8LDspInitNEON();
1620    }
1621#endif
1622#if defined(WEBP_USE_MIPS32)
1623    if (VP8GetCPUInfo(kMIPS32)) {
1624      VP8LDspInitMIPS32();
1625    }
1626#endif
1627  }
1628}
1629
1630//------------------------------------------------------------------------------
1631