lossless.c revision 8b720228d581a84fd173b6dcb2fa295b59db489a
1// Copyright 2012 Google Inc. All Rights Reserved. 2// 3// Use of this source code is governed by a BSD-style license 4// that can be found in the COPYING file in the root of the source 5// tree. An additional intellectual property rights grant can be found 6// in the file PATENTS. All contributing project authors may 7// be found in the AUTHORS file in the root of the source tree. 8// ----------------------------------------------------------------------------- 9// 10// Image transforms and color space conversion methods for lossless decoder. 11// 12// Authors: Vikas Arora (vikaas.arora@gmail.com) 13// Jyrki Alakuijala (jyrki@google.com) 14// Urvang Joshi (urvang@google.com) 15 16#include "./dsp.h" 17 18#if defined(WEBP_USE_SSE2) 19#include <emmintrin.h> 20#endif 21 22#include <math.h> 23#include <stdlib.h> 24#include "./lossless.h" 25#include "../dec/vp8li.h" 26#include "./yuv.h" 27 28#define MAX_DIFF_COST (1e30f) 29 30// lookup table for small values of log2(int) 31#define APPROX_LOG_MAX 4096 32#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086 33const float kLog2Table[LOG_LOOKUP_IDX_MAX] = { 34 0.0000000000000000f, 0.0000000000000000f, 35 1.0000000000000000f, 1.5849625007211560f, 36 2.0000000000000000f, 2.3219280948873621f, 37 2.5849625007211560f, 2.8073549220576041f, 38 3.0000000000000000f, 3.1699250014423121f, 39 3.3219280948873621f, 3.4594316186372973f, 40 3.5849625007211560f, 3.7004397181410921f, 41 3.8073549220576041f, 3.9068905956085187f, 42 4.0000000000000000f, 4.0874628412503390f, 43 4.1699250014423121f, 4.2479275134435852f, 44 4.3219280948873626f, 4.3923174227787606f, 45 4.4594316186372973f, 4.5235619560570130f, 46 4.5849625007211560f, 4.6438561897747243f, 47 4.7004397181410917f, 4.7548875021634682f, 48 4.8073549220576037f, 4.8579809951275718f, 49 4.9068905956085187f, 4.9541963103868749f, 50 5.0000000000000000f, 5.0443941193584533f, 51 5.0874628412503390f, 5.1292830169449663f, 52 5.1699250014423121f, 5.2094533656289501f, 53 5.2479275134435852f, 5.2854022188622487f, 54 5.3219280948873626f, 5.3575520046180837f, 55 5.3923174227787606f, 5.4262647547020979f, 56 5.4594316186372973f, 5.4918530963296747f, 57 5.5235619560570130f, 5.5545888516776376f, 58 5.5849625007211560f, 5.6147098441152083f, 59 5.6438561897747243f, 5.6724253419714951f, 60 5.7004397181410917f, 5.7279204545631987f, 61 5.7548875021634682f, 5.7813597135246599f, 62 5.8073549220576037f, 5.8328900141647412f, 63 5.8579809951275718f, 5.8826430493618415f, 64 5.9068905956085187f, 5.9307373375628866f, 65 5.9541963103868749f, 5.9772799234999167f, 66 6.0000000000000000f, 6.0223678130284543f, 67 6.0443941193584533f, 6.0660891904577720f, 68 6.0874628412503390f, 6.1085244567781691f, 69 6.1292830169449663f, 6.1497471195046822f, 70 6.1699250014423121f, 6.1898245588800175f, 71 6.2094533656289501f, 6.2288186904958804f, 72 6.2479275134435852f, 6.2667865406949010f, 73 6.2854022188622487f, 6.3037807481771030f, 74 6.3219280948873626f, 6.3398500028846243f, 75 6.3575520046180837f, 6.3750394313469245f, 76 6.3923174227787606f, 6.4093909361377017f, 77 6.4262647547020979f, 6.4429434958487279f, 78 6.4594316186372973f, 6.4757334309663976f, 79 6.4918530963296747f, 6.5077946401986963f, 80 6.5235619560570130f, 6.5391588111080309f, 81 6.5545888516776376f, 6.5698556083309478f, 82 6.5849625007211560f, 6.5999128421871278f, 83 6.6147098441152083f, 6.6293566200796094f, 84 6.6438561897747243f, 6.6582114827517946f, 85 6.6724253419714951f, 6.6865005271832185f, 86 6.7004397181410917f, 6.7142455176661224f, 87 6.7279204545631987f, 6.7414669864011464f, 88 6.7548875021634682f, 6.7681843247769259f, 89 6.7813597135246599f, 6.7944158663501061f, 90 6.8073549220576037f, 6.8201789624151878f, 91 6.8328900141647412f, 6.8454900509443747f, 92 6.8579809951275718f, 6.8703647195834047f, 93 6.8826430493618415f, 6.8948177633079437f, 94 6.9068905956085187f, 6.9188632372745946f, 95 6.9307373375628866f, 6.9425145053392398f, 96 6.9541963103868749f, 6.9657842846620869f, 97 6.9772799234999167f, 6.9886846867721654f, 98 7.0000000000000000f, 7.0112272554232539f, 99 7.0223678130284543f, 7.0334230015374501f, 100 7.0443941193584533f, 7.0552824355011898f, 101 7.0660891904577720f, 7.0768155970508308f, 102 7.0874628412503390f, 7.0980320829605263f, 103 7.1085244567781691f, 7.1189410727235076f, 104 7.1292830169449663f, 7.1395513523987936f, 105 7.1497471195046822f, 7.1598713367783890f, 106 7.1699250014423121f, 7.1799090900149344f, 107 7.1898245588800175f, 7.1996723448363644f, 108 7.2094533656289501f, 7.2191685204621611f, 109 7.2288186904958804f, 7.2384047393250785f, 110 7.2479275134435852f, 7.2573878426926521f, 111 7.2667865406949010f, 7.2761244052742375f, 112 7.2854022188622487f, 7.2946207488916270f, 113 7.3037807481771030f, 7.3128829552843557f, 114 7.3219280948873626f, 7.3309168781146167f, 115 7.3398500028846243f, 7.3487281542310771f, 116 7.3575520046180837f, 7.3663222142458160f, 117 7.3750394313469245f, 7.3837042924740519f, 118 7.3923174227787606f, 7.4008794362821843f, 119 7.4093909361377017f, 7.4178525148858982f, 120 7.4262647547020979f, 7.4346282276367245f, 121 7.4429434958487279f, 7.4512111118323289f, 122 7.4594316186372973f, 7.4676055500829976f, 123 7.4757334309663976f, 7.4838157772642563f, 124 7.4918530963296747f, 7.4998458870832056f, 125 7.5077946401986963f, 7.5156998382840427f, 126 7.5235619560570130f, 7.5313814605163118f, 127 7.5391588111080309f, 7.5468944598876364f, 128 7.5545888516776376f, 7.5622424242210728f, 129 7.5698556083309478f, 7.5774288280357486f, 130 7.5849625007211560f, 7.5924570372680806f, 131 7.5999128421871278f, 7.6073303137496104f, 132 7.6147098441152083f, 7.6220518194563764f, 133 7.6293566200796094f, 7.6366246205436487f, 134 7.6438561897747243f, 7.6510516911789281f, 135 7.6582114827517946f, 7.6653359171851764f, 136 7.6724253419714951f, 7.6794800995054464f, 137 7.6865005271832185f, 7.6934869574993252f, 138 7.7004397181410917f, 7.7073591320808825f, 139 7.7142455176661224f, 7.7210991887071855f, 140 7.7279204545631987f, 7.7347096202258383f, 141 7.7414669864011464f, 7.7481928495894605f, 142 7.7548875021634682f, 7.7615512324444795f, 143 7.7681843247769259f, 7.7747870596011736f, 144 7.7813597135246599f, 7.7879025593914317f, 145 7.7944158663501061f, 7.8008998999203047f, 146 7.8073549220576037f, 7.8137811912170374f, 147 7.8201789624151878f, 7.8265484872909150f, 148 7.8328900141647412f, 7.8392037880969436f, 149 7.8454900509443747f, 7.8517490414160571f, 150 7.8579809951275718f, 7.8641861446542797f, 151 7.8703647195834047f, 7.8765169465649993f, 152 7.8826430493618415f, 7.8887432488982591f, 153 7.8948177633079437f, 7.9008668079807486f, 154 7.9068905956085187f, 7.9128893362299619f, 155 7.9188632372745946f, 7.9248125036057812f, 156 7.9307373375628866f, 7.9366379390025709f, 157 7.9425145053392398f, 7.9483672315846778f, 158 7.9541963103868749f, 7.9600019320680805f, 159 7.9657842846620869f, 7.9715435539507719f, 160 7.9772799234999167f, 7.9829935746943103f, 161 7.9886846867721654f, 7.9943534368588577f 162}; 163 164const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = { 165 0.00000000f, 0.00000000f, 2.00000000f, 4.75488750f, 166 8.00000000f, 11.60964047f, 15.50977500f, 19.65148445f, 167 24.00000000f, 28.52932501f, 33.21928095f, 38.05374781f, 168 43.01955001f, 48.10571634f, 53.30296891f, 58.60335893f, 169 64.00000000f, 69.48686830f, 75.05865003f, 80.71062276f, 170 86.43856190f, 92.23866588f, 98.10749561f, 104.04192499f, 171 110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f, 172 134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f, 173 160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f, 174 186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f, 175 212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f, 176 240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f, 177 268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f, 178 296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f, 179 325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f, 180 354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f, 181 384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f, 182 413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f, 183 444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f, 184 474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f, 185 505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f, 186 536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f, 187 568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f, 188 600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f, 189 632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f, 190 664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f, 191 696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f, 192 729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f, 193 762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f, 194 795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f, 195 828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f, 196 862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f, 197 896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f, 198 929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f, 199 963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f, 200 998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f, 201 1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f, 202 1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f, 203 1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f, 204 1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f, 205 1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f, 206 1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f, 207 1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f, 208 1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f, 209 1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f, 210 1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f, 211 1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f, 212 1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f, 213 1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f, 214 1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f, 215 1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f, 216 1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f, 217 1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f, 218 1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f, 219 1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f, 220 1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f, 221 1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f, 222 1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f, 223 1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f, 224 1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f, 225 1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f, 226 1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f, 227 1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f, 228 2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f 229}; 230 231const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = { 232 { 0, 0}, { 0, 0}, { 1, 0}, { 2, 0}, { 3, 0}, { 4, 1}, { 4, 1}, { 5, 1}, 233 { 5, 1}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 7, 2}, { 7, 2}, { 7, 2}, 234 { 7, 2}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, 235 { 8, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, 236 { 9, 3}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, 237 {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, 238 {10, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, 239 {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, 240 {11, 4}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, 241 {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, 242 {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, 243 {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, 244 {12, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, 245 {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, 246 {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, 247 {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, 248 {13, 5}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, 249 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, 250 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, 251 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, 252 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, 253 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, 254 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, 255 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, 256 {14, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, 257 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, 258 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, 259 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, 260 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, 261 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, 262 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, 263 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, 264 {15, 6}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 265 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 266 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 267 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 268 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 269 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 270 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 271 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 272 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 273 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 274 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 275 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 276 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 277 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 278 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 279 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, 280 {16, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 281 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 282 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 283 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 284 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 285 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 286 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 287 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 288 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 289 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 290 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 291 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 292 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 293 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 294 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 295 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, 296}; 297 298const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = { 299 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 300 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 301 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 302 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 303 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 304 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 305 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 306 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 307 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 308 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 309 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 310 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 311 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 312 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 313 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 314 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 315 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 316 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 317 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 318 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 319 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 320 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 321 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 322 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 323 127, 324 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 325 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 326 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 327 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 328 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 329 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 330 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 331 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126 332}; 333 334float VP8LFastSLog2Slow(int v) { 335 assert(v >= LOG_LOOKUP_IDX_MAX); 336 if (v < APPROX_LOG_MAX) { 337 int log_cnt = 0; 338 const float v_f = (float)v; 339 while (v >= LOG_LOOKUP_IDX_MAX) { 340 ++log_cnt; 341 v = v >> 1; 342 } 343 return v_f * (kLog2Table[v] + log_cnt); 344 } else { 345 return (float)(LOG_2_RECIPROCAL * v * log((double)v)); 346 } 347} 348 349float VP8LFastLog2Slow(int v) { 350 assert(v >= LOG_LOOKUP_IDX_MAX); 351 if (v < APPROX_LOG_MAX) { 352 int log_cnt = 0; 353 while (v >= LOG_LOOKUP_IDX_MAX) { 354 ++log_cnt; 355 v = v >> 1; 356 } 357 return kLog2Table[v] + log_cnt; 358 } else { 359 return (float)(LOG_2_RECIPROCAL * log((double)v)); 360 } 361} 362 363//------------------------------------------------------------------------------ 364// Image transforms. 365 366// In-place sum of each component with mod 256. 367static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) { 368 const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u); 369 const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu); 370 *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu); 371} 372 373static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) { 374 return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1); 375} 376 377static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { 378 return Average2(Average2(a0, a2), a1); 379} 380 381static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, 382 uint32_t a2, uint32_t a3) { 383 return Average2(Average2(a0, a1), Average2(a2, a3)); 384} 385 386static WEBP_INLINE uint32_t Clip255(uint32_t a) { 387 if (a < 256) { 388 return a; 389 } 390 // return 0, when a is a negative integer. 391 // return 255, when a is positive. 392 return ~a >> 24; 393} 394 395static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) { 396 return Clip255(a + b - c); 397} 398 399static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, 400 uint32_t c2) { 401 const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24); 402 const int r = AddSubtractComponentFull((c0 >> 16) & 0xff, 403 (c1 >> 16) & 0xff, 404 (c2 >> 16) & 0xff); 405 const int g = AddSubtractComponentFull((c0 >> 8) & 0xff, 406 (c1 >> 8) & 0xff, 407 (c2 >> 8) & 0xff); 408 const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff); 409 return (a << 24) | (r << 16) | (g << 8) | b; 410} 411 412static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) { 413 return Clip255(a + (a - b) / 2); 414} 415 416static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, 417 uint32_t c2) { 418 const uint32_t ave = Average2(c0, c1); 419 const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24); 420 const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff); 421 const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff); 422 const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff); 423 return (a << 24) | (r << 16) | (g << 8) | b; 424} 425 426static WEBP_INLINE int Sub3(int a, int b, int c) { 427 const int pb = b - c; 428 const int pa = a - c; 429 return abs(pb) - abs(pa); 430} 431 432static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { 433 const int pa_minus_pb = 434 Sub3((a >> 24) , (b >> 24) , (c >> 24) ) + 435 Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) + 436 Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) + 437 Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff); 438 return (pa_minus_pb <= 0) ? a : b; 439} 440 441//------------------------------------------------------------------------------ 442// Predictors 443 444static uint32_t Predictor0(uint32_t left, const uint32_t* const top) { 445 (void)top; 446 (void)left; 447 return ARGB_BLACK; 448} 449static uint32_t Predictor1(uint32_t left, const uint32_t* const top) { 450 (void)top; 451 return left; 452} 453static uint32_t Predictor2(uint32_t left, const uint32_t* const top) { 454 (void)left; 455 return top[0]; 456} 457static uint32_t Predictor3(uint32_t left, const uint32_t* const top) { 458 (void)left; 459 return top[1]; 460} 461static uint32_t Predictor4(uint32_t left, const uint32_t* const top) { 462 (void)left; 463 return top[-1]; 464} 465static uint32_t Predictor5(uint32_t left, const uint32_t* const top) { 466 const uint32_t pred = Average3(left, top[0], top[1]); 467 return pred; 468} 469static uint32_t Predictor6(uint32_t left, const uint32_t* const top) { 470 const uint32_t pred = Average2(left, top[-1]); 471 return pred; 472} 473static uint32_t Predictor7(uint32_t left, const uint32_t* const top) { 474 const uint32_t pred = Average2(left, top[0]); 475 return pred; 476} 477static uint32_t Predictor8(uint32_t left, const uint32_t* const top) { 478 const uint32_t pred = Average2(top[-1], top[0]); 479 (void)left; 480 return pred; 481} 482static uint32_t Predictor9(uint32_t left, const uint32_t* const top) { 483 const uint32_t pred = Average2(top[0], top[1]); 484 (void)left; 485 return pred; 486} 487static uint32_t Predictor10(uint32_t left, const uint32_t* const top) { 488 const uint32_t pred = Average4(left, top[-1], top[0], top[1]); 489 return pred; 490} 491static uint32_t Predictor11(uint32_t left, const uint32_t* const top) { 492 const uint32_t pred = VP8LSelect(top[0], left, top[-1]); 493 return pred; 494} 495static uint32_t Predictor12(uint32_t left, const uint32_t* const top) { 496 const uint32_t pred = VP8LClampedAddSubtractFull(left, top[0], top[-1]); 497 return pred; 498} 499static uint32_t Predictor13(uint32_t left, const uint32_t* const top) { 500 const uint32_t pred = VP8LClampedAddSubtractHalf(left, top[0], top[-1]); 501 return pred; 502} 503 504// TODO(vikasa): Export the predictor array, to allow SSE2 variants. 505typedef uint32_t (*PredictorFunc)(uint32_t left, const uint32_t* const top); 506static const PredictorFunc kPredictors[16] = { 507 Predictor0, Predictor1, Predictor2, Predictor3, 508 Predictor4, Predictor5, Predictor6, Predictor7, 509 Predictor8, Predictor9, Predictor10, Predictor11, 510 Predictor12, Predictor13, 511 Predictor0, Predictor0 // <- padding security sentinels 512}; 513 514// TODO(vikasa): Replace 256 etc with defines. 515static float PredictionCostSpatial(const int* counts, 516 int weight_0, double exp_val) { 517 const int significant_symbols = 16; 518 const double exp_decay_factor = 0.6; 519 double bits = weight_0 * counts[0]; 520 int i; 521 for (i = 1; i < significant_symbols; ++i) { 522 bits += exp_val * (counts[i] + counts[256 - i]); 523 exp_val *= exp_decay_factor; 524 } 525 return (float)(-0.1 * bits); 526} 527 528// Compute the combined Shanon's entropy for distribution {X} and {X+Y} 529static float CombinedShannonEntropy(const int* const X, 530 const int* const Y, int n) { 531 int i; 532 double retval = 0.; 533 int sumX = 0, sumXY = 0; 534 for (i = 0; i < n; ++i) { 535 const int x = X[i]; 536 const int xy = X[i] + Y[i]; 537 if (x != 0) { 538 sumX += x; 539 retval -= VP8LFastSLog2(x); 540 } 541 if (xy != 0) { 542 sumXY += xy; 543 retval -= VP8LFastSLog2(xy); 544 } 545 } 546 retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); 547 return (float)retval; 548} 549 550static float PredictionCostSpatialHistogram(int accumulated[4][256], 551 int tile[4][256]) { 552 int i; 553 double retval = 0; 554 for (i = 0; i < 4; ++i) { 555 const double kExpValue = 0.94; 556 retval += PredictionCostSpatial(tile[i], 1, kExpValue); 557 retval += CombinedShannonEntropy(tile[i], accumulated[i], 256); 558 } 559 return (float)retval; 560} 561 562static int GetBestPredictorForTile(int width, int height, 563 int tile_x, int tile_y, int bits, 564 int accumulated[4][256], 565 const uint32_t* const argb_scratch) { 566 const int kNumPredModes = 14; 567 const int col_start = tile_x << bits; 568 const int row_start = tile_y << bits; 569 const int tile_size = 1 << bits; 570 const int ymax = (tile_size <= height - row_start) ? 571 tile_size : height - row_start; 572 const int xmax = (tile_size <= width - col_start) ? 573 tile_size : width - col_start; 574 int histo[4][256]; 575 float best_diff = MAX_DIFF_COST; 576 int best_mode = 0; 577 578 int mode; 579 for (mode = 0; mode < kNumPredModes; ++mode) { 580 const uint32_t* current_row = argb_scratch; 581 const PredictorFunc pred_func = kPredictors[mode]; 582 float cur_diff; 583 int y; 584 memset(&histo[0][0], 0, sizeof(histo)); 585 for (y = 0; y < ymax; ++y) { 586 int x; 587 const int row = row_start + y; 588 const uint32_t* const upper_row = current_row; 589 current_row = upper_row + width; 590 for (x = 0; x < xmax; ++x) { 591 const int col = col_start + x; 592 uint32_t predict; 593 uint32_t predict_diff; 594 if (row == 0) { 595 predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left. 596 } else if (col == 0) { 597 predict = upper_row[col]; // Top. 598 } else { 599 predict = pred_func(current_row[col - 1], upper_row + col); 600 } 601 predict_diff = VP8LSubPixels(current_row[col], predict); 602 ++histo[0][predict_diff >> 24]; 603 ++histo[1][((predict_diff >> 16) & 0xff)]; 604 ++histo[2][((predict_diff >> 8) & 0xff)]; 605 ++histo[3][(predict_diff & 0xff)]; 606 } 607 } 608 cur_diff = PredictionCostSpatialHistogram(accumulated, histo); 609 if (cur_diff < best_diff) { 610 best_diff = cur_diff; 611 best_mode = mode; 612 } 613 } 614 615 return best_mode; 616} 617 618static void CopyTileWithPrediction(int width, int height, 619 int tile_x, int tile_y, int bits, int mode, 620 const uint32_t* const argb_scratch, 621 uint32_t* const argb) { 622 const int col_start = tile_x << bits; 623 const int row_start = tile_y << bits; 624 const int tile_size = 1 << bits; 625 const int ymax = (tile_size <= height - row_start) ? 626 tile_size : height - row_start; 627 const int xmax = (tile_size <= width - col_start) ? 628 tile_size : width - col_start; 629 const PredictorFunc pred_func = kPredictors[mode]; 630 const uint32_t* current_row = argb_scratch; 631 632 int y; 633 for (y = 0; y < ymax; ++y) { 634 int x; 635 const int row = row_start + y; 636 const uint32_t* const upper_row = current_row; 637 current_row = upper_row + width; 638 for (x = 0; x < xmax; ++x) { 639 const int col = col_start + x; 640 const int pix = row * width + col; 641 uint32_t predict; 642 if (row == 0) { 643 predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left. 644 } else if (col == 0) { 645 predict = upper_row[col]; // Top. 646 } else { 647 predict = pred_func(current_row[col - 1], upper_row + col); 648 } 649 argb[pix] = VP8LSubPixels(current_row[col], predict); 650 } 651 } 652} 653 654void VP8LResidualImage(int width, int height, int bits, 655 uint32_t* const argb, uint32_t* const argb_scratch, 656 uint32_t* const image) { 657 const int max_tile_size = 1 << bits; 658 const int tiles_per_row = VP8LSubSampleSize(width, bits); 659 const int tiles_per_col = VP8LSubSampleSize(height, bits); 660 uint32_t* const upper_row = argb_scratch; 661 uint32_t* const current_tile_rows = argb_scratch + width; 662 int tile_y; 663 int histo[4][256]; 664 memset(histo, 0, sizeof(histo)); 665 for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) { 666 const int tile_y_offset = tile_y * max_tile_size; 667 const int this_tile_height = 668 (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset; 669 int tile_x; 670 if (tile_y > 0) { 671 memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width, 672 width * sizeof(*upper_row)); 673 } 674 memcpy(current_tile_rows, &argb[tile_y_offset * width], 675 this_tile_height * width * sizeof(*current_tile_rows)); 676 for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) { 677 int pred; 678 int y; 679 const int tile_x_offset = tile_x * max_tile_size; 680 int all_x_max = tile_x_offset + max_tile_size; 681 if (all_x_max > width) { 682 all_x_max = width; 683 } 684 pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, histo, 685 argb_scratch); 686 image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8); 687 CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred, 688 argb_scratch, argb); 689 for (y = 0; y < max_tile_size; ++y) { 690 int ix; 691 int all_x; 692 int all_y = tile_y_offset + y; 693 if (all_y >= height) { 694 break; 695 } 696 ix = all_y * width + tile_x_offset; 697 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { 698 const uint32_t a = argb[ix]; 699 ++histo[0][a >> 24]; 700 ++histo[1][((a >> 16) & 0xff)]; 701 ++histo[2][((a >> 8) & 0xff)]; 702 ++histo[3][(a & 0xff)]; 703 } 704 } 705 } 706 } 707} 708 709// Inverse prediction. 710static void PredictorInverseTransform(const VP8LTransform* const transform, 711 int y_start, int y_end, uint32_t* data) { 712 const int width = transform->xsize_; 713 if (y_start == 0) { // First Row follows the L (mode=1) mode. 714 int x; 715 const uint32_t pred0 = Predictor0(data[-1], NULL); 716 AddPixelsEq(data, pred0); 717 for (x = 1; x < width; ++x) { 718 const uint32_t pred1 = Predictor1(data[x - 1], NULL); 719 AddPixelsEq(data + x, pred1); 720 } 721 data += width; 722 ++y_start; 723 } 724 725 { 726 int y = y_start; 727 const int mask = (1 << transform->bits_) - 1; 728 const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_); 729 const uint32_t* pred_mode_base = 730 transform->data_ + (y >> transform->bits_) * tiles_per_row; 731 732 while (y < y_end) { 733 int x; 734 const uint32_t pred2 = Predictor2(data[-1], data - width); 735 const uint32_t* pred_mode_src = pred_mode_base; 736 PredictorFunc pred_func; 737 738 // First pixel follows the T (mode=2) mode. 739 AddPixelsEq(data, pred2); 740 741 // .. the rest: 742 pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf]; 743 for (x = 1; x < width; ++x) { 744 uint32_t pred; 745 if ((x & mask) == 0) { // start of tile. Read predictor function. 746 pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf]; 747 } 748 pred = pred_func(data[x - 1], data + x - width); 749 AddPixelsEq(data + x, pred); 750 } 751 data += width; 752 ++y; 753 if ((y & mask) == 0) { // Use the same mask, since tiles are squares. 754 pred_mode_base += tiles_per_row; 755 } 756 } 757 } 758} 759 760static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) { 761 int i = 0; 762 for (; i < num_pixs; ++i) { 763 const uint32_t argb = argb_data[i]; 764 const uint32_t green = (argb >> 8) & 0xff; 765 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff; 766 const uint32_t new_b = ((argb & 0xff) - green) & 0xff; 767 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b; 768 } 769} 770 771// Add green to blue and red channels (i.e. perform the inverse transform of 772// 'subtract green'). 773static void AddGreenToBlueAndRed(uint32_t* data, const uint32_t* data_end) { 774 while (data < data_end) { 775 const uint32_t argb = *data; 776 const uint32_t green = ((argb >> 8) & 0xff); 777 uint32_t red_blue = (argb & 0x00ff00ffu); 778 red_blue += (green << 16) | green; 779 red_blue &= 0x00ff00ffu; 780 *data++ = (argb & 0xff00ff00u) | red_blue; 781 } 782} 783 784typedef struct { 785 // Note: the members are uint8_t, so that any negative values are 786 // automatically converted to "mod 256" values. 787 uint8_t green_to_red_; 788 uint8_t green_to_blue_; 789 uint8_t red_to_blue_; 790} Multipliers; 791 792static WEBP_INLINE void MultipliersClear(Multipliers* m) { 793 m->green_to_red_ = 0; 794 m->green_to_blue_ = 0; 795 m->red_to_blue_ = 0; 796} 797 798static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred, 799 int8_t color) { 800 return (uint32_t)((int)(color_pred) * color) >> 5; 801} 802 803static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code, 804 Multipliers* const m) { 805 m->green_to_red_ = (color_code >> 0) & 0xff; 806 m->green_to_blue_ = (color_code >> 8) & 0xff; 807 m->red_to_blue_ = (color_code >> 16) & 0xff; 808} 809 810static WEBP_INLINE uint32_t MultipliersToColorCode(Multipliers* const m) { 811 return 0xff000000u | 812 ((uint32_t)(m->red_to_blue_) << 16) | 813 ((uint32_t)(m->green_to_blue_) << 8) | 814 m->green_to_red_; 815} 816 817static WEBP_INLINE uint32_t TransformColor(const Multipliers* const m, 818 uint32_t argb, int inverse) { 819 const uint32_t green = argb >> 8; 820 const uint32_t red = argb >> 16; 821 uint32_t new_red = red; 822 uint32_t new_blue = argb; 823 824 if (inverse) { 825 new_red += ColorTransformDelta(m->green_to_red_, green); 826 new_red &= 0xff; 827 new_blue += ColorTransformDelta(m->green_to_blue_, green); 828 new_blue += ColorTransformDelta(m->red_to_blue_, new_red); 829 new_blue &= 0xff; 830 } else { 831 new_red -= ColorTransformDelta(m->green_to_red_, green); 832 new_red &= 0xff; 833 new_blue -= ColorTransformDelta(m->green_to_blue_, green); 834 new_blue -= ColorTransformDelta(m->red_to_blue_, red); 835 new_blue &= 0xff; 836 } 837 return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue); 838} 839 840static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red, 841 uint32_t argb) { 842 const uint32_t green = argb >> 8; 843 uint32_t new_red = argb >> 16; 844 new_red -= ColorTransformDelta(green_to_red, green); 845 return (new_red & 0xff); 846} 847 848static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue, 849 uint8_t red_to_blue, 850 uint32_t argb) { 851 const uint32_t green = argb >> 8; 852 const uint32_t red = argb >> 16; 853 uint8_t new_blue = argb; 854 new_blue -= ColorTransformDelta(green_to_blue, green); 855 new_blue -= ColorTransformDelta(red_to_blue, red); 856 return (new_blue & 0xff); 857} 858 859static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb, 860 int ix, int xsize) { 861 const uint32_t v = argb[ix]; 862 if (ix >= xsize + 3) { 863 if (v == argb[ix - xsize] && 864 argb[ix - 1] == argb[ix - xsize - 1] && 865 argb[ix - 2] == argb[ix - xsize - 2] && 866 argb[ix - 3] == argb[ix - xsize - 3]) { 867 return 1; 868 } 869 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1]; 870 } else if (ix >= 3) { 871 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1]; 872 } 873 return 0; 874} 875 876static float PredictionCostCrossColor(const int accumulated[256], 877 const int counts[256]) { 878 // Favor low entropy, locally and globally. 879 // Favor small absolute values for PredictionCostSpatial 880 static const double kExpValue = 2.4; 881 return CombinedShannonEntropy(counts, accumulated, 256) + 882 PredictionCostSpatial(counts, 3, kExpValue); 883} 884 885static Multipliers GetBestColorTransformForTile( 886 int tile_x, int tile_y, int bits, 887 Multipliers prevX, 888 Multipliers prevY, 889 int step, int xsize, int ysize, 890 int* accumulated_red_histo, 891 int* accumulated_blue_histo, 892 const uint32_t* const argb) { 893 float best_diff = MAX_DIFF_COST; 894 float cur_diff; 895 const int halfstep = step / 2; 896 const int max_tile_size = 1 << bits; 897 const int tile_y_offset = tile_y * max_tile_size; 898 const int tile_x_offset = tile_x * max_tile_size; 899 int green_to_red; 900 int green_to_blue; 901 int red_to_blue; 902 int all_x_max = tile_x_offset + max_tile_size; 903 int all_y_max = tile_y_offset + max_tile_size; 904 Multipliers best_tx; 905 MultipliersClear(&best_tx); 906 if (all_x_max > xsize) { 907 all_x_max = xsize; 908 } 909 if (all_y_max > ysize) { 910 all_y_max = ysize; 911 } 912 913 for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) { 914 int histo[256] = { 0 }; 915 int all_y; 916 917 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { 918 int ix = all_y * xsize + tile_x_offset; 919 int all_x; 920 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { 921 if (SkipRepeatedPixels(argb, ix, xsize)) { 922 continue; 923 } 924 ++histo[TransformColorRed(green_to_red, argb[ix])]; // red. 925 } 926 } 927 cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]); 928 if ((uint8_t)green_to_red == prevX.green_to_red_) { 929 cur_diff -= 3; // favor keeping the areas locally similar 930 } 931 if ((uint8_t)green_to_red == prevY.green_to_red_) { 932 cur_diff -= 3; // favor keeping the areas locally similar 933 } 934 if (green_to_red == 0) { 935 cur_diff -= 3; 936 } 937 if (cur_diff < best_diff) { 938 best_diff = cur_diff; 939 best_tx.green_to_red_ = green_to_red; 940 } 941 } 942 best_diff = MAX_DIFF_COST; 943 for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) { 944 for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) { 945 int all_y; 946 int histo[256] = { 0 }; 947 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { 948 int all_x; 949 int ix = all_y * xsize + tile_x_offset; 950 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { 951 if (SkipRepeatedPixels(argb, ix, xsize)) { 952 continue; 953 } 954 ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])]; 955 } 956 } 957 cur_diff = 958 PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]); 959 if ((uint8_t)green_to_blue == prevX.green_to_blue_) { 960 cur_diff -= 3; // favor keeping the areas locally similar 961 } 962 if ((uint8_t)green_to_blue == prevY.green_to_blue_) { 963 cur_diff -= 3; // favor keeping the areas locally similar 964 } 965 if ((uint8_t)red_to_blue == prevX.red_to_blue_) { 966 cur_diff -= 3; // favor keeping the areas locally similar 967 } 968 if ((uint8_t)red_to_blue == prevY.red_to_blue_) { 969 cur_diff -= 3; // favor keeping the areas locally similar 970 } 971 if (green_to_blue == 0) { 972 cur_diff -= 3; 973 } 974 if (red_to_blue == 0) { 975 cur_diff -= 3; 976 } 977 if (cur_diff < best_diff) { 978 best_diff = cur_diff; 979 best_tx.green_to_blue_ = green_to_blue; 980 best_tx.red_to_blue_ = red_to_blue; 981 } 982 } 983 } 984 return best_tx; 985} 986 987static void CopyTileWithColorTransform(int xsize, int ysize, 988 int tile_x, int tile_y, int bits, 989 Multipliers color_transform, 990 uint32_t* const argb) { 991 int y; 992 int xscan = 1 << bits; 993 int yscan = 1 << bits; 994 tile_x <<= bits; 995 tile_y <<= bits; 996 if (xscan > xsize - tile_x) { 997 xscan = xsize - tile_x; 998 } 999 if (yscan > ysize - tile_y) { 1000 yscan = ysize - tile_y; 1001 } 1002 yscan += tile_y; 1003 for (y = tile_y; y < yscan; ++y) { 1004 int ix = y * xsize + tile_x; 1005 const int end_ix = ix + xscan; 1006 for (; ix < end_ix; ++ix) { 1007 argb[ix] = TransformColor(&color_transform, argb[ix], 0); 1008 } 1009 } 1010} 1011 1012void VP8LColorSpaceTransform(int width, int height, int bits, int step, 1013 uint32_t* const argb, uint32_t* image) { 1014 const int max_tile_size = 1 << bits; 1015 int tile_xsize = VP8LSubSampleSize(width, bits); 1016 int tile_ysize = VP8LSubSampleSize(height, bits); 1017 int accumulated_red_histo[256] = { 0 }; 1018 int accumulated_blue_histo[256] = { 0 }; 1019 int tile_y; 1020 int tile_x; 1021 Multipliers prevX; 1022 Multipliers prevY; 1023 MultipliersClear(&prevY); 1024 MultipliersClear(&prevX); 1025 for (tile_y = 0; tile_y < tile_ysize; ++tile_y) { 1026 for (tile_x = 0; tile_x < tile_xsize; ++tile_x) { 1027 Multipliers color_transform; 1028 int all_x_max; 1029 int y; 1030 const int tile_y_offset = tile_y * max_tile_size; 1031 const int tile_x_offset = tile_x * max_tile_size; 1032 if (tile_y != 0) { 1033 ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX); 1034 ColorCodeToMultipliers(image[(tile_y - 1) * tile_xsize + tile_x], 1035 &prevY); 1036 } else if (tile_x != 0) { 1037 ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX); 1038 } 1039 color_transform = 1040 GetBestColorTransformForTile(tile_x, tile_y, bits, 1041 prevX, prevY, 1042 step, width, height, 1043 &accumulated_red_histo[0], 1044 &accumulated_blue_histo[0], 1045 argb); 1046 image[tile_y * tile_xsize + tile_x] = 1047 MultipliersToColorCode(&color_transform); 1048 CopyTileWithColorTransform(width, height, tile_x, tile_y, bits, 1049 color_transform, argb); 1050 1051 // Gather accumulated histogram data. 1052 all_x_max = tile_x_offset + max_tile_size; 1053 if (all_x_max > width) { 1054 all_x_max = width; 1055 } 1056 for (y = 0; y < max_tile_size; ++y) { 1057 int ix; 1058 int all_x; 1059 int all_y = tile_y_offset + y; 1060 if (all_y >= height) { 1061 break; 1062 } 1063 ix = all_y * width + tile_x_offset; 1064 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { 1065 if (ix >= 2 && 1066 argb[ix] == argb[ix - 2] && 1067 argb[ix] == argb[ix - 1]) { 1068 continue; // repeated pixels are handled by backward references 1069 } 1070 if (ix >= width + 2 && 1071 argb[ix - 2] == argb[ix - width - 2] && 1072 argb[ix - 1] == argb[ix - width - 1] && 1073 argb[ix] == argb[ix - width]) { 1074 continue; // repeated pixels are handled by backward references 1075 } 1076 ++accumulated_red_histo[(argb[ix] >> 16) & 0xff]; 1077 ++accumulated_blue_histo[argb[ix] & 0xff]; 1078 } 1079 } 1080 } 1081 } 1082} 1083 1084// Color space inverse transform. 1085static void ColorSpaceInverseTransform(const VP8LTransform* const transform, 1086 int y_start, int y_end, uint32_t* data) { 1087 const int width = transform->xsize_; 1088 const int mask = (1 << transform->bits_) - 1; 1089 const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_); 1090 int y = y_start; 1091 const uint32_t* pred_row = 1092 transform->data_ + (y >> transform->bits_) * tiles_per_row; 1093 1094 while (y < y_end) { 1095 const uint32_t* pred = pred_row; 1096 Multipliers m = { 0, 0, 0 }; 1097 int x; 1098 1099 for (x = 0; x < width; ++x) { 1100 if ((x & mask) == 0) ColorCodeToMultipliers(*pred++, &m); 1101 data[x] = TransformColor(&m, data[x], 1); 1102 } 1103 data += width; 1104 ++y; 1105 if ((y & mask) == 0) pred_row += tiles_per_row;; 1106 } 1107} 1108 1109// Separate out pixels packed together using pixel-bundling. 1110// We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t). 1111#define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \ 1112void FUNC_NAME(const VP8LTransform* const transform, \ 1113 int y_start, int y_end, const TYPE* src, TYPE* dst) { \ 1114 int y; \ 1115 const int bits_per_pixel = 8 >> transform->bits_; \ 1116 const int width = transform->xsize_; \ 1117 const uint32_t* const color_map = transform->data_; \ 1118 if (bits_per_pixel < 8) { \ 1119 const int pixels_per_byte = 1 << transform->bits_; \ 1120 const int count_mask = pixels_per_byte - 1; \ 1121 const uint32_t bit_mask = (1 << bits_per_pixel) - 1; \ 1122 for (y = y_start; y < y_end; ++y) { \ 1123 uint32_t packed_pixels = 0; \ 1124 int x; \ 1125 for (x = 0; x < width; ++x) { \ 1126 /* We need to load fresh 'packed_pixels' once every */ \ 1127 /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */ \ 1128 /* is a power of 2, so can just use a mask for that, instead of */ \ 1129 /* decrementing a counter. */ \ 1130 if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++); \ 1131 *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]); \ 1132 packed_pixels >>= bits_per_pixel; \ 1133 } \ 1134 } \ 1135 } else { \ 1136 for (y = y_start; y < y_end; ++y) { \ 1137 int x; \ 1138 for (x = 0; x < width; ++x) { \ 1139 *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \ 1140 } \ 1141 } \ 1142 } \ 1143} 1144 1145static WEBP_INLINE uint32_t GetARGBIndex(uint32_t idx) { 1146 return (idx >> 8) & 0xff; 1147} 1148 1149static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t idx) { 1150 return idx; 1151} 1152 1153static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) { 1154 return val; 1155} 1156 1157static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) { 1158 return (val >> 8) & 0xff; 1159} 1160 1161static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex, 1162 GetARGBValue) 1163COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex, 1164 GetAlphaValue) 1165 1166#undef COLOR_INDEX_INVERSE 1167 1168void VP8LInverseTransform(const VP8LTransform* const transform, 1169 int row_start, int row_end, 1170 const uint32_t* const in, uint32_t* const out) { 1171 const int width = transform->xsize_; 1172 assert(row_start < row_end); 1173 assert(row_end <= transform->ysize_); 1174 switch (transform->type_) { 1175 case SUBTRACT_GREEN: 1176 VP8LAddGreenToBlueAndRed(out, out + (row_end - row_start) * width); 1177 break; 1178 case PREDICTOR_TRANSFORM: 1179 PredictorInverseTransform(transform, row_start, row_end, out); 1180 if (row_end != transform->ysize_) { 1181 // The last predicted row in this iteration will be the top-pred row 1182 // for the first row in next iteration. 1183 memcpy(out - width, out + (row_end - row_start - 1) * width, 1184 width * sizeof(*out)); 1185 } 1186 break; 1187 case CROSS_COLOR_TRANSFORM: 1188 ColorSpaceInverseTransform(transform, row_start, row_end, out); 1189 break; 1190 case COLOR_INDEXING_TRANSFORM: 1191 if (in == out && transform->bits_ > 0) { 1192 // Move packed pixels to the end of unpacked region, so that unpacking 1193 // can occur seamlessly. 1194 // Also, note that this is the only transform that applies on 1195 // the effective width of VP8LSubSampleSize(xsize_, bits_). All other 1196 // transforms work on effective width of xsize_. 1197 const int out_stride = (row_end - row_start) * width; 1198 const int in_stride = (row_end - row_start) * 1199 VP8LSubSampleSize(transform->xsize_, transform->bits_); 1200 uint32_t* const src = out + out_stride - in_stride; 1201 memmove(src, out, in_stride * sizeof(*src)); 1202 ColorIndexInverseTransform(transform, row_start, row_end, src, out); 1203 } else { 1204 ColorIndexInverseTransform(transform, row_start, row_end, in, out); 1205 } 1206 break; 1207 } 1208} 1209 1210//------------------------------------------------------------------------------ 1211// Color space conversion. 1212 1213static int is_big_endian(void) { 1214 static const union { 1215 uint16_t w; 1216 uint8_t b[2]; 1217 } tmp = { 1 }; 1218 return (tmp.b[0] != 1); 1219} 1220 1221static void ConvertBGRAToRGB(const uint32_t* src, 1222 int num_pixels, uint8_t* dst) { 1223 const uint32_t* const src_end = src + num_pixels; 1224 while (src < src_end) { 1225 const uint32_t argb = *src++; 1226 *dst++ = (argb >> 16) & 0xff; 1227 *dst++ = (argb >> 8) & 0xff; 1228 *dst++ = (argb >> 0) & 0xff; 1229 } 1230} 1231 1232static void ConvertBGRAToRGBA(const uint32_t* src, 1233 int num_pixels, uint8_t* dst) { 1234 const uint32_t* const src_end = src + num_pixels; 1235 while (src < src_end) { 1236 const uint32_t argb = *src++; 1237 *dst++ = (argb >> 16) & 0xff; 1238 *dst++ = (argb >> 8) & 0xff; 1239 *dst++ = (argb >> 0) & 0xff; 1240 *dst++ = (argb >> 24) & 0xff; 1241 } 1242} 1243 1244static void ConvertBGRAToRGBA4444(const uint32_t* src, 1245 int num_pixels, uint8_t* dst) { 1246 const uint32_t* const src_end = src + num_pixels; 1247 while (src < src_end) { 1248 const uint32_t argb = *src++; 1249 const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf); 1250 const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf); 1251#ifdef WEBP_SWAP_16BIT_CSP 1252 *dst++ = ba; 1253 *dst++ = rg; 1254#else 1255 *dst++ = rg; 1256 *dst++ = ba; 1257#endif 1258 } 1259} 1260 1261static void ConvertBGRAToRGB565(const uint32_t* src, 1262 int num_pixels, uint8_t* dst) { 1263 const uint32_t* const src_end = src + num_pixels; 1264 while (src < src_end) { 1265 const uint32_t argb = *src++; 1266 const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7); 1267 const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f); 1268#ifdef WEBP_SWAP_16BIT_CSP 1269 *dst++ = gb; 1270 *dst++ = rg; 1271#else 1272 *dst++ = rg; 1273 *dst++ = gb; 1274#endif 1275 } 1276} 1277 1278static void ConvertBGRAToBGR(const uint32_t* src, 1279 int num_pixels, uint8_t* dst) { 1280 const uint32_t* const src_end = src + num_pixels; 1281 while (src < src_end) { 1282 const uint32_t argb = *src++; 1283 *dst++ = (argb >> 0) & 0xff; 1284 *dst++ = (argb >> 8) & 0xff; 1285 *dst++ = (argb >> 16) & 0xff; 1286 } 1287} 1288 1289static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst, 1290 int swap_on_big_endian) { 1291 if (is_big_endian() == swap_on_big_endian) { 1292 const uint32_t* const src_end = src + num_pixels; 1293 while (src < src_end) { 1294 uint32_t argb = *src++; 1295 1296#if !defined(__BIG_ENDIAN__) 1297#if !defined(WEBP_REFERENCE_IMPLEMENTATION) 1298#if defined(__i386__) || defined(__x86_64__) 1299 __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb)); 1300 *(uint32_t*)dst = argb; 1301#elif defined(_MSC_VER) 1302 argb = _byteswap_ulong(argb); 1303 *(uint32_t*)dst = argb; 1304#else 1305 dst[0] = (argb >> 24) & 0xff; 1306 dst[1] = (argb >> 16) & 0xff; 1307 dst[2] = (argb >> 8) & 0xff; 1308 dst[3] = (argb >> 0) & 0xff; 1309#endif 1310#else // WEBP_REFERENCE_IMPLEMENTATION 1311 dst[0] = (argb >> 24) & 0xff; 1312 dst[1] = (argb >> 16) & 0xff; 1313 dst[2] = (argb >> 8) & 0xff; 1314 dst[3] = (argb >> 0) & 0xff; 1315#endif 1316#else // __BIG_ENDIAN__ 1317 dst[0] = (argb >> 0) & 0xff; 1318 dst[1] = (argb >> 8) & 0xff; 1319 dst[2] = (argb >> 16) & 0xff; 1320 dst[3] = (argb >> 24) & 0xff; 1321#endif 1322 dst += sizeof(argb); 1323 } 1324 } else { 1325 memcpy(dst, src, num_pixels * sizeof(*src)); 1326 } 1327} 1328 1329void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels, 1330 WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) { 1331 switch (out_colorspace) { 1332 case MODE_RGB: 1333 ConvertBGRAToRGB(in_data, num_pixels, rgba); 1334 break; 1335 case MODE_RGBA: 1336 ConvertBGRAToRGBA(in_data, num_pixels, rgba); 1337 break; 1338 case MODE_rgbA: 1339 ConvertBGRAToRGBA(in_data, num_pixels, rgba); 1340 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0); 1341 break; 1342 case MODE_BGR: 1343 ConvertBGRAToBGR(in_data, num_pixels, rgba); 1344 break; 1345 case MODE_BGRA: 1346 CopyOrSwap(in_data, num_pixels, rgba, 1); 1347 break; 1348 case MODE_bgrA: 1349 CopyOrSwap(in_data, num_pixels, rgba, 1); 1350 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0); 1351 break; 1352 case MODE_ARGB: 1353 CopyOrSwap(in_data, num_pixels, rgba, 0); 1354 break; 1355 case MODE_Argb: 1356 CopyOrSwap(in_data, num_pixels, rgba, 0); 1357 WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0); 1358 break; 1359 case MODE_RGBA_4444: 1360 ConvertBGRAToRGBA4444(in_data, num_pixels, rgba); 1361 break; 1362 case MODE_rgbA_4444: 1363 ConvertBGRAToRGBA4444(in_data, num_pixels, rgba); 1364 WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0); 1365 break; 1366 case MODE_RGB_565: 1367 ConvertBGRAToRGB565(in_data, num_pixels, rgba); 1368 break; 1369 default: 1370 assert(0); // Code flow should not reach here. 1371 } 1372} 1373 1374// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel. 1375void VP8LBundleColorMap(const uint8_t* const row, int width, 1376 int xbits, uint32_t* const dst) { 1377 int x; 1378 if (xbits > 0) { 1379 const int bit_depth = 1 << (3 - xbits); 1380 const int mask = (1 << xbits) - 1; 1381 uint32_t code = 0xff000000; 1382 for (x = 0; x < width; ++x) { 1383 const int xsub = x & mask; 1384 if (xsub == 0) { 1385 code = 0xff000000; 1386 } 1387 code |= row[x] << (8 + bit_depth * xsub); 1388 dst[x >> xbits] = code; 1389 } 1390 } else { 1391 for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8); 1392 } 1393} 1394 1395//------------------------------------------------------------------------------ 1396 1397// TODO(vikasa): Move the SSE2 functions to lossless_dsp.c (new file), once 1398// color-space conversion methods (ConvertFromBGRA) are also updated for SSE2. 1399#if defined(WEBP_USE_SSE2) 1400static WEBP_INLINE uint32_t ClampedAddSubtractFullSSE2(uint32_t c0, uint32_t c1, 1401 uint32_t c2) { 1402 const __m128i zero = _mm_setzero_si128(); 1403 const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero); 1404 const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero); 1405 const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero); 1406 const __m128i V1 = _mm_add_epi16(C0, C1); 1407 const __m128i V2 = _mm_sub_epi16(V1, C2); 1408 const __m128i b = _mm_packus_epi16(V2, V2); 1409 const uint32_t output = _mm_cvtsi128_si32(b); 1410 return output; 1411} 1412 1413static WEBP_INLINE uint32_t ClampedAddSubtractHalfSSE2(uint32_t c0, uint32_t c1, 1414 uint32_t c2) { 1415 const uint32_t ave = Average2(c0, c1); 1416 const __m128i zero = _mm_setzero_si128(); 1417 const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ave), zero); 1418 const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero); 1419 const __m128i A1 = _mm_sub_epi16(A0, B0); 1420 const __m128i BgtA = _mm_cmpgt_epi16(B0, A0); 1421 const __m128i A2 = _mm_sub_epi16(A1, BgtA); 1422 const __m128i A3 = _mm_srai_epi16(A2, 1); 1423 const __m128i A4 = _mm_add_epi16(A0, A3); 1424 const __m128i A5 = _mm_packus_epi16(A4, A4); 1425 const uint32_t output = _mm_cvtsi128_si32(A5); 1426 return output; 1427} 1428 1429static WEBP_INLINE uint32_t SelectSSE2(uint32_t a, uint32_t b, uint32_t c) { 1430 int pa_minus_pb; 1431 const __m128i zero = _mm_setzero_si128(); 1432 const __m128i A0 = _mm_cvtsi32_si128(a); 1433 const __m128i B0 = _mm_cvtsi32_si128(b); 1434 const __m128i C0 = _mm_cvtsi32_si128(c); 1435 const __m128i AC0 = _mm_subs_epu8(A0, C0); 1436 const __m128i CA0 = _mm_subs_epu8(C0, A0); 1437 const __m128i BC0 = _mm_subs_epu8(B0, C0); 1438 const __m128i CB0 = _mm_subs_epu8(C0, B0); 1439 const __m128i AC = _mm_or_si128(AC0, CA0); 1440 const __m128i BC = _mm_or_si128(BC0, CB0); 1441 const __m128i pa = _mm_unpacklo_epi8(AC, zero); // |a - c| 1442 const __m128i pb = _mm_unpacklo_epi8(BC, zero); // |b - c| 1443 const __m128i diff = _mm_sub_epi16(pb, pa); 1444 { 1445 int16_t out[8]; 1446 _mm_storeu_si128((__m128i*)out, diff); 1447 pa_minus_pb = out[0] + out[1] + out[2] + out[3]; 1448 } 1449 return (pa_minus_pb <= 0) ? a : b; 1450} 1451 1452static void SubtractGreenFromBlueAndRedSSE2(uint32_t* argb_data, int num_pixs) { 1453 int i = 0; 1454 const __m128i mask = _mm_set1_epi32(0x0000ff00); 1455 for (; i + 4 < num_pixs; i += 4) { 1456 const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); 1457 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|... 1458 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|... 1459 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|... 1460 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g); 1461 const __m128i out = _mm_sub_epi8(in, in_0g0g); 1462 _mm_storeu_si128((__m128i*)&argb_data[i], out); 1463 } 1464 // fallthrough and finish off with plain-C 1465 for (; i < num_pixs; ++i) { 1466 const uint32_t argb = argb_data[i]; 1467 const uint32_t green = (argb >> 8) & 0xff; 1468 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff; 1469 const uint32_t new_b = ((argb & 0xff) - green) & 0xff; 1470 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b; 1471 } 1472} 1473 1474static void AddGreenToBlueAndRedSSE2(uint32_t* data, const uint32_t* data_end) { 1475 const __m128i mask = _mm_set1_epi32(0x0000ff00); 1476 for (; data + 4 < data_end; data += 4) { 1477 const __m128i in = _mm_loadu_si128((__m128i*)data); 1478 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|... 1479 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|... 1480 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|... 1481 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g); 1482 const __m128i out = _mm_add_epi8(in, in_0g0g); 1483 _mm_storeu_si128((__m128i*)data, out); 1484 } 1485 // fallthrough and finish off with plain-C 1486 while (data < data_end) { 1487 const uint32_t argb = *data; 1488 const uint32_t green = ((argb >> 8) & 0xff); 1489 uint32_t red_blue = (argb & 0x00ff00ffu); 1490 red_blue += (green << 16) | green; 1491 red_blue &= 0x00ff00ffu; 1492 *data++ = (argb & 0xff00ff00u) | red_blue; 1493 } 1494} 1495 1496extern void VP8LDspInitSSE2(void); 1497 1498void VP8LDspInitSSE2(void) { 1499 VP8LClampedAddSubtractFull = ClampedAddSubtractFullSSE2; 1500 VP8LClampedAddSubtractHalf = ClampedAddSubtractHalfSSE2; 1501 VP8LSelect = SelectSSE2; 1502 VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRedSSE2; 1503 VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRedSSE2; 1504} 1505#endif 1506//------------------------------------------------------------------------------ 1507 1508VP8LPredClampedAddSubFunc VP8LClampedAddSubtractFull; 1509VP8LPredClampedAddSubFunc VP8LClampedAddSubtractHalf; 1510VP8LPredSelectFunc VP8LSelect; 1511VP8LSubtractGreenFromBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed; 1512VP8LAddGreenToBlueAndRedFunc VP8LAddGreenToBlueAndRed; 1513 1514void VP8LDspInit(void) { 1515 VP8LClampedAddSubtractFull = ClampedAddSubtractFull; 1516 VP8LClampedAddSubtractHalf = ClampedAddSubtractHalf; 1517 VP8LSelect = Select; 1518 VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed; 1519 VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed; 1520 1521 // If defined, use CPUInfo() to overwrite some pointers with faster versions. 1522 if (VP8GetCPUInfo != NULL) { 1523#if defined(WEBP_USE_SSE2) 1524 if (VP8GetCPUInfo(kSSE2)) { 1525 VP8LDspInitSSE2(); 1526 } 1527#endif 1528 } 1529} 1530 1531//------------------------------------------------------------------------------ 1532 1533