1/* 2Copyright 2011 Google Inc. All Rights Reserved. 3 4Licensed under the Apache License, Version 2.0 (the "License"); 5you may not use this file except in compliance with the License. 6You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10Unless required by applicable law or agreed to in writing, software 11distributed under the License is distributed on an "AS IS" BASIS, 12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13See the License for the specific language governing permissions and 14limitations under the License. 15 16Author: lode.vandevenne@gmail.com (Lode Vandevenne) 17Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala) 18*/ 19 20#include "blocksplitter.h" 21 22#include <assert.h> 23#include <stdio.h> 24#include <stdlib.h> 25 26#include "deflate.h" 27#include "lz77.h" 28#include "squeeze.h" 29#include "tree.h" 30#include "util.h" 31 32/* 33The "f" for the FindMinimum function below. 34i: the current parameter of f(i) 35context: for your implementation 36*/ 37typedef double FindMinimumFun(size_t i, void* context); 38 39/* 40Finds minimum of function f(i) where is is of type size_t, f(i) is of type 41double, i is in range start-end (excluding end). 42*/ 43static size_t FindMinimum(FindMinimumFun f, void* context, 44 size_t start, size_t end) { 45 if (end - start < 1024) { 46 double best = ZOPFLI_LARGE_FLOAT; 47 size_t result = start; 48 size_t i; 49 for (i = start; i < end; i++) { 50 double v = f(i, context); 51 if (v < best) { 52 best = v; 53 result = i; 54 } 55 } 56 return result; 57 } else { 58 /* Try to find minimum faster by recursively checking multiple points. */ 59#define NUM 9 /* Good value: 9. */ 60 size_t i; 61 size_t p[NUM]; 62 double vp[NUM]; 63 size_t besti; 64 double best; 65 double lastbest = ZOPFLI_LARGE_FLOAT; 66 size_t pos = start; 67 68 for (;;) { 69 if (end - start <= NUM) break; 70 71 for (i = 0; i < NUM; i++) { 72 p[i] = start + (i + 1) * ((end - start) / (NUM + 1)); 73 vp[i] = f(p[i], context); 74 } 75 besti = 0; 76 best = vp[0]; 77 for (i = 1; i < NUM; i++) { 78 if (vp[i] < best) { 79 best = vp[i]; 80 besti = i; 81 } 82 } 83 if (best > lastbest) break; 84 85 start = besti == 0 ? start : p[besti - 1]; 86 end = besti == NUM - 1 ? end : p[besti + 1]; 87 88 pos = p[besti]; 89 lastbest = best; 90 } 91 return pos; 92#undef NUM 93 } 94} 95 96/* 97Returns estimated cost of a block in bits. It includes the size to encode the 98tree and the size to encode all literal, length and distance symbols and their 99extra bits. 100 101litlens: lz77 lit/lengths 102dists: ll77 distances 103lstart: start of block 104lend: end of block (not inclusive) 105*/ 106static double EstimateCost(const unsigned short* litlens, 107 const unsigned short* dists, 108 size_t lstart, size_t lend) { 109 return ZopfliCalculateBlockSize(litlens, dists, lstart, lend, 2); 110} 111 112typedef struct SplitCostContext { 113 const unsigned short* litlens; 114 const unsigned short* dists; 115 size_t llsize; 116 size_t start; 117 size_t end; 118} SplitCostContext; 119 120 121/* 122Gets the cost which is the sum of the cost of the left and the right section 123of the data. 124type: FindMinimumFun 125*/ 126static double SplitCost(size_t i, void* context) { 127 SplitCostContext* c = (SplitCostContext*)context; 128 return EstimateCost(c->litlens, c->dists, c->start, i) + 129 EstimateCost(c->litlens, c->dists, i, c->end); 130} 131 132static void AddSorted(size_t value, size_t** out, size_t* outsize) { 133 size_t i; 134 ZOPFLI_APPEND_DATA(value, out, outsize); 135 for (i = 0; i + 1 < *outsize; i++) { 136 if ((*out)[i] > value) { 137 size_t j; 138 for (j = *outsize - 1; j > i; j--) { 139 (*out)[j] = (*out)[j - 1]; 140 } 141 (*out)[i] = value; 142 break; 143 } 144 } 145} 146 147/* 148Prints the block split points as decimal and hex values in the terminal. 149*/ 150static void PrintBlockSplitPoints(const unsigned short* litlens, 151 const unsigned short* dists, 152 size_t llsize, const size_t* lz77splitpoints, 153 size_t nlz77points) { 154 size_t* splitpoints = 0; 155 size_t npoints = 0; 156 size_t i; 157 /* The input is given as lz77 indices, but we want to see the uncompressed 158 index values. */ 159 size_t pos = 0; 160 if (nlz77points > 0) { 161 for (i = 0; i < llsize; i++) { 162 size_t length = dists[i] == 0 ? 1 : litlens[i]; 163 if (lz77splitpoints[npoints] == i) { 164 ZOPFLI_APPEND_DATA(pos, &splitpoints, &npoints); 165 if (npoints == nlz77points) break; 166 } 167 pos += length; 168 } 169 } 170 assert(npoints == nlz77points); 171 172 fprintf(stderr, "block split points: "); 173 for (i = 0; i < npoints; i++) { 174 fprintf(stderr, "%d ", (int)splitpoints[i]); 175 } 176 fprintf(stderr, "(hex:"); 177 for (i = 0; i < npoints; i++) { 178 fprintf(stderr, " %x", (int)splitpoints[i]); 179 } 180 fprintf(stderr, ")\n"); 181 182 free(splitpoints); 183} 184 185/* 186Finds next block to try to split, the largest of the available ones. 187The largest is chosen to make sure that if only a limited amount of blocks is 188requested, their sizes are spread evenly. 189llsize: the size of the LL77 data, which is the size of the done array here. 190done: array indicating which blocks starting at that position are no longer 191 splittable (splitting them increases rather than decreases cost). 192splitpoints: the splitpoints found so far. 193npoints: the amount of splitpoints found so far. 194lstart: output variable, giving start of block. 195lend: output variable, giving end of block. 196returns 1 if a block was found, 0 if no block found (all are done). 197*/ 198static int FindLargestSplittableBlock( 199 size_t llsize, const unsigned char* done, 200 const size_t* splitpoints, size_t npoints, 201 size_t* lstart, size_t* lend) { 202 size_t longest = 0; 203 int found = 0; 204 size_t i; 205 for (i = 0; i <= npoints; i++) { 206 size_t start = i == 0 ? 0 : splitpoints[i - 1]; 207 size_t end = i == npoints ? llsize - 1 : splitpoints[i]; 208 if (!done[start] && end - start > longest) { 209 *lstart = start; 210 *lend = end; 211 found = 1; 212 longest = end - start; 213 } 214 } 215 return found; 216} 217 218void ZopfliBlockSplitLZ77(const ZopfliOptions* options, 219 const unsigned short* litlens, 220 const unsigned short* dists, 221 size_t llsize, size_t maxblocks, 222 size_t** splitpoints, size_t* npoints) { 223 size_t lstart, lend; 224 size_t i; 225 size_t llpos = 0; 226 size_t numblocks = 1; 227 unsigned char* done; 228 double splitcost, origcost; 229 230 if (llsize < 10) return; /* This code fails on tiny files. */ 231 232 done = (unsigned char*)malloc(llsize); 233 if (!done) exit(-1); /* Allocation failed. */ 234 for (i = 0; i < llsize; i++) done[i] = 0; 235 236 lstart = 0; 237 lend = llsize; 238 for (;;) { 239 SplitCostContext c; 240 241 if (maxblocks > 0 && numblocks >= maxblocks) { 242 break; 243 } 244 245 c.litlens = litlens; 246 c.dists = dists; 247 c.llsize = llsize; 248 c.start = lstart; 249 c.end = lend; 250 assert(lstart < lend); 251 llpos = FindMinimum(SplitCost, &c, lstart + 1, lend); 252 253 assert(llpos > lstart); 254 assert(llpos < lend); 255 256 splitcost = EstimateCost(litlens, dists, lstart, llpos) + 257 EstimateCost(litlens, dists, llpos, lend); 258 origcost = EstimateCost(litlens, dists, lstart, lend); 259 260 if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) { 261 done[lstart] = 1; 262 } else { 263 AddSorted(llpos, splitpoints, npoints); 264 numblocks++; 265 } 266 267 if (!FindLargestSplittableBlock( 268 llsize, done, *splitpoints, *npoints, &lstart, &lend)) { 269 break; /* No further split will probably reduce compression. */ 270 } 271 272 if (lend - lstart < 10) { 273 break; 274 } 275 } 276 277 if (options->verbose) { 278 PrintBlockSplitPoints(litlens, dists, llsize, *splitpoints, *npoints); 279 } 280 281 free(done); 282} 283 284void ZopfliBlockSplit(const ZopfliOptions* options, 285 const unsigned char* in, size_t instart, size_t inend, 286 size_t maxblocks, size_t** splitpoints, size_t* npoints) { 287 size_t pos = 0; 288 size_t i; 289 ZopfliBlockState s; 290 size_t* lz77splitpoints = 0; 291 size_t nlz77points = 0; 292 ZopfliLZ77Store store; 293 294 ZopfliInitLZ77Store(&store); 295 296 s.options = options; 297 s.blockstart = instart; 298 s.blockend = inend; 299#ifdef ZOPFLI_LONGEST_MATCH_CACHE 300 s.lmc = 0; 301#endif 302 303 *npoints = 0; 304 *splitpoints = 0; 305 306 /* Unintuitively, Using a simple LZ77 method here instead of ZopfliLZ77Optimal 307 results in better blocks. */ 308 ZopfliLZ77Greedy(&s, in, instart, inend, &store); 309 310 ZopfliBlockSplitLZ77(options, 311 store.litlens, store.dists, store.size, maxblocks, 312 &lz77splitpoints, &nlz77points); 313 314 /* Convert LZ77 positions to positions in the uncompressed input. */ 315 pos = instart; 316 if (nlz77points > 0) { 317 for (i = 0; i < store.size; i++) { 318 size_t length = store.dists[i] == 0 ? 1 : store.litlens[i]; 319 if (lz77splitpoints[*npoints] == i) { 320 ZOPFLI_APPEND_DATA(pos, splitpoints, npoints); 321 if (*npoints == nlz77points) break; 322 } 323 pos += length; 324 } 325 } 326 assert(*npoints == nlz77points); 327 328 free(lz77splitpoints); 329 ZopfliCleanLZ77Store(&store); 330} 331 332void ZopfliBlockSplitSimple(const unsigned char* in, 333 size_t instart, size_t inend, 334 size_t blocksize, 335 size_t** splitpoints, size_t* npoints) { 336 size_t i = instart; 337 while (i < inend) { 338 ZOPFLI_APPEND_DATA(i, splitpoints, npoints); 339 i += blocksize; 340 } 341 (void)in; 342} 343