1/*
2Copyright 2011 Google Inc. All Rights Reserved.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15
16Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18*/
19
20#include "blocksplitter.h"
21
22#include <assert.h>
23#include <stdio.h>
24#include <stdlib.h>
25
26#include "deflate.h"
27#include "lz77.h"
28#include "squeeze.h"
29#include "tree.h"
30#include "util.h"
31
32/*
33The "f" for the FindMinimum function below.
34i: the current parameter of f(i)
35context: for your implementation
36*/
37typedef double FindMinimumFun(size_t i, void* context);
38
39/*
40Finds minimum of function f(i) where is is of type size_t, f(i) is of type
41double, i is in range start-end (excluding end).
42*/
43static size_t FindMinimum(FindMinimumFun f, void* context,
44                          size_t start, size_t end) {
45  if (end - start < 1024) {
46    double best = ZOPFLI_LARGE_FLOAT;
47    size_t result = start;
48    size_t i;
49    for (i = start; i < end; i++) {
50      double v = f(i, context);
51      if (v < best) {
52        best = v;
53        result = i;
54      }
55    }
56    return result;
57  } else {
58    /* Try to find minimum faster by recursively checking multiple points. */
59#define NUM 9  /* Good value: 9. */
60    size_t i;
61    size_t p[NUM];
62    double vp[NUM];
63    size_t besti;
64    double best;
65    double lastbest = ZOPFLI_LARGE_FLOAT;
66    size_t pos = start;
67
68    for (;;) {
69      if (end - start <= NUM) break;
70
71      for (i = 0; i < NUM; i++) {
72        p[i] = start + (i + 1) * ((end - start) / (NUM + 1));
73        vp[i] = f(p[i], context);
74      }
75      besti = 0;
76      best = vp[0];
77      for (i = 1; i < NUM; i++) {
78        if (vp[i] < best) {
79          best = vp[i];
80          besti = i;
81        }
82      }
83      if (best > lastbest) break;
84
85      start = besti == 0 ? start : p[besti - 1];
86      end = besti == NUM - 1 ? end : p[besti + 1];
87
88      pos = p[besti];
89      lastbest = best;
90    }
91    return pos;
92#undef NUM
93  }
94}
95
96/*
97Returns estimated cost of a block in bits.  It includes the size to encode the
98tree and the size to encode all literal, length and distance symbols and their
99extra bits.
100
101litlens: lz77 lit/lengths
102dists: ll77 distances
103lstart: start of block
104lend: end of block (not inclusive)
105*/
106static double EstimateCost(const unsigned short* litlens,
107                           const unsigned short* dists,
108                           size_t lstart, size_t lend) {
109  return ZopfliCalculateBlockSize(litlens, dists, lstart, lend, 2);
110}
111
112typedef struct SplitCostContext {
113  const unsigned short* litlens;
114  const unsigned short* dists;
115  size_t llsize;
116  size_t start;
117  size_t end;
118} SplitCostContext;
119
120
121/*
122Gets the cost which is the sum of the cost of the left and the right section
123of the data.
124type: FindMinimumFun
125*/
126static double SplitCost(size_t i, void* context) {
127  SplitCostContext* c = (SplitCostContext*)context;
128  return EstimateCost(c->litlens, c->dists, c->start, i) +
129      EstimateCost(c->litlens, c->dists, i, c->end);
130}
131
132static void AddSorted(size_t value, size_t** out, size_t* outsize) {
133  size_t i;
134  ZOPFLI_APPEND_DATA(value, out, outsize);
135  for (i = 0; i + 1 < *outsize; i++) {
136    if ((*out)[i] > value) {
137      size_t j;
138      for (j = *outsize - 1; j > i; j--) {
139        (*out)[j] = (*out)[j - 1];
140      }
141      (*out)[i] = value;
142      break;
143    }
144  }
145}
146
147/*
148Prints the block split points as decimal and hex values in the terminal.
149*/
150static void PrintBlockSplitPoints(const unsigned short* litlens,
151                                  const unsigned short* dists,
152                                  size_t llsize, const size_t* lz77splitpoints,
153                                  size_t nlz77points) {
154  size_t* splitpoints = 0;
155  size_t npoints = 0;
156  size_t i;
157  /* The input is given as lz77 indices, but we want to see the uncompressed
158  index values. */
159  size_t pos = 0;
160  if (nlz77points > 0) {
161    for (i = 0; i < llsize; i++) {
162      size_t length = dists[i] == 0 ? 1 : litlens[i];
163      if (lz77splitpoints[npoints] == i) {
164        ZOPFLI_APPEND_DATA(pos, &splitpoints, &npoints);
165        if (npoints == nlz77points) break;
166      }
167      pos += length;
168    }
169  }
170  assert(npoints == nlz77points);
171
172  fprintf(stderr, "block split points: ");
173  for (i = 0; i < npoints; i++) {
174    fprintf(stderr, "%d ", (int)splitpoints[i]);
175  }
176  fprintf(stderr, "(hex:");
177  for (i = 0; i < npoints; i++) {
178    fprintf(stderr, " %x", (int)splitpoints[i]);
179  }
180  fprintf(stderr, ")\n");
181
182  free(splitpoints);
183}
184
185/*
186Finds next block to try to split, the largest of the available ones.
187The largest is chosen to make sure that if only a limited amount of blocks is
188requested, their sizes are spread evenly.
189llsize: the size of the LL77 data, which is the size of the done array here.
190done: array indicating which blocks starting at that position are no longer
191    splittable (splitting them increases rather than decreases cost).
192splitpoints: the splitpoints found so far.
193npoints: the amount of splitpoints found so far.
194lstart: output variable, giving start of block.
195lend: output variable, giving end of block.
196returns 1 if a block was found, 0 if no block found (all are done).
197*/
198static int FindLargestSplittableBlock(
199    size_t llsize, const unsigned char* done,
200    const size_t* splitpoints, size_t npoints,
201    size_t* lstart, size_t* lend) {
202  size_t longest = 0;
203  int found = 0;
204  size_t i;
205  for (i = 0; i <= npoints; i++) {
206    size_t start = i == 0 ? 0 : splitpoints[i - 1];
207    size_t end = i == npoints ? llsize - 1 : splitpoints[i];
208    if (!done[start] && end - start > longest) {
209      *lstart = start;
210      *lend = end;
211      found = 1;
212      longest = end - start;
213    }
214  }
215  return found;
216}
217
218void ZopfliBlockSplitLZ77(const ZopfliOptions* options,
219                          const unsigned short* litlens,
220                          const unsigned short* dists,
221                          size_t llsize, size_t maxblocks,
222                          size_t** splitpoints, size_t* npoints) {
223  size_t lstart, lend;
224  size_t i;
225  size_t llpos = 0;
226  size_t numblocks = 1;
227  unsigned char* done;
228  double splitcost, origcost;
229
230  if (llsize < 10) return;  /* This code fails on tiny files. */
231
232  done = (unsigned char*)malloc(llsize);
233  if (!done) exit(-1); /* Allocation failed. */
234  for (i = 0; i < llsize; i++) done[i] = 0;
235
236  lstart = 0;
237  lend = llsize;
238  for (;;) {
239    SplitCostContext c;
240
241    if (maxblocks > 0 && numblocks >= maxblocks) {
242      break;
243    }
244
245    c.litlens = litlens;
246    c.dists = dists;
247    c.llsize = llsize;
248    c.start = lstart;
249    c.end = lend;
250    assert(lstart < lend);
251    llpos = FindMinimum(SplitCost, &c, lstart + 1, lend);
252
253    assert(llpos > lstart);
254    assert(llpos < lend);
255
256    splitcost = EstimateCost(litlens, dists, lstart, llpos) +
257        EstimateCost(litlens, dists, llpos, lend);
258    origcost = EstimateCost(litlens, dists, lstart, lend);
259
260    if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) {
261      done[lstart] = 1;
262    } else {
263      AddSorted(llpos, splitpoints, npoints);
264      numblocks++;
265    }
266
267    if (!FindLargestSplittableBlock(
268        llsize, done, *splitpoints, *npoints, &lstart, &lend)) {
269      break;  /* No further split will probably reduce compression. */
270    }
271
272    if (lend - lstart < 10) {
273      break;
274    }
275  }
276
277  if (options->verbose) {
278    PrintBlockSplitPoints(litlens, dists, llsize, *splitpoints, *npoints);
279  }
280
281  free(done);
282}
283
284void ZopfliBlockSplit(const ZopfliOptions* options,
285                      const unsigned char* in, size_t instart, size_t inend,
286                      size_t maxblocks, size_t** splitpoints, size_t* npoints) {
287  size_t pos = 0;
288  size_t i;
289  ZopfliBlockState s;
290  size_t* lz77splitpoints = 0;
291  size_t nlz77points = 0;
292  ZopfliLZ77Store store;
293
294  ZopfliInitLZ77Store(&store);
295
296  s.options = options;
297  s.blockstart = instart;
298  s.blockend = inend;
299#ifdef ZOPFLI_LONGEST_MATCH_CACHE
300  s.lmc = 0;
301#endif
302
303  *npoints = 0;
304  *splitpoints = 0;
305
306  /* Unintuitively, Using a simple LZ77 method here instead of ZopfliLZ77Optimal
307  results in better blocks. */
308  ZopfliLZ77Greedy(&s, in, instart, inend, &store);
309
310  ZopfliBlockSplitLZ77(options,
311                       store.litlens, store.dists, store.size, maxblocks,
312                       &lz77splitpoints, &nlz77points);
313
314  /* Convert LZ77 positions to positions in the uncompressed input. */
315  pos = instart;
316  if (nlz77points > 0) {
317    for (i = 0; i < store.size; i++) {
318      size_t length = store.dists[i] == 0 ? 1 : store.litlens[i];
319      if (lz77splitpoints[*npoints] == i) {
320        ZOPFLI_APPEND_DATA(pos, splitpoints, npoints);
321        if (*npoints == nlz77points) break;
322      }
323      pos += length;
324    }
325  }
326  assert(*npoints == nlz77points);
327
328  free(lz77splitpoints);
329  ZopfliCleanLZ77Store(&store);
330}
331
332void ZopfliBlockSplitSimple(const unsigned char* in,
333                            size_t instart, size_t inend,
334                            size_t blocksize,
335                            size_t** splitpoints, size_t* npoints) {
336  size_t i = instart;
337  while (i < inend) {
338    ZOPFLI_APPEND_DATA(i, splitpoints, npoints);
339    i += blocksize;
340  }
341  (void)in;
342}
343