1/*
2Copyright 2011 Google Inc. All Rights Reserved.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15
16Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18*/
19
20/*
21Bounded package merge algorithm, based on the paper
22"A Fast and Space-Economical Algorithm for Length-Limited Coding
23Jyrki Katajainen, Alistair Moffat, Andrew Turpin".
24*/
25
26#include "katajainen.h"
27#include <assert.h>
28#include <stdlib.h>
29
30typedef struct Node Node;
31
32/*
33Nodes forming chains. Also used to represent leaves.
34*/
35struct Node {
36  size_t weight;  /* Total weight (symbol count) of this chain. */
37  Node* tail;  /* Previous node(s) of this chain, or 0 if none. */
38  int count;  /* Leaf symbol index, or number of leaves before this chain. */
39  char inuse;  /* Tracking for garbage collection. */
40};
41
42/*
43Memory pool for nodes.
44*/
45typedef struct NodePool {
46  Node* nodes;  /* The pool. */
47  Node* next;  /* Pointer to a possibly free node in the pool. */
48  int size;  /* Size of the memory pool. */
49} NodePool;
50
51/*
52Initializes a chain node with the given values and marks it as in use.
53*/
54static void InitNode(size_t weight, int count, Node* tail, Node* node) {
55  node->weight = weight;
56  node->count = count;
57  node->tail = tail;
58  node->inuse = 1;
59}
60
61/*
62Finds a free location in the memory pool. Performs garbage collection if needed.
63lists: If given, used to mark in-use nodes during garbage collection.
64maxbits: Size of lists.
65pool: Memory pool to get free node from.
66*/
67static Node* GetFreeNode(Node* (*lists)[2], int maxbits, NodePool* pool) {
68  for (;;) {
69    if (pool->next >= &pool->nodes[pool->size]) {
70      /* Garbage collection. */
71      int i;
72      for (i = 0; i < pool->size; i++) {
73        pool->nodes[i].inuse = 0;
74      }
75      if (lists) {
76        for (i = 0; i < maxbits * 2; i++) {
77          Node* node;
78          for (node = lists[i / 2][i % 2]; node; node = node->tail) {
79            node->inuse = 1;
80          }
81        }
82      }
83      pool->next = &pool->nodes[0];
84    }
85    if (!pool->next->inuse) break;  /* Found one. */
86    pool->next++;
87  }
88  return pool->next++;
89}
90
91
92/*
93Performs a Boundary Package-Merge step. Puts a new chain in the given list. The
94new chain is, depending on the weights, a leaf or a combination of two chains
95from the previous list.
96lists: The lists of chains.
97maxbits: Number of lists.
98leaves: The leaves, one per symbol.
99numsymbols: Number of leaves.
100pool: the node memory pool.
101index: The index of the list in which a new chain or leaf is required.
102final: Whether this is the last time this function is called. If it is then it
103  is no more needed to recursively call self.
104*/
105static void BoundaryPM(Node* (*lists)[2], int maxbits,
106    Node* leaves, int numsymbols, NodePool* pool, int index, char final) {
107  Node* newchain;
108  Node* oldchain;
109  int lastcount = lists[index][1]->count;  /* Count of last chain of list. */
110
111  if (index == 0 && lastcount >= numsymbols) return;
112
113  newchain = GetFreeNode(lists, maxbits, pool);
114  oldchain = lists[index][1];
115
116  /* These are set up before the recursive calls below, so that there is a list
117  pointing to the new node, to let the garbage collection know it's in use. */
118  lists[index][0] = oldchain;
119  lists[index][1] = newchain;
120
121  if (index == 0) {
122    /* New leaf node in list 0. */
123    InitNode(leaves[lastcount].weight, lastcount + 1, 0, newchain);
124  } else {
125    size_t sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight;
126    if (lastcount < numsymbols && sum > leaves[lastcount].weight) {
127      /* New leaf inserted in list, so count is incremented. */
128      InitNode(leaves[lastcount].weight, lastcount + 1, oldchain->tail,
129          newchain);
130    } else {
131      InitNode(sum, lastcount, lists[index - 1][1], newchain);
132      if (!final) {
133        /* Two lookahead chains of previous list used up, create new ones. */
134        BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0);
135        BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0);
136      }
137    }
138  }
139}
140
141/*
142Initializes each list with as lookahead chains the two leaves with lowest
143weights.
144*/
145static void InitLists(
146    NodePool* pool, const Node* leaves, int maxbits, Node* (*lists)[2]) {
147  int i;
148  Node* node0 = GetFreeNode(0, maxbits, pool);
149  Node* node1 = GetFreeNode(0, maxbits, pool);
150  InitNode(leaves[0].weight, 1, 0, node0);
151  InitNode(leaves[1].weight, 2, 0, node1);
152  for (i = 0; i < maxbits; i++) {
153    lists[i][0] = node0;
154    lists[i][1] = node1;
155  }
156}
157
158/*
159Converts result of boundary package-merge to the bitlengths. The result in the
160last chain of the last list contains the amount of active leaves in each list.
161chain: Chain to extract the bit length from (last chain from last list).
162*/
163static void ExtractBitLengths(Node* chain, Node* leaves, unsigned* bitlengths) {
164  Node* node;
165  for (node = chain; node; node = node->tail) {
166    int i;
167    for (i = 0; i < node->count; i++) {
168      bitlengths[leaves[i].count]++;
169    }
170  }
171}
172
173/*
174Comparator for sorting the leaves. Has the function signature for qsort.
175*/
176static int LeafComparator(const void* a, const void* b) {
177  return ((const Node*)a)->weight - ((const Node*)b)->weight;
178}
179
180int ZopfliLengthLimitedCodeLengths(
181    const size_t* frequencies, int n, int maxbits, unsigned* bitlengths) {
182  NodePool pool;
183  int i;
184  int numsymbols = 0;  /* Amount of symbols with frequency > 0. */
185  int numBoundaryPMRuns;
186
187  /* Array of lists of chains. Each list requires only two lookahead chains at
188  a time, so each list is a array of two Node*'s. */
189  Node* (*lists)[2];
190
191  /* One leaf per symbol. Only numsymbols leaves will be used. */
192  Node* leaves = (Node*)malloc(n * sizeof(*leaves));
193
194  /* Initialize all bitlengths at 0. */
195  for (i = 0; i < n; i++) {
196    bitlengths[i] = 0;
197  }
198
199  /* Count used symbols and place them in the leaves. */
200  for (i = 0; i < n; i++) {
201    if (frequencies[i]) {
202      leaves[numsymbols].weight = frequencies[i];
203      leaves[numsymbols].count = i;  /* Index of symbol this leaf represents. */
204      numsymbols++;
205    }
206  }
207
208  /* Check special cases and error conditions. */
209  if ((1 << maxbits) < numsymbols) {
210    free(leaves);
211    return 1;  /* Error, too few maxbits to represent symbols. */
212  }
213  if (numsymbols == 0) {
214    free(leaves);
215    return 0;  /* No symbols at all. OK. */
216  }
217  if (numsymbols == 1) {
218    bitlengths[leaves[0].count] = 1;
219    free(leaves);
220    return 0;  /* Only one symbol, give it bitlength 1, not 0. OK. */
221  }
222
223  /* Sort the leaves from lightest to heaviest. */
224  qsort(leaves, numsymbols, sizeof(Node), LeafComparator);
225
226  /* Initialize node memory pool. */
227  pool.size = 2 * maxbits * (maxbits + 1);
228  pool.nodes = (Node*)malloc(pool.size * sizeof(*pool.nodes));
229  pool.next = pool.nodes;
230  for (i = 0; i < pool.size; i++) {
231    pool.nodes[i].inuse = 0;
232  }
233
234  lists = (Node* (*)[2])malloc(maxbits * sizeof(*lists));
235  InitLists(&pool, leaves, maxbits, lists);
236
237  /* In the last list, 2 * numsymbols - 2 active chains need to be created. Two
238  are already created in the initialization. Each BoundaryPM run creates one. */
239  numBoundaryPMRuns = 2 * numsymbols - 4;
240  for (i = 0; i < numBoundaryPMRuns; i++) {
241    char final = i == numBoundaryPMRuns - 1;
242    BoundaryPM(lists, maxbits, leaves, numsymbols, &pool, maxbits - 1, final);
243  }
244
245  ExtractBitLengths(lists[maxbits - 1][1], leaves, bitlengths);
246
247  free(lists);
248  free(leaves);
249  free(pool.nodes);
250  return 0;  /* OK. */
251}
252