Source code: ClassLib/Common/java/util/zip/DeflaterHuffman.java
1 // DeflaterHuffman.java, created Mon Jul 8 4:06:18 2002 by joewhaley
2 // Copyright (C) 2001-3 John Whaley <jwhaley@alum.mit.edu>
3 // Licensed under the terms of the GNU LGPL; see COPYING for details.
4 package ClassLib.Common.java.util.zip;
5
6 /**
7 * DeflaterHuffman
8 *
9 * @author John Whaley <jwhaley@alum.mit.edu>
10 * @version $Id: DeflaterHuffman.java,v 1.5 2003/05/12 10:04:53 joewhaley Exp $
11 */
12 class DeflaterHuffman {
13
14 private static final int BUFSIZE = 1 << (DeflaterConstants.DEFAULT_MEM_LEVEL + 6);
15 private static final int LITERAL_NUM = 286;
16 private static final int DIST_NUM = 30;
17 private static final int BITLEN_NUM = 19;
18 private static final int REP_3_6 = 16;
19 private static final int REP_3_10 = 17;
20 private static final int REP_11_138 = 18;
21 private static final int EOF_SYMBOL = 256;
22 private static final int[] BL_ORDER =
23 { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
24
25 private static final String bit4Reverse =
26 "\000\010\004\014\002\012\006\016\001\011\005\015\003\013\007\017";
27
28 class Tree {
29 short[] freqs;
30 short[] codes;
31 byte[] length;
32 int[] bl_counts;
33 int minNumCodes, numCodes;
34 int maxLength;
35
36 Tree(int elems, int minCodes, int maxLength) {
37 this.minNumCodes = minCodes;
38 this.maxLength = maxLength;
39 freqs = new short[elems];
40 bl_counts = new int[maxLength];
41 }
42
43 void reset() {
44 for (int i = 0; i < freqs.length; i++)
45 freqs[i] = 0;
46 codes = null;
47 length = null;
48 }
49
50 final void writeSymbol(int code)
51 {
52 if (DeflaterConstants.DEBUGGING)
53 {
54 freqs[code]--;
55 // System.err.print("writeSymbol("+freqs.length+","+code+"): ");
56 }
57 pending.writeBits(codes[code] & 0xffff, length[code]);
58 }
59
60 final void checkEmpty()
61 {
62 boolean empty = true;
63 for (int i = 0; i < freqs.length; i++)
64 if (freqs[i] != 0)
65 {
66 System.err.println("freqs["+i+"] == "+freqs[i]);
67 empty = false;
68 }
69 if (!empty)
70 throw new InternalError();
71 System.err.println("checkEmpty suceeded!");
72 }
73
74 void setStaticCodes(short[] stCodes, byte[] stLength)
75 {
76 codes = stCodes;
77 length = stLength;
78 }
79
80 public void buildCodes() {
81 int numSymbols = freqs.length;
82 int[] nextCode = new int[maxLength];
83 int code = 0;
84 codes = new short[freqs.length];
85
86 if (DeflaterConstants.DEBUGGING)
87 System.err.println("buildCodes: "+freqs.length);
88 for (int bits = 0; bits < maxLength; bits++)
89 {
90 nextCode[bits] = code;
91 code += bl_counts[bits] << (15 - bits);
92 if (DeflaterConstants.DEBUGGING)
93 System.err.println("bits: "+(bits+1)+" count: "+bl_counts[bits]
94 +" nextCode: "+Integer.toHexString(code));
95 }
96 if (DeflaterConstants.DEBUGGING && code != 65536)
97 throw new RuntimeException("Inconsistent bl_counts!");
98
99 for (int i=0; i < numCodes; i++)
100 {
101 int bits = length[i];
102 if (bits > 0)
103 {
104 if (DeflaterConstants.DEBUGGING)
105 System.err.println("codes["+i+"] = rev("
106 +Integer.toHexString(nextCode[bits-1])+"),"
107 +bits);
108 codes[i] = bitReverse(nextCode[bits-1]);
109 nextCode[bits-1] += 1 << (16 - bits);
110 }
111 }
112 }
113
114 private void buildLength(int childs[])
115 {
116 this.length = new byte [freqs.length];
117 int numNodes = childs.length / 2;
118 int numLeafs = (numNodes + 1) / 2;
119 int overflow = 0;
120
121 for (int i = 0; i < maxLength; i++)
122 bl_counts[i] = 0;
123
124 /* First calculate optimal bit lengths */
125 int lengths[] = new int[numNodes];
126 lengths[numNodes-1] = 0;
127 for (int i = numNodes - 1; i >= 0; i--)
128 {
129 if (childs[2*i+1] != -1)
130 {
131 int bitLength = lengths[i] + 1;
132 if (bitLength > maxLength)
133 {
134 bitLength = maxLength;
135 overflow++;
136 }
137 lengths[childs[2*i]] = lengths[childs[2*i+1]] = bitLength;
138 }
139 else
140 {
141 /* A leaf node */
142 int bitLength = lengths[i];
143 bl_counts[bitLength - 1]++;
144 this.length[childs[2*i]] = (byte) lengths[i];
145 }
146 }
147
148 if (DeflaterConstants.DEBUGGING)
149 {
150 System.err.println("Tree "+freqs.length+" lengths:");
151 for (int i=0; i < numLeafs; i++)
152 System.err.println("Node "+childs[2*i]+" freq: "+freqs[childs[2*i]]
153 + " len: "+length[childs[2*i]]);
154 }
155
156 if (overflow == 0)
157 return;
158
159 int incrBitLen = maxLength - 1;
160 do
161 {
162 /* Find the first bit length which could increase: */
163 while (bl_counts[--incrBitLen] == 0)
164 ;
165
166 /* Move this node one down and remove a corresponding
167 * amount of overflow nodes.
168 */
169 do
170 {
171 bl_counts[incrBitLen]--;
172 bl_counts[++incrBitLen]++;
173 overflow -= 1 << (maxLength - 1 - incrBitLen);
174 }
175 while (overflow > 0 && incrBitLen < maxLength - 1);
176 }
177 while (overflow > 0);
178
179 /* We may have overshot above. Move some nodes from maxLength to
180 * maxLength-1 in that case.
181 */
182 bl_counts[maxLength-1] += overflow;
183 bl_counts[maxLength-2] -= overflow;
184
185 /* Now recompute all bit lengths, scanning in increasing
186 * frequency. It is simpler to reconstruct all lengths instead of
187 * fixing only the wrong ones. This idea is taken from 'ar'
188 * written by Haruhiko Okumura.
189 *
190 * The nodes were inserted with decreasing frequency into the childs
191 * array.
192 */
193 int nodePtr = 2 * numLeafs;
194 for (int bits = maxLength; bits != 0; bits--)
195 {
196 int n = bl_counts[bits-1];
197 while (n > 0)
198 {
199 int childPtr = 2*childs[nodePtr++];
200 if (childs[childPtr + 1] == -1)
201 {
202 /* We found another leaf */
203 length[childs[childPtr]] = (byte) bits;
204 n--;
205 }
206 }
207 }
208 if (DeflaterConstants.DEBUGGING)
209 {
210 System.err.println("*** After overflow elimination. ***");
211 for (int i=0; i < numLeafs; i++)
212 System.err.println("Node "+childs[2*i]+" freq: "+freqs[childs[2*i]]
213 + " len: "+length[childs[2*i]]);
214 }
215 }
216
217 void buildTree()
218 {
219 int numSymbols = freqs.length;
220
221 /* heap is a priority queue, sorted by frequency, least frequent
222 * nodes first. The heap is a binary tree, with the property, that
223 * the parent node is smaller than both child nodes. This assures
224 * that the smallest node is the first parent.
225 *
226 * The binary tree is encoded in an array: 0 is root node and
227 * the nodes 2*n+1, 2*n+2 are the child nodes of node n.
228 */
229 int[] heap = new int[numSymbols];
230 int heapLen = 0;
231 int maxCode = 0;
232 for (int n = 0; n < numSymbols; n++)
233 {
234 int freq = freqs[n];
235 if (freq != 0)
236 {
237 /* Insert n into heap */
238 int pos = heapLen++;
239 int ppos;
240 while (pos > 0 &&
241 freqs[heap[ppos = (pos - 1) / 2]] > freq) {
242 heap[pos] = heap[ppos];
243 pos = ppos;
244 }
245 heap[pos] = n;
246 maxCode = n;
247 }
248 }
249
250 /* We could encode a single literal with 0 bits but then we
251 * don't see the literals. Therefore we force at least two
252 * literals to avoid this case. We don't care about order in
253 * this case, both literals get a 1 bit code.
254 */
255 while (heapLen < 2)
256 {
257 int node = maxCode < 2 ? ++maxCode : 0;
258 heap[heapLen++] = node;
259 }
260
261 numCodes = Math.max(maxCode + 1, minNumCodes);
262
263 int numLeafs = heapLen;
264 int[] childs = new int[4*heapLen - 2];
265 int[] values = new int[2*heapLen - 1];
266 int numNodes = numLeafs;
267 for (int i = 0; i < heapLen; i++)
268 {
269 int node = heap[i];
270 childs[2*i] = node;
271 childs[2*i+1] = -1;
272 values[i] = freqs[node] << 8;
273 heap[i] = i;
274 }
275
276 /* Construct the Huffman tree by repeatedly combining the least two
277 * frequent nodes.
278 */
279 do
280 {
281 int first = heap[0];
282 int last = heap[--heapLen];
283
284 /* Propagate the hole to the leafs of the heap */
285 int ppos = 0;
286 int path = 1;
287 while (path < heapLen)
288 {
289 if (path + 1 < heapLen
290 && values[heap[path]] > values[heap[path+1]])
291 path++;
292
293 heap[ppos] = heap[path];
294 ppos = path;
295 path = path * 2 + 1;
296 }
297
298 /* Now propagate the last element down along path. Normally
299 * it shouldn't go too deep.
300 */
301 int lastVal = values[last];
302 while ((path = ppos) > 0
303 && values[heap[ppos = (path - 1)/2]] > lastVal)
304 heap[path] = heap[ppos];
305 heap[path] = last;
306
307
308 int second = heap[0];
309
310 /* Create a new node father of first and second */
311 last = numNodes++;
312 childs[2*last] = first;
313 childs[2*last+1] = second;
314 int mindepth = Math.min(values[first] & 0xff, values[second] & 0xff);
315 values[last] = lastVal = values[first] + values[second] - mindepth + 1;
316
317 /* Again, propagate the hole to the leafs */
318 ppos = 0;
319 path = 1;
320 while (path < heapLen)
321 {
322 if (path + 1 < heapLen
323 && values[heap[path]] > values[heap[path+1]])
324 path++;
325
326 heap[ppos] = heap[path];
327 ppos = path;
328 path = ppos * 2 + 1;
329 }
330
331 /* Now propagate the new element down along path */
332 while ((path = ppos) > 0
333 && values[heap[ppos = (path - 1)/2]] > lastVal)
334 heap[path] = heap[ppos];
335 heap[path] = last;
336 }
337 while (heapLen > 1);
338
339 if (heap[0] != childs.length / 2 - 1)
340 throw new RuntimeException("Weird!");
341
342 buildLength(childs);
343 }
344
345 int getEncodedLength()
346 {
347 int len = 0;
348 for (int i = 0; i < freqs.length; i++)
349 len += freqs[i] * length[i];
350 return len;
351 }
352
353 void calcBLFreq(Tree blTree) {
354 int max_count; /* max repeat count */
355 int min_count; /* min repeat count */
356 int count; /* repeat count of the current code */
357 int curlen = -1; /* length of current code */
358
359 int i = 0;
360 while (i < numCodes)
361 {
362 count = 1;
363 int nextlen = length[i];
364 if (nextlen == 0)
365 {
366 max_count = 138;
367 min_count = 3;
368 }
369 else
370 {
371 max_count = 6;
372 min_count = 3;
373 if (curlen != nextlen)
374 {
375 blTree.freqs[nextlen]++;
376 count = 0;
377 }
378 }
379 curlen = nextlen;
380 i++;
381
382 while (i < numCodes && curlen == length[i])
383 {
384 i++;
385 if (++count >= max_count)
386 break;
387 }
388
389 if (count < min_count)
390 blTree.freqs[curlen] += count;
391 else if (curlen != 0)
392 blTree.freqs[REP_3_6]++;
393 else if (count <= 10)
394 blTree.freqs[REP_3_10]++;
395 else
396 blTree.freqs[REP_11_138]++;
397 }
398 }
399
400 void writeTree(Tree blTree)
401 {
402 int max_count; /* max repeat count */
403 int min_count; /* min repeat count */
404 int count; /* repeat count of the current code */
405 int curlen = -1; /* length of current code */
406
407 int i = 0;
408 while (i < numCodes)
409 {
410 count = 1;
411 int nextlen = length[i];
412 if (nextlen == 0)
413 {
414 max_count = 138;
415 min_count = 3;
416 }
417 else
418 {
419 max_count = 6;
420 min_count = 3;
421 if (curlen != nextlen)
422 {
423 blTree.writeSymbol(nextlen);
424 count = 0;
425 }
426 }
427 curlen = nextlen;
428 i++;
429
430 while (i < numCodes && curlen == length[i])
431 {
432 i++;
433 if (++count >= max_count)
434 break;
435 }
436
437 if (count < min_count)
438 {
439 while (count-- > 0)
440 blTree.writeSymbol(curlen);
441 }
442 else if (curlen != 0)
443 {
444 blTree.writeSymbol(REP_3_6);
445 pending.writeBits(count - 3, 2);
446 }
447 else if (count <= 10)
448 {
449 blTree.writeSymbol(REP_3_10);
450 pending.writeBits(count - 3, 3);
451 }
452 else
453 {
454 blTree.writeSymbol(REP_11_138);
455 pending.writeBits(count - 11, 7);
456 }
457 }
458 }
459 }
460
461
462
463 DeflaterPending pending;
464 private Tree literalTree, distTree, blTree;
465
466 private short d_buf[];
467 private byte l_buf[];
468 private int last_lit;
469 private int extra_bits;
470
471 private static short staticLCodes[];
472 private static byte staticLLength[];
473 private static short staticDCodes[];
474 private static byte staticDLength[];
475
476 /**
477 * Reverse the bits of a 16 bit value.
478 */
479 static short bitReverse(int value) {
480 return (short) (bit4Reverse.charAt(value & 0xf) << 12
481 | bit4Reverse.charAt((value >> 4) & 0xf) << 8
482 | bit4Reverse.charAt((value >> 8) & 0xf) << 4
483 | bit4Reverse.charAt(value >> 12));
484 }
485
486 static {
487 /* See RFC 1951 3.2.6 */
488 /* Literal codes */
489 staticLCodes = new short[LITERAL_NUM];
490 staticLLength = new byte[LITERAL_NUM];
491 int i = 0;
492 while (i < 144) {
493 staticLCodes[i] = bitReverse((0x030 + i) << 8);
494 staticLLength[i++] = 8;
495 }
496 while (i < 256) {
497 staticLCodes[i] = bitReverse((0x190 - 144 + i) << 7);
498 staticLLength[i++] = 9;
499 }
500 while (i < 280) {
501 staticLCodes[i] = bitReverse((0x000 - 256 + i) << 9);
502 staticLLength[i++] = 7;
503 }
504 while (i < LITERAL_NUM) {
505 staticLCodes[i] = bitReverse((0x0c0 - 280 + i) << 8);
506 staticLLength[i++] = 8;
507 }
508
509 /* Distant codes */
510 staticDCodes = new short[DIST_NUM];
511 staticDLength = new byte[DIST_NUM];
512 for (i = 0; i < DIST_NUM; i++) {
513 staticDCodes[i] = bitReverse(i << 11);
514 staticDLength[i] = 5;
515 }
516 }
517
518 public DeflaterHuffman(DeflaterPending pending)
519 {
520 this.pending = pending;
521
522 literalTree = new Tree(LITERAL_NUM, 257, 15);
523 distTree = new Tree(DIST_NUM, 1, 15);
524 blTree = new Tree(BITLEN_NUM, 4, 7);
525
526 d_buf = new short[BUFSIZE];
527 l_buf = new byte [BUFSIZE];
528 }
529
530 public final void reset() {
531 last_lit = 0;
532 extra_bits = 0;
533 literalTree.reset();
534 distTree.reset();
535 blTree.reset();
536 }
537
538 private final int l_code(int len) {
539 if (len == 255)
540 return 285;
541
542 int code = 257;
543 while (len >= 8)
544 {
545 code += 4;
546 len >>= 1;
547 }
548 return code + len;
549 }
550
551 private final int d_code(int distance) {
552 int code = 0;
553 while (distance >= 4)
554 {
555 code += 2;
556 distance >>= 1;
557 }
558 return code + distance;
559 }
560
561 public void sendAllTrees(int blTreeCodes) {
562 blTree.buildCodes();
563 literalTree.buildCodes();
564 distTree.buildCodes();
565 pending.writeBits(literalTree.numCodes - 257, 5);
566 pending.writeBits(distTree.numCodes - 1, 5);
567 pending.writeBits(blTreeCodes - 4, 4);
568 for (int rank = 0; rank < blTreeCodes; rank++)
569 pending.writeBits(blTree.length[BL_ORDER[rank]], 3);
570 literalTree.writeTree(blTree);
571 distTree.writeTree(blTree);
572 if (DeflaterConstants.DEBUGGING)
573 blTree.checkEmpty();
574 }
575
576 public void compressBlock() {
577 for (int i = 0; i < last_lit; i++)
578 {
579 int litlen = l_buf[i] & 0xff;
580 int dist = d_buf[i];
581 if (dist-- != 0)
582 {
583 if (DeflaterConstants.DEBUGGING)
584 System.err.print("["+(dist+1)+","+(litlen+3)+"]: ");
585
586 int lc = l_code(litlen);
587 literalTree.writeSymbol(lc);
588
589 int bits = (lc - 261) / 4;
590 if (bits > 0 && bits <= 5)
591 pending.writeBits(litlen & ((1 << bits) - 1), bits);
592
593 int dc = d_code(dist);
594 distTree.writeSymbol(dc);
595
596 bits = dc / 2 - 1;
597 if (bits > 0)
598 pending.writeBits(dist & ((1 << bits) - 1), bits);
599 }
600 else
601 {
602 if (DeflaterConstants.DEBUGGING)
603 {
604 if (litlen > 32 && litlen < 127)
605 System.err.print("("+(char)litlen+"): ");
606 else
607 System.err.print("{"+litlen+"}: ");
608 }
609 literalTree.writeSymbol(litlen);
610 }
611 }
612 if (DeflaterConstants.DEBUGGING)
613 System.err.print("EOF: ");
614 literalTree.writeSymbol(EOF_SYMBOL);
615 if (DeflaterConstants.DEBUGGING)
616 {
617 literalTree.checkEmpty();
618 distTree.checkEmpty();
619 }
620 }
621
622 public void flushStoredBlock(byte[] stored,
623 int stored_offset, int stored_len,
624 boolean lastBlock) {
625 if (DeflaterConstants.DEBUGGING)
626 System.err.println("Flushing stored block "+ stored_len);
627 pending.writeBits((DeflaterConstants.STORED_BLOCK << 1)
628 + (lastBlock ? 1 : 0), 3);
629 pending.alignToByte();
630 pending.writeShort(stored_len);
631 pending.writeShort(~stored_len);
632 pending.writeBlock(stored, stored_offset, stored_len);
633 reset();
634 }
635
636 public void flushBlock(byte[] stored, int stored_offset, int stored_len,
637 boolean lastBlock) {
638 literalTree.freqs[EOF_SYMBOL]++;
639
640 /* Build trees */
641 literalTree.buildTree();
642 distTree.buildTree();
643
644 /* Calculate bitlen frequency */
645 literalTree.calcBLFreq(blTree);
646 distTree.calcBLFreq(blTree);
647
648 /* Build bitlen tree */
649 blTree.buildTree();
650
651 int blTreeCodes = 4;
652 for (int i = 18; i > blTreeCodes; i--)
653 {
654 if (blTree.length[BL_ORDER[i]] > 0)
655 blTreeCodes = i+1;
656 }
657 int opt_len = 14 + blTreeCodes * 3 + blTree.getEncodedLength()
658 + literalTree.getEncodedLength() + distTree.getEncodedLength()
659 + extra_bits;
660
661 int static_len = extra_bits;
662 for (int i = 0; i < LITERAL_NUM; i++)
663 static_len += literalTree.freqs[i] * staticLLength[i];
664 for (int i = 0; i < DIST_NUM; i++)
665 static_len += distTree.freqs[i] * staticDLength[i];
666 if (opt_len >= static_len)
667 {
668 /* Force static trees */
669 opt_len = static_len;
670 }
671
672 if (stored_offset >= 0 && stored_len+4 < opt_len >> 3)
673 {
674 /* Store Block */
675 if (DeflaterConstants.DEBUGGING)
676 System.err.println("Storing, since " + stored_len + " < " + opt_len
677 + " <= " + static_len);
678 flushStoredBlock(stored, stored_offset, stored_len, lastBlock);
679 }
680 else if (opt_len == static_len)
681 {
682 /* Encode with static tree */
683 pending.writeBits((DeflaterConstants.STATIC_TREES << 1)
684 + (lastBlock ? 1 : 0), 3);
685 literalTree.setStaticCodes(staticLCodes, staticLLength);
686 distTree.setStaticCodes(staticDCodes, staticDLength);
687 compressBlock();
688 reset();
689 }
690 else
691 {
692 /* Encode with dynamic tree */
693 pending.writeBits((DeflaterConstants.DYN_TREES << 1)
694 + (lastBlock ? 1 : 0), 3);
695 sendAllTrees(blTreeCodes);
696 compressBlock();
697 reset();
698 }
699 }
700
701 public final boolean isFull()
702 {
703 return last_lit == BUFSIZE;
704 }
705
706 public final boolean tallyLit(int lit)
707 {
708 if (DeflaterConstants.DEBUGGING)
709 {
710 if (lit > 32 && lit < 127)
711 System.err.println("("+(char)lit+")");
712 else
713 System.err.println("{"+lit+"}");
714 }
715 d_buf[last_lit] = 0;
716 l_buf[last_lit++] = (byte) lit;
717 literalTree.freqs[lit]++;
718 return last_lit == BUFSIZE;
719 }
720
721 public final boolean tallyDist(int dist, int len)
722 {
723 if (DeflaterConstants.DEBUGGING)
724 System.err.println("["+dist+","+len+"]");
725
726 d_buf[last_lit] = (short) dist;
727 l_buf[last_lit++] = (byte) (len - 3);
728
729 int lc = l_code(len-3);
730 literalTree.freqs[lc]++;
731 if (lc >= 265 && lc < 285)
732 extra_bits += (lc - 261) / 4;
733
734 int dc = d_code(dist-1);
735 distTree.freqs[dc]++;
736 if (dc >= 4)
737 extra_bits += dc / 2 - 1;
738 return last_lit == BUFSIZE;
739 }
740
741 }