1 /* DeflaterHuffman.java --
2 Copyright (C) 2001, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GNU Classpath.
5
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
20
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
25
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
37
38 package java.util.zip;
39
40 /**
41 * This is the DeflaterHuffman class.
42 *
43 * This class is <i>not</i> thread safe. This is inherent in the API, due
44 * to the split of deflate and setInput.
45 *
46 * @author Jochen Hoenicke
47 * @date Jan 6, 2000
48 */
49 class DeflaterHuffman
50 {
51 private static final int BUFSIZE = 1 << (DeflaterConstants.DEFAULT_MEM_LEVEL + 6);
52 private static final int LITERAL_NUM = 286;
53 private static final int DIST_NUM = 30;
54 private static final int BITLEN_NUM = 19;
55 private static final int REP_3_6 = 16;
56 private static final int REP_3_10 = 17;
57 private static final int REP_11_138 = 18;
58 private static final int EOF_SYMBOL = 256;
59 private static final int[] BL_ORDER =
60 { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
61
62 private static final String bit4Reverse =
63 "\000\010\004\014\002\012\006\016\001\011\005\015\003\013\007\017";
64
65 class Tree {
66 short[] freqs;
67 short[] codes;
68 byte[] length;
69 int[] bl_counts;
70 int minNumCodes, numCodes;
71 int maxLength;
72
73 Tree(int elems, int minCodes, int maxLength) {
74 this.minNumCodes = minCodes;
75 this.maxLength = maxLength;
76 freqs = new short[elems];
77 bl_counts = new int[maxLength];
78 }
79
80 void reset() {
81 for (int i = 0; i < freqs.length; i++)
82 freqs[i] = 0;
83 codes = null;
84 length = null;
85 }
86
87 final void writeSymbol(int code)
88 {
89 if (DeflaterConstants.DEBUGGING)
90 {
91 freqs[code]--;
92 // System.err.print("writeSymbol("+freqs.length+","+code+"): ");
93 }
94 pending.writeBits(codes[code] & 0xffff, length[code]);
95 }
96
97 final void checkEmpty()
98 {
99 boolean empty = true;
100 for (int i = 0; i < freqs.length; i++)
101 if (freqs[i] != 0)
102 {
103 System.err.println("freqs["+i+"] == "+freqs[i]);
104 empty = false;
105 }
106 if (!empty)
107 throw new InternalError();
108 System.err.println("checkEmpty suceeded!");
109 }
110
111 void setStaticCodes(short[] stCodes, byte[] stLength)
112 {
113 codes = stCodes;
114 length = stLength;
115 }
116
117 public void buildCodes() {
118 int[] nextCode = new int[maxLength];
119 int code = 0;
120 codes = new short[freqs.length];
121
122 if (DeflaterConstants.DEBUGGING)
123 System.err.println("buildCodes: "+freqs.length);
124 for (int bits = 0; bits < maxLength; bits++)
125 {
126 nextCode[bits] = code;
127 code += bl_counts[bits] << (15 - bits);
128 if (DeflaterConstants.DEBUGGING)
129 System.err.println("bits: "+(bits+1)+" count: "+bl_counts[bits]
130 +" nextCode: "+Integer.toHexString(code));
131 }
132 if (DeflaterConstants.DEBUGGING && code != 65536)
133 throw new RuntimeException("Inconsistent bl_counts!");
134
135 for (int i=0; i < numCodes; i++)
136 {
137 int bits = length[i];
138 if (bits > 0)
139 {
140 if (DeflaterConstants.DEBUGGING)
141 System.err.println("codes["+i+"] = rev("
142 +Integer.toHexString(nextCode[bits-1])+"),"
143 +bits);
144 codes[i] = bitReverse(nextCode[bits-1]);
145 nextCode[bits-1] += 1 << (16 - bits);
146 }
147 }
148 }
149
150 private void buildLength(int childs[])
151 {
152 this.length = new byte [freqs.length];
153 int numNodes = childs.length / 2;
154 int numLeafs = (numNodes + 1) / 2;
155 int overflow = 0;
156
157 for (int i = 0; i < maxLength; i++)
158 bl_counts[i] = 0;
159
160 /* First calculate optimal bit lengths */
161 int lengths[] = new int[numNodes];
162 lengths[numNodes-1] = 0;
163 for (int i = numNodes - 1; i >= 0; i--)
164 {
165 if (childs[2*i+1] != -1)
166 {
167 int bitLength = lengths[i] + 1;
168 if (bitLength > maxLength)
169 {
170 bitLength = maxLength;
171 overflow++;
172 }
173 lengths[childs[2*i]] = lengths[childs[2*i+1]] = bitLength;
174 }
175 else
176 {
177 /* A leaf node */
178 int bitLength = lengths[i];
179 bl_counts[bitLength - 1]++;
180 this.length[childs[2*i]] = (byte) lengths[i];
181 }
182 }
183
184 if (DeflaterConstants.DEBUGGING)
185 {
186 System.err.println("Tree "+freqs.length+" lengths:");
187 for (int i=0; i < numLeafs; i++)
188 System.err.println("Node "+childs[2*i]+" freq: "+freqs[childs[2*i]]
189 + " len: "+length[childs[2*i]]);
190 }
191
192 if (overflow == 0)
193 return;
194
195 int incrBitLen = maxLength - 1;
196 do
197 {
198 /* Find the first bit length which could increase: */
199 while (bl_counts[--incrBitLen] == 0)
200 ;
201
202 /* Move this node one down and remove a corresponding
203 * amount of overflow nodes.
204 */
205 do
206 {
207 bl_counts[incrBitLen]--;
208 bl_counts[++incrBitLen]++;
209 overflow -= 1 << (maxLength - 1 - incrBitLen);
210 }
211 while (overflow > 0 && incrBitLen < maxLength - 1);
212 }
213 while (overflow > 0);
214
215 /* We may have overshot above. Move some nodes from maxLength to
216 * maxLength-1 in that case.
217 */
218 bl_counts[maxLength-1] += overflow;
219 bl_counts[maxLength-2] -= overflow;
220
221 /* Now recompute all bit lengths, scanning in increasing
222 * frequency. It is simpler to reconstruct all lengths instead of
223 * fixing only the wrong ones. This idea is taken from 'ar'
224 * written by Haruhiko Okumura.
225 *
226 * The nodes were inserted with decreasing frequency into the childs
227 * array.
228 */
229 int nodePtr = 2 * numLeafs;
230 for (int bits = maxLength; bits != 0; bits--)
231 {
232 int n = bl_counts[bits-1];
233 while (n > 0)
234 {
235 int childPtr = 2*childs[nodePtr++];
236 if (childs[childPtr + 1] == -1)
237 {
238 /* We found another leaf */
239 length[childs[childPtr]] = (byte) bits;
240 n--;
241 }
242 }
243 }
244 if (DeflaterConstants.DEBUGGING)
245 {
246 System.err.println("*** After overflow elimination. ***");
247 for (int i=0; i < numLeafs; i++)
248 System.err.println("Node "+childs[2*i]+" freq: "+freqs[childs[2*i]]
249 + " len: "+length[childs[2*i]]);
250 }
251 }
252
253 void buildTree()
254 {
255 int numSymbols = freqs.length;
256
257 /* heap is a priority queue, sorted by frequency, least frequent
258 * nodes first. The heap is a binary tree, with the property, that
259 * the parent node is smaller than both child nodes. This assures
260 * that the smallest node is the first parent.
261 *
262 * The binary tree is encoded in an array: 0 is root node and
263 * the nodes 2*n+1, 2*n+2 are the child nodes of node n.
264 */
265 int[] heap = new int[numSymbols];
266 int heapLen = 0;
267 int maxCode = 0;
268 for (int n = 0; n < numSymbols; n++)
269 {
270 int freq = freqs[n];
271 if (freq != 0)
272 {
273 /* Insert n into heap */
274 int pos = heapLen++;
275 int ppos;
276 while (pos > 0 &&
277 freqs[heap[ppos = (pos - 1) / 2]] > freq) {
278 heap[pos] = heap[ppos];
279 pos = ppos;
280 }
281 heap[pos] = n;
282 maxCode = n;
283 }
284 }
285
286 /* We could encode a single literal with 0 bits but then we
287 * don't see the literals. Therefore we force at least two
288 * literals to avoid this case. We don't care about order in
289 * this case, both literals get a 1 bit code.
290 */
291 while (heapLen < 2)
292 {
293 int node = maxCode < 2 ? ++maxCode : 0;
294 heap[heapLen++] = node;
295 }
296
297 numCodes = Math.max(maxCode + 1, minNumCodes);
298
299 int numLeafs = heapLen;
300 int[] childs = new int[4*heapLen - 2];
301 int[] values = new int[2*heapLen - 1];
302 int numNodes = numLeafs;
303 for (int i = 0; i < heapLen; i++)
304 {
305 int node = heap[i];
306 childs[2*i] = node;
307 childs[2*i+1] = -1;
308 values[i] = freqs[node] << 8;
309 heap[i] = i;
310 }
311
312 /* Construct the Huffman tree by repeatedly combining the least two
313 * frequent nodes.
314 */
315 do
316 {
317 int first = heap[0];
318 int last = heap[--heapLen];
319
320 /* Propagate the hole to the leafs of the heap */
321 int ppos = 0;
322 int path = 1;
323 while (path < heapLen)
324 {
325 if (path + 1 < heapLen
326 && values[heap[path]] > values[heap[path+1]])
327 path++;
328
329 heap[ppos] = heap[path];
330 ppos = path;
331 path = path * 2 + 1;
332 }
333
334 /* Now propagate the last element down along path. Normally
335 * it shouldn't go too deep.
336 */
337 int lastVal = values[last];
338 while ((path = ppos) > 0
339 && values[heap[ppos = (path - 1)/2]] > lastVal)
340 heap[path] = heap[ppos];
341 heap[path] = last;
342
343
344 int second = heap[0];
345
346 /* Create a new node father of first and second */
347 last = numNodes++;
348 childs[2*last] = first;
349 childs[2*last+1] = second;
350 int mindepth = Math.min(values[first] & 0xff, values[second] & 0xff);
351 values[last] = lastVal = values[first] + values[second] - mindepth + 1;
352
353 /* Again, propagate the hole to the leafs */
354 ppos = 0;
355 path = 1;
356 while (path < heapLen)
357 {
358 if (path + 1 < heapLen
359 && values[heap[path]] > values[heap[path+1]])
360 path++;
361
362 heap[ppos] = heap[path];
363 ppos = path;
364 path = ppos * 2 + 1;
365 }
366
367 /* Now propagate the new element down along path */
368 while ((path = ppos) > 0
369 && values[heap[ppos = (path - 1)/2]] > lastVal)
370 heap[path] = heap[ppos];
371 heap[path] = last;
372 }
373 while (heapLen > 1);
374
375 if (heap[0] != childs.length / 2 - 1)
376 throw new RuntimeException("Weird!");
377
378 buildLength(childs);
379 }
380
381 int getEncodedLength()
382 {
383 int len = 0;
384 for (int i = 0; i < freqs.length; i++)
385 len += freqs[i] * length[i];
386 return len;
387 }
388
389 void calcBLFreq(Tree blTree) {
390 int max_count; /* max repeat count */
391 int min_count; /* min repeat count */
392 int count; /* repeat count of the current code */
393 int curlen = -1; /* length of current code */
394
395 int i = 0;
396 while (i < numCodes)
397 {
398 count = 1;
399 int nextlen = length[i];
400 if (nextlen == 0)
401 {
402 max_count = 138;
403 min_count = 3;
404 }
405 else
406 {
407 max_count = 6;
408 min_count = 3;
409 if (curlen != nextlen)
410 {
411 blTree.freqs[nextlen]++;
412 count = 0;
413 }
414 }
415 curlen = nextlen;
416 i++;
417
418 while (i < numCodes && curlen == length[i])
419 {
420 i++;
421 if (++count >= max_count)
422 break;
423 }
424
425 if (count < min_count)
426 blTree.freqs[curlen] += count;
427 else if (curlen != 0)
428 blTree.freqs[REP_3_6]++;
429 else if (count <= 10)
430 blTree.freqs[REP_3_10]++;
431 else
432 blTree.freqs[REP_11_138]++;
433 }
434 }
435
436 void writeTree(Tree blTree)
437 {
438 int max_count; /* max repeat count */
439 int min_count; /* min repeat count */
440 int count; /* repeat count of the current code */
441 int curlen = -1; /* length of current code */
442
443 int i = 0;
444 while (i < numCodes)
445 {
446 count = 1;
447 int nextlen = length[i];
448 if (nextlen == 0)
449 {
450 max_count = 138;
451 min_count = 3;
452 }
453 else
454 {
455 max_count = 6;
456 min_count = 3;
457 if (curlen != nextlen)
458 {
459 blTree.writeSymbol(nextlen);
460 count = 0;
461 }
462 }
463 curlen = nextlen;
464 i++;
465
466 while (i < numCodes && curlen == length[i])
467 {
468 i++;
469 if (++count >= max_count)
470 break;
471 }
472
473 if (count < min_count)
474 {
475 while (count-- > 0)
476 blTree.writeSymbol(curlen);
477 }
478 else if (curlen != 0)
479 {
480 blTree.writeSymbol(REP_3_6);
481 pending.writeBits(count - 3, 2);
482 }
483 else if (count <= 10)
484 {
485 blTree.writeSymbol(REP_3_10);
486 pending.writeBits(count - 3, 3);
487 }
488 else
489 {
490 blTree.writeSymbol(REP_11_138);
491 pending.writeBits(count - 11, 7);
492 }
493 }
494 }
495 }
496
497
498
499 DeflaterPending pending;
500 private Tree literalTree, distTree, blTree;
501
502 private short d_buf[];
503 private byte l_buf[];
504 private int last_lit;
505 private int extra_bits;
506
507 private static short staticLCodes[];
508 private static byte staticLLength[];
509 private static short staticDCodes[];
510 private static byte staticDLength[];
511
512 /**
513 * Reverse the bits of a 16 bit value.
514 */
515 static short bitReverse(int value) {
516 return (short) (bit4Reverse.charAt(value & 0xf) << 12
517 | bit4Reverse.charAt((value >> 4) & 0xf) << 8
518 | bit4Reverse.charAt((value >> 8) & 0xf) << 4
519 | bit4Reverse.charAt(value >> 12));
520 }
521
522 static {
523 /* See RFC 1951 3.2.6 */
524 /* Literal codes */
525 staticLCodes = new short[LITERAL_NUM];
526 staticLLength = new byte[LITERAL_NUM];
527 int i = 0;
528 while (i < 144) {
529 staticLCodes[i] = bitReverse((0x030 + i) << 8);
530 staticLLength[i++] = 8;
531 }
532 while (i < 256) {
533 staticLCodes[i] = bitReverse((0x190 - 144 + i) << 7);
534 staticLLength[i++] = 9;
535 }
536 while (i < 280) {
537 staticLCodes[i] = bitReverse((0x000 - 256 + i) << 9);
538 staticLLength[i++] = 7;
539 }
540 while (i < LITERAL_NUM) {
541 staticLCodes[i] = bitReverse((0x0c0 - 280 + i) << 8);
542 staticLLength[i++] = 8;
543 }
544
545 /* Distant codes */
546 staticDCodes = new short[DIST_NUM];
547 staticDLength = new byte[DIST_NUM];
548 for (i = 0; i < DIST_NUM; i++) {
549 staticDCodes[i] = bitReverse(i << 11);
550 staticDLength[i] = 5;
551 }
552 }
553
554 public DeflaterHuffman(DeflaterPending pending)
555 {
556 this.pending = pending;
557
558 literalTree = new Tree(LITERAL_NUM, 257, 15);
559 distTree = new Tree(DIST_NUM, 1, 15);
560 blTree = new Tree(BITLEN_NUM, 4, 7);
561
562 d_buf = new short[BUFSIZE];
563 l_buf = new byte [BUFSIZE];
564 }
565
566 public final void reset() {
567 last_lit = 0;
568 extra_bits = 0;
569 literalTree.reset();
570 distTree.reset();
571 blTree.reset();
572 }
573
574 private int l_code(int len) {
575 if (len == 255)
576 return 285;
577
578 int code = 257;
579 while (len >= 8)
580 {
581 code += 4;
582 len >>= 1;
583 }
584 return code + len;
585 }
586
587 private int d_code(int distance) {
588 int code = 0;
589 while (distance >= 4)
590 {
591 code += 2;
592 distance >>= 1;
593 }
594 return code + distance;
595 }
596
597 public void sendAllTrees(int blTreeCodes) {
598 blTree.buildCodes();
599 literalTree.buildCodes();
600 distTree.buildCodes();
601 pending.writeBits(literalTree.numCodes - 257, 5);
602 pending.writeBits(distTree.numCodes - 1, 5);
603 pending.writeBits(blTreeCodes - 4, 4);
604 for (int rank = 0; rank < blTreeCodes; rank++)
605 pending.writeBits(blTree.length[BL_ORDER[rank]], 3);
606 literalTree.writeTree(blTree);
607 distTree.writeTree(blTree);
608 if (DeflaterConstants.DEBUGGING)
609 blTree.checkEmpty();
610 }
611
612 public void compressBlock() {
613 for (int i = 0; i < last_lit; i++)
614 {
615 int litlen = l_buf[i] & 0xff;
616 int dist = d_buf[i];
617 if (dist-- != 0)
618 {
619 if (DeflaterConstants.DEBUGGING)
620 System.err.print("["+(dist+1)+","+(litlen+3)+"]: ");
621
622 int lc = l_code(litlen);
623 literalTree.writeSymbol(lc);
624
625 int bits = (lc - 261) / 4;
626 if (bits > 0 && bits <= 5)
627 pending.writeBits(litlen & ((1 << bits) - 1), bits);
628
629 int dc = d_code(dist);
630 distTree.writeSymbol(dc);
631
632 bits = dc / 2 - 1;
633 if (bits > 0)
634 pending.writeBits(dist & ((1 << bits) - 1), bits);
635 }
636 else
637 {
638 if (DeflaterConstants.DEBUGGING)
639 {
640 if (litlen > 32 && litlen < 127)
641 System.err.print("("+(char)litlen+"): ");
642 else
643 System.err.print("{"+litlen+"}: ");
644 }
645 literalTree.writeSymbol(litlen);
646 }
647 }
648 if (DeflaterConstants.DEBUGGING)
649 System.err.print("EOF: ");
650 literalTree.writeSymbol(EOF_SYMBOL);
651 if (DeflaterConstants.DEBUGGING)
652 {
653 literalTree.checkEmpty();
654 distTree.checkEmpty();
655 }
656 }
657
658 public void flushStoredBlock(byte[] stored,
659 int stored_offset, int stored_len,
660 boolean lastBlock) {
661 if (DeflaterConstants.DEBUGGING)
662 System.err.println("Flushing stored block "+ stored_len);
663 pending.writeBits((DeflaterConstants.STORED_BLOCK << 1)
664 + (lastBlock ? 1 : 0), 3);
665 pending.alignToByte();
666 pending.writeShort(stored_len);
667 pending.writeShort(~stored_len);
668 pending.writeBlock(stored, stored_offset, stored_len);
669 reset();
670 }
671
672 public void flushBlock(byte[] stored, int stored_offset, int stored_len,
673 boolean lastBlock) {
674 literalTree.freqs[EOF_SYMBOL]++;
675
676 /* Build trees */
677 literalTree.buildTree();
678 distTree.buildTree();
679
680 /* Calculate bitlen frequency */
681 literalTree.calcBLFreq(blTree);
682 distTree.calcBLFreq(blTree);
683
684 /* Build bitlen tree */
685 blTree.buildTree();
686
687 int blTreeCodes = 4;
688 for (int i = 18; i > blTreeCodes; i--)
689 {
690 if (blTree.length[BL_ORDER[i]] > 0)
691 blTreeCodes = i+1;
692 }
693 int opt_len = 14 + blTreeCodes * 3 + blTree.getEncodedLength()
694 + literalTree.getEncodedLength() + distTree.getEncodedLength()
695 + extra_bits;
696
697 int static_len = extra_bits;
698 for (int i = 0; i < LITERAL_NUM; i++)
699 static_len += literalTree.freqs[i] * staticLLength[i];
700 for (int i = 0; i < DIST_NUM; i++)
701 static_len += distTree.freqs[i] * staticDLength[i];
702 if (opt_len >= static_len)
703 {
704 /* Force static trees */
705 opt_len = static_len;
706 }
707
708 if (stored_offset >= 0 && stored_len+4 < opt_len >> 3)
709 {
710 /* Store Block */
711 if (DeflaterConstants.DEBUGGING)
712 System.err.println("Storing, since " + stored_len + " < " + opt_len
713 + " <= " + static_len);
714 flushStoredBlock(stored, stored_offset, stored_len, lastBlock);
715 }
716 else if (opt_len == static_len)
717 {
718 /* Encode with static tree */
719 pending.writeBits((DeflaterConstants.STATIC_TREES << 1)
720 + (lastBlock ? 1 : 0), 3);
721 literalTree.setStaticCodes(staticLCodes, staticLLength);
722 distTree.setStaticCodes(staticDCodes, staticDLength);
723 compressBlock();
724 reset();
725 }
726 else
727 {
728 /* Encode with dynamic tree */
729 pending.writeBits((DeflaterConstants.DYN_TREES << 1)
730 + (lastBlock ? 1 : 0), 3);
731 sendAllTrees(blTreeCodes);
732 compressBlock();
733 reset();
734 }
735 }
736
737 public final boolean isFull()
738 {
739 return last_lit == BUFSIZE;
740 }
741
742 public final boolean tallyLit(int lit)
743 {
744 if (DeflaterConstants.DEBUGGING)
745 {
746 if (lit > 32 && lit < 127)
747 System.err.println("("+(char)lit+")");
748 else
749 System.err.println("{"+lit+"}");
750 }
751 d_buf[last_lit] = 0;
752 l_buf[last_lit++] = (byte) lit;
753 literalTree.freqs[lit]++;
754 return last_lit == BUFSIZE;
755 }
756
757 public final boolean tallyDist(int dist, int len)
758 {
759 if (DeflaterConstants.DEBUGGING)
760 System.err.println("["+dist+","+len+"]");
761
762 d_buf[last_lit] = (short) dist;
763 l_buf[last_lit++] = (byte) (len - 3);
764
765 int lc = l_code(len-3);
766 literalTree.freqs[lc]++;
767 if (lc >= 265 && lc < 285)
768 extra_bits += (lc - 261) / 4;
769
770 int dc = d_code(dist-1);
771 distTree.freqs[dc]++;
772 if (dc >= 4)
773 extra_bits += dc / 2 - 1;
774 return last_lit == BUFSIZE;
775 }
776 }