Docjar: A Java Source and Docuemnt Enginecom.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: ClassLib/Common/java/util/zip/DeflaterHuffman.java


1   // DeflaterHuffman.java, created Mon Jul  8  4:06:18 2002 by joewhaley
2   // Copyright (C) 2001-3 John Whaley <jwhaley@alum.mit.edu>
3   // Licensed under the terms of the GNU LGPL; see COPYING for details.
4   package ClassLib.Common.java.util.zip;
5   
6   /**
7    * DeflaterHuffman
8    *
9    * @author  John Whaley <jwhaley@alum.mit.edu>
10   * @version $Id: DeflaterHuffman.java,v 1.5 2003/05/12 10:04:53 joewhaley Exp $
11   */
12  class DeflaterHuffman {
13  
14    private static final int BUFSIZE = 1 << (DeflaterConstants.DEFAULT_MEM_LEVEL + 6);
15    private static final int LITERAL_NUM = 286;
16    private static final int DIST_NUM = 30;
17    private static final int BITLEN_NUM = 19;
18    private static final int REP_3_6    = 16;
19    private static final int REP_3_10   = 17;
20    private static final int REP_11_138 = 18;
21    private static final int EOF_SYMBOL = 256;
22    private static final int[] BL_ORDER =
23    { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
24  
25    private static final String bit4Reverse =
26      "\000\010\004\014\002\012\006\016\001\011\005\015\003\013\007\017";
27  
28    class Tree {
29      short[] freqs;
30      short[] codes;
31      byte[]  length;
32      int[]   bl_counts;
33      int     minNumCodes, numCodes;
34      int     maxLength;
35  
36      Tree(int elems, int minCodes, int maxLength) {
37        this.minNumCodes = minCodes;
38        this.maxLength  = maxLength;
39        freqs  = new short[elems];
40        bl_counts = new int[maxLength];
41      }
42  
43      void reset() {
44        for (int i = 0; i < freqs.length; i++)
45          freqs[i] = 0;
46        codes = null;
47        length = null;
48      }
49  
50      final void writeSymbol(int code)
51      {
52        if (DeflaterConstants.DEBUGGING)
53          {
54            freqs[code]--;
55  //        System.err.print("writeSymbol("+freqs.length+","+code+"): ");
56          }
57        pending.writeBits(codes[code] & 0xffff, length[code]);
58      }
59  
60      final void checkEmpty()
61      {
62        boolean empty = true;
63        for (int i = 0; i < freqs.length; i++)
64          if (freqs[i] != 0)
65            {
66              System.err.println("freqs["+i+"] == "+freqs[i]);
67              empty = false;
68            }
69        if (!empty)
70          throw new InternalError();
71        System.err.println("checkEmpty suceeded!");
72      }
73  
74      void setStaticCodes(short[] stCodes, byte[] stLength)
75      {
76        codes = stCodes;
77        length = stLength;
78      }
79  
80      public void buildCodes() {
81        int numSymbols = freqs.length;
82        int[] nextCode = new int[maxLength];
83        int code = 0;
84        codes = new short[freqs.length];
85  
86        if (DeflaterConstants.DEBUGGING)
87          System.err.println("buildCodes: "+freqs.length);
88        for (int bits = 0; bits < maxLength; bits++) 
89          {
90            nextCode[bits] = code;
91            code += bl_counts[bits] << (15 - bits);
92            if (DeflaterConstants.DEBUGGING)
93              System.err.println("bits: "+(bits+1)+" count: "+bl_counts[bits]
94                                 +" nextCode: "+Integer.toHexString(code));
95          }
96        if (DeflaterConstants.DEBUGGING && code != 65536)
97          throw new RuntimeException("Inconsistent bl_counts!");
98        
99        for (int i=0; i < numCodes; i++)
100         {
101           int bits = length[i];
102           if (bits > 0)
103             {
104               if (DeflaterConstants.DEBUGGING)
105                 System.err.println("codes["+i+"] = rev("
106                                    +Integer.toHexString(nextCode[bits-1])+"),"
107                                    +bits);
108               codes[i] = bitReverse(nextCode[bits-1]);
109               nextCode[bits-1] += 1 << (16 - bits);
110             }
111         }
112     }
113 
114     private void buildLength(int childs[])
115     {
116       this.length = new byte [freqs.length];
117       int numNodes = childs.length / 2;
118       int numLeafs = (numNodes + 1) / 2;
119       int overflow = 0;
120       
121       for (int i = 0; i < maxLength; i++)
122         bl_counts[i] = 0;
123 
124       /* First calculate optimal bit lengths */
125       int lengths[] = new int[numNodes];
126       lengths[numNodes-1] = 0;
127       for (int i = numNodes - 1; i >= 0; i--)
128         {
129           if (childs[2*i+1] != -1)
130             {
131               int bitLength = lengths[i] + 1;
132               if (bitLength > maxLength)
133                 {
134                   bitLength = maxLength;
135                   overflow++;
136                 }
137               lengths[childs[2*i]] = lengths[childs[2*i+1]] = bitLength;
138             }
139           else
140             {
141               /* A leaf node */
142               int bitLength = lengths[i];
143               bl_counts[bitLength - 1]++;
144               this.length[childs[2*i]] = (byte) lengths[i];
145             }
146         }
147       
148       if (DeflaterConstants.DEBUGGING)
149         {
150           System.err.println("Tree "+freqs.length+" lengths:");
151           for (int i=0; i < numLeafs; i++)
152             System.err.println("Node "+childs[2*i]+" freq: "+freqs[childs[2*i]]
153                                + " len: "+length[childs[2*i]]);
154         }
155       
156       if (overflow == 0)
157         return;
158       
159       int incrBitLen = maxLength - 1;
160       do
161         {
162           /* Find the first bit length which could increase: */
163           while (bl_counts[--incrBitLen] == 0)
164             ;
165           
166           /* Move this node one down and remove a corresponding
167            * amount of overflow nodes.
168            */
169           do
170             {
171               bl_counts[incrBitLen]--;
172               bl_counts[++incrBitLen]++;
173               overflow -= 1 << (maxLength - 1 - incrBitLen);
174             }
175           while (overflow > 0 && incrBitLen < maxLength - 1);
176         }
177       while (overflow > 0);
178 
179       /* We may have overshot above.  Move some nodes from maxLength to
180        * maxLength-1 in that case.
181        */
182       bl_counts[maxLength-1] += overflow;
183       bl_counts[maxLength-2] -= overflow;
184       
185       /* Now recompute all bit lengths, scanning in increasing
186        * frequency.  It is simpler to reconstruct all lengths instead of
187        * fixing only the wrong ones. This idea is taken from 'ar'
188        * written by Haruhiko Okumura.
189        *
190        * The nodes were inserted with decreasing frequency into the childs
191        * array.
192        */
193       int nodePtr = 2 * numLeafs;
194       for (int bits = maxLength; bits != 0; bits--) 
195         {
196           int n = bl_counts[bits-1];
197           while (n > 0) 
198             {
199               int childPtr = 2*childs[nodePtr++];
200               if (childs[childPtr + 1] == -1)
201                 {
202                   /* We found another leaf */
203                   length[childs[childPtr]] = (byte) bits;
204                   n--;
205                 }
206             }
207         }
208       if (DeflaterConstants.DEBUGGING)
209         {
210           System.err.println("*** After overflow elimination. ***");
211           for (int i=0; i < numLeafs; i++)
212             System.err.println("Node "+childs[2*i]+" freq: "+freqs[childs[2*i]]
213                                + " len: "+length[childs[2*i]]);
214         }
215     }
216     
217     void buildTree() 
218     {
219       int numSymbols = freqs.length;
220 
221       /* heap is a priority queue, sorted by frequency, least frequent
222        * nodes first.  The heap is a binary tree, with the property, that
223        * the parent node is smaller than both child nodes.  This assures
224        * that the smallest node is the first parent.
225        *
226        * The binary tree is encoded in an array:  0 is root node and
227        * the nodes 2*n+1, 2*n+2 are the child nodes of node n.
228        */
229       int[] heap = new int[numSymbols];
230       int heapLen = 0;
231       int maxCode = 0;
232       for (int n = 0; n < numSymbols; n++)
233         {
234           int freq = freqs[n];
235           if (freq != 0)
236             {
237               /* Insert n into heap */
238               int pos = heapLen++;
239               int ppos;
240               while (pos > 0 &&
241                      freqs[heap[ppos = (pos - 1) / 2]] > freq) {
242                 heap[pos] = heap[ppos];
243                 pos = ppos;
244               }
245               heap[pos] = n;
246               maxCode = n;
247             }
248         }
249       
250       /* We could encode a single literal with 0 bits but then we
251        * don't see the literals.  Therefore we force at least two
252        * literals to avoid this case.  We don't care about order in
253        * this case, both literals get a 1 bit code.  
254        */
255       while (heapLen < 2)
256         {
257           int node = maxCode < 2 ? ++maxCode : 0;
258           heap[heapLen++] = node;
259         }
260 
261       numCodes = Math.max(maxCode + 1, minNumCodes);
262       
263       int numLeafs = heapLen;
264       int[] childs = new int[4*heapLen - 2];
265       int[] values = new int[2*heapLen - 1];
266       int numNodes = numLeafs;
267       for (int i = 0; i < heapLen; i++)
268         {
269           int node = heap[i];
270           childs[2*i]   = node;
271           childs[2*i+1] = -1;
272           values[i] = freqs[node] << 8;
273           heap[i] = i;
274         }
275       
276       /* Construct the Huffman tree by repeatedly combining the least two
277        * frequent nodes.
278        */
279       do
280         {
281           int first = heap[0];
282           int last  = heap[--heapLen];
283           
284           /* Propagate the hole to the leafs of the heap */
285           int ppos = 0;
286           int path = 1;
287           while (path < heapLen)
288             {
289               if (path + 1 < heapLen 
290                   && values[heap[path]] > values[heap[path+1]])
291                 path++;
292               
293               heap[ppos] = heap[path];
294               ppos = path;
295               path = path * 2 + 1;
296             }
297           
298           /* Now propagate the last element down along path.  Normally
299            * it shouldn't go too deep.
300            */
301           int lastVal = values[last];
302           while ((path = ppos) > 0
303                  && values[heap[ppos = (path - 1)/2]] > lastVal)
304             heap[path] = heap[ppos];
305           heap[path] = last;
306           
307           
308           int second = heap[0];
309           
310           /* Create a new node father of first and second */
311           last = numNodes++;
312           childs[2*last] = first;
313           childs[2*last+1] = second;
314           int mindepth = Math.min(values[first] & 0xff, values[second] & 0xff);
315           values[last] = lastVal = values[first] + values[second] - mindepth + 1;
316           
317           /* Again, propagate the hole to the leafs */
318           ppos = 0;
319           path = 1;
320           while (path < heapLen)
321             {
322               if (path + 1 < heapLen 
323                   && values[heap[path]] > values[heap[path+1]])
324                 path++;
325               
326               heap[ppos] = heap[path];
327               ppos = path;
328               path = ppos * 2 + 1;
329             }
330           
331           /* Now propagate the new element down along path */
332           while ((path = ppos) > 0
333                  && values[heap[ppos = (path - 1)/2]] > lastVal)
334             heap[path] = heap[ppos];
335           heap[path] = last;
336         }
337       while (heapLen > 1);
338       
339       if (heap[0] != childs.length / 2 - 1)
340         throw new RuntimeException("Weird!");
341       
342       buildLength(childs);
343     }
344 
345     int getEncodedLength() 
346     {
347       int len = 0;
348       for (int i = 0; i < freqs.length; i++)
349         len += freqs[i] * length[i];
350       return len;
351     }
352 
353     void calcBLFreq(Tree blTree) {
354       int max_count;               /* max repeat count */
355       int min_count;               /* min repeat count */
356       int count;                   /* repeat count of the current code */
357       int curlen = -1;             /* length of current code */
358       
359       int i = 0;
360       while (i < numCodes)
361         {
362           count = 1;
363           int nextlen = length[i];
364           if (nextlen == 0) 
365             {
366               max_count = 138;
367               min_count = 3;
368             }
369           else
370             {
371               max_count = 6;
372               min_count = 3;
373               if (curlen != nextlen)
374                 {
375                   blTree.freqs[nextlen]++;
376                   count = 0;
377                 }
378             }
379           curlen = nextlen;
380           i++;
381 
382           while (i < numCodes && curlen == length[i])
383             {
384               i++;
385               if (++count >= max_count)
386                 break;
387             }
388 
389           if (count < min_count)
390             blTree.freqs[curlen] += count;
391           else if (curlen != 0)
392             blTree.freqs[REP_3_6]++;
393           else if (count <= 10)
394             blTree.freqs[REP_3_10]++;
395           else
396             blTree.freqs[REP_11_138]++;
397         }
398     }
399 
400     void writeTree(Tree blTree)
401     {
402       int max_count;               /* max repeat count */
403       int min_count;               /* min repeat count */
404       int count;                   /* repeat count of the current code */
405       int curlen = -1;             /* length of current code */
406       
407       int i = 0;
408       while (i < numCodes)
409         {
410           count = 1;
411           int nextlen = length[i];
412           if (nextlen == 0) 
413             {
414               max_count = 138;
415               min_count = 3;
416             }
417           else
418             {
419               max_count = 6;
420               min_count = 3;
421               if (curlen != nextlen)
422                 {
423                   blTree.writeSymbol(nextlen);
424                   count = 0;
425                 }
426             }
427           curlen = nextlen;
428           i++;
429 
430           while (i < numCodes && curlen == length[i])
431             {
432               i++;
433               if (++count >= max_count)
434                 break;
435             }
436 
437           if (count < min_count)
438             {
439               while (count-- > 0)
440                 blTree.writeSymbol(curlen);
441             }
442           else if (curlen != 0)
443             {
444               blTree.writeSymbol(REP_3_6);
445               pending.writeBits(count - 3, 2);
446             }
447           else if (count <= 10)
448             {
449               blTree.writeSymbol(REP_3_10);
450               pending.writeBits(count - 3, 3);
451             }
452           else
453             {
454               blTree.writeSymbol(REP_11_138);
455               pending.writeBits(count - 11, 7);
456             }
457         }
458     }
459   }
460 
461 
462 
463   DeflaterPending pending;
464   private Tree literalTree, distTree, blTree;
465 
466   private short d_buf[];
467   private byte l_buf[];
468   private int last_lit;
469   private int extra_bits;
470 
471   private static short staticLCodes[];
472   private static byte  staticLLength[];
473   private static short staticDCodes[];
474   private static byte  staticDLength[];
475 
476   /**
477    * Reverse the bits of a 16 bit value.
478    */
479   static short bitReverse(int value) {
480     return (short) (bit4Reverse.charAt(value & 0xf) << 12
481                     | bit4Reverse.charAt((value >> 4) & 0xf) << 8
482                     | bit4Reverse.charAt((value >> 8) & 0xf) << 4
483                     | bit4Reverse.charAt(value >> 12));
484   }
485 
486   static {
487     /* See RFC 1951 3.2.6 */
488     /* Literal codes */
489     staticLCodes = new short[LITERAL_NUM];
490     staticLLength = new byte[LITERAL_NUM];
491     int i = 0;
492     while (i < 144) {
493       staticLCodes[i] = bitReverse((0x030 + i) << 8);
494       staticLLength[i++] = 8;
495     }
496     while (i < 256) {
497       staticLCodes[i] = bitReverse((0x190 - 144 + i) << 7);
498       staticLLength[i++] = 9;
499     }
500     while (i < 280) {
501       staticLCodes[i] = bitReverse((0x000 - 256 + i) << 9);
502       staticLLength[i++] = 7;
503     }
504     while (i < LITERAL_NUM) {
505       staticLCodes[i] = bitReverse((0x0c0 - 280 + i)  << 8);
506       staticLLength[i++] = 8;
507     }
508 
509     /* Distant codes */
510     staticDCodes = new short[DIST_NUM];
511     staticDLength = new byte[DIST_NUM];
512     for (i = 0; i < DIST_NUM; i++) {
513       staticDCodes[i] = bitReverse(i << 11);
514       staticDLength[i] = 5;
515     }
516   }
517     
518   public DeflaterHuffman(DeflaterPending pending) 
519   {
520     this.pending = pending;
521 
522     literalTree = new Tree(LITERAL_NUM, 257, 15);
523     distTree    = new Tree(DIST_NUM, 1, 15);
524     blTree      = new Tree(BITLEN_NUM, 4, 7);
525 
526     d_buf = new short[BUFSIZE];
527     l_buf = new byte [BUFSIZE];
528   }
529 
530   public final void reset() {
531     last_lit = 0;
532     extra_bits = 0;
533     literalTree.reset();
534     distTree.reset();
535     blTree.reset();
536   }
537 
538   private final int l_code(int len) {
539     if (len == 255)
540       return 285;
541 
542     int code = 257;
543     while (len >= 8)
544       {
545         code += 4;
546         len >>= 1;
547       }
548     return code + len;
549   }
550 
551   private final int d_code(int distance) {
552     int code = 0;
553     while (distance >= 4)
554       {
555         code += 2;
556         distance >>= 1;
557       }
558     return code + distance;
559   }
560 
561   public void sendAllTrees(int blTreeCodes) {
562     blTree.buildCodes();
563     literalTree.buildCodes();
564     distTree.buildCodes();
565     pending.writeBits(literalTree.numCodes - 257, 5);
566     pending.writeBits(distTree.numCodes - 1, 5);
567     pending.writeBits(blTreeCodes - 4, 4);
568     for (int rank = 0; rank < blTreeCodes; rank++)
569       pending.writeBits(blTree.length[BL_ORDER[rank]], 3);
570     literalTree.writeTree(blTree);
571     distTree.writeTree(blTree);
572     if (DeflaterConstants.DEBUGGING)
573       blTree.checkEmpty();
574   }
575 
576   public void compressBlock() {
577     for (int i = 0; i < last_lit; i++) 
578       {
579         int litlen = l_buf[i] & 0xff;
580         int dist = d_buf[i];
581         if (dist-- != 0)
582           {
583             if (DeflaterConstants.DEBUGGING)
584               System.err.print("["+(dist+1)+","+(litlen+3)+"]: ");
585 
586             int lc = l_code(litlen);
587             literalTree.writeSymbol(lc);
588 
589             int bits = (lc - 261) / 4;
590             if (bits > 0 && bits <= 5)
591               pending.writeBits(litlen & ((1 << bits) - 1), bits);
592 
593             int dc = d_code(dist);
594             distTree.writeSymbol(dc);
595 
596             bits = dc / 2 - 1;
597             if (bits > 0)
598               pending.writeBits(dist & ((1 << bits) - 1), bits);
599           }
600         else
601           {
602             if (DeflaterConstants.DEBUGGING)
603               {
604                 if (litlen > 32 && litlen < 127)
605                   System.err.print("("+(char)litlen+"): ");
606                 else
607                   System.err.print("{"+litlen+"}: ");
608               }
609             literalTree.writeSymbol(litlen);
610           }
611       }
612     if (DeflaterConstants.DEBUGGING)
613       System.err.print("EOF: ");
614     literalTree.writeSymbol(EOF_SYMBOL);
615     if (DeflaterConstants.DEBUGGING)
616       {
617         literalTree.checkEmpty();
618         distTree.checkEmpty();
619       }
620   }
621 
622   public void flushStoredBlock(byte[] stored, 
623                                int stored_offset, int stored_len,
624                                boolean lastBlock) {
625     if (DeflaterConstants.DEBUGGING)
626       System.err.println("Flushing stored block "+ stored_len);
627     pending.writeBits((DeflaterConstants.STORED_BLOCK << 1)
628                       + (lastBlock ? 1 : 0), 3);
629     pending.alignToByte();
630     pending.writeShort(stored_len);
631     pending.writeShort(~stored_len);
632     pending.writeBlock(stored, stored_offset, stored_len);
633     reset();
634   }
635 
636   public void flushBlock(byte[] stored, int stored_offset, int stored_len,
637                          boolean lastBlock) {
638     literalTree.freqs[EOF_SYMBOL]++;
639 
640     /* Build trees */
641     literalTree.buildTree();
642     distTree.buildTree();
643 
644     /* Calculate bitlen frequency */
645     literalTree.calcBLFreq(blTree);
646     distTree.calcBLFreq(blTree);
647 
648     /* Build bitlen tree */
649     blTree.buildTree();
650 
651     int blTreeCodes = 4;
652     for (int i = 18; i > blTreeCodes; i--)
653       {
654         if (blTree.length[BL_ORDER[i]] > 0)
655           blTreeCodes = i+1;
656       }
657     int opt_len = 14 + blTreeCodes * 3 + blTree.getEncodedLength()
658       + literalTree.getEncodedLength() + distTree.getEncodedLength()
659       + extra_bits;
660 
661     int static_len = extra_bits;
662     for (int i = 0; i < LITERAL_NUM; i++)
663       static_len += literalTree.freqs[i] * staticLLength[i];
664     for (int i = 0; i < DIST_NUM; i++)
665       static_len += distTree.freqs[i] * staticDLength[i];
666     if (opt_len >= static_len)
667       {
668         /* Force static trees */
669         opt_len = static_len;
670       }
671 
672     if (stored_offset >= 0 && stored_len+4 < opt_len >> 3)
673       {
674         /* Store Block */
675         if (DeflaterConstants.DEBUGGING)
676           System.err.println("Storing, since " + stored_len + " < " + opt_len
677                              + " <= " + static_len);
678         flushStoredBlock(stored, stored_offset, stored_len, lastBlock);
679       }
680     else if (opt_len == static_len)
681       {
682         /* Encode with static tree */
683         pending.writeBits((DeflaterConstants.STATIC_TREES << 1)
684                           + (lastBlock ? 1 : 0), 3);
685         literalTree.setStaticCodes(staticLCodes, staticLLength);
686         distTree.setStaticCodes(staticDCodes, staticDLength);
687         compressBlock();
688         reset();
689       }
690     else
691       {
692         /* Encode with dynamic tree */
693         pending.writeBits((DeflaterConstants.DYN_TREES << 1)
694                           + (lastBlock ? 1 : 0), 3);
695         sendAllTrees(blTreeCodes);
696         compressBlock();
697         reset();
698       }
699   }
700 
701   public final boolean isFull()
702   {
703     return last_lit == BUFSIZE;
704   }
705 
706   public final boolean tallyLit(int lit) 
707   {
708     if (DeflaterConstants.DEBUGGING)
709       {
710         if (lit > 32 && lit < 127)
711           System.err.println("("+(char)lit+")");
712         else
713           System.err.println("{"+lit+"}");
714       }
715     d_buf[last_lit] = 0;
716     l_buf[last_lit++] = (byte) lit;
717     literalTree.freqs[lit]++;
718     return last_lit == BUFSIZE;
719   }
720 
721   public final boolean tallyDist(int dist, int len) 
722   {
723     if (DeflaterConstants.DEBUGGING)
724       System.err.println("["+dist+","+len+"]");
725 
726     d_buf[last_lit] = (short) dist;
727     l_buf[last_lit++] = (byte) (len - 3);
728 
729     int lc = l_code(len-3);
730     literalTree.freqs[lc]++;
731     if (lc >= 265 && lc < 285)
732       extra_bits += (lc - 261) / 4;
733 
734     int dc = d_code(dist-1);
735     distTree.freqs[dc]++;
736     if (dc >= 4)
737       extra_bits += dc / 2 - 1;
738     return last_lit == BUFSIZE;
739   }
740   
741 }