Docjar: A Java Source and Docuemnt Enginecom.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: org/apache/derby/iapi/store/access/BackingStoreHashtable.java


1   /*
2   
3      Derby - Class org.apache.derby.iapi.store.access.BackingStoreHashtable
4   
5      Copyright 1999, 2004 The Apache Software Foundation or its licensors, as applicable.
6   
7      Licensed under the Apache License, Version 2.0 (the "License");
8      you may not use this file except in compliance with the License.
9      You may obtain a copy of the License at
10  
11        http://www.apache.org/licenses/LICENSE-2.0
12  
13     Unless required by applicable law or agreed to in writing, software
14     distributed under the License is distributed on an "AS IS" BASIS,
15     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16     See the License for the specific language governing permissions and
17     limitations under the License.
18  
19   */
20  
21  package org.apache.derby.iapi.store.access;
22  
23  import org.apache.derby.iapi.services.sanity.SanityManager;
24  
25  import org.apache.derby.iapi.services.io.Storable;
26  
27  import org.apache.derby.iapi.error.StandardException; 
28  
29  import org.apache.derby.iapi.types.CloneableObject;
30  import org.apache.derby.iapi.types.DataValueDescriptor;
31  
32  import org.apache.derby.iapi.services.cache.ClassSize;
33  
34  import java.util.Enumeration;
35  import java.util.Hashtable;
36  import java.util.Properties; 
37  import java.util.Vector;
38  import java.util.NoSuchElementException;
39  
40  /**
41  A BackingStoreHashtable is a utility class which will store a set of rows into
42  an in memory hash table, or overflow the hash table to a tempory on disk 
43  structure.
44  <p>
45  All rows must contain the same number of columns, and the column at position
46  N of all the rows must have the same format id.  If the BackingStoreHashtable needs to be
47  overflowed to disk, then an arbitrary row will be chosen and used as a template
48  for creating the underlying overflow container.
49  
50  <p>
51  The hash table will be built logically as follows (actual implementation
52  may differ).  The important points are that the hash value is the standard
53  java hash value on the row[key_column_numbers[0], if key_column_numbers.length is 1,
54  or row[key_column_numbers[0, 1, ...]] if key_column_numbers.length > 1, 
55  and that duplicate detection is done by the standard java duplicate detection provided by 
56  java.util.Hashtable.
57  <p>
58  import java.util.Hashtable;
59  
60  hash_table = new Hashtable();
61  
62  Object[] row;
63  boolean  needsToClone = rowSource.needsToClone();
64  
65  while((row = rowSource.getNextRowFromRowSource()) != null)
66  {
67      if (needsToClone)
68          row = clone_row_from_row(row);
69  
70    Object key = KeyHasher.buildHashKey(row, key_column_numbers);
71  
72      if ((duplicate_value = 
73          hash_table.put(key, row)) != null)
74      {
75          Vector row_vec;
76  
77          // inserted a duplicate
78          if ((duplicate_value instanceof vector))
79          {
80              row_vec = (Vector) duplicate_value;
81          }
82          else
83          {
84              // allocate vector to hold duplicates
85              row_vec = new Vector(2);
86  
87              // insert original row into vector
88              row_vec.addElement(duplicate_value);
89  
90              // put the vector as the data rather than the row
91              hash_table.put(key, row_vec);
92          }
93          
94          // insert new row into vector
95          row_vec.addElement(row);
96      }
97  }
98  
99  **/
100 
101 public class BackingStoreHashtable
102 {
103 
104     /**************************************************************************
105      * Fields of the class
106      **************************************************************************
107      */
108     private TransactionController tc;
109     private Hashtable   hash_table;
110     private int[]       key_column_numbers;
111     private boolean     remove_duplicates;
112   private boolean    skipNullKeyColumns;
113     private Properties  auxillary_runtimestats;
114   private RowSource  row_source;
115     /* If max_inmemory_rowcnt > 0 then use that to decide when to spill to disk.
116      * Otherwise compute max_inmemory_size based on the JVM memory size when the BackingStoreHashtable
117      * is constructed and use that to decide when to spill to disk.
118      */
119     private long max_inmemory_rowcnt;
120     private long inmemory_rowcnt;
121     private long max_inmemory_size;
122     private boolean keepAfterCommit;
123 
124     private static int vectorSize; // The estimated number of bytes used by Vector(0)
125     static {
126         try
127         {
128             vectorSize = ClassSize.estimateBase( java.util.Vector.class);
129         }
130         catch( SecurityException se)
131         {
132             vectorSize = 4*ClassSize.refSize;
133         }
134     };
135     
136     private DiskHashtable diskHashtable;
137 
138     /**************************************************************************
139      * Constructors for This class:
140      **************************************************************************
141      */
142     private BackingStoreHashtable(){}
143 
144     /**
145      * Create the BackingStoreHashtable from a row source.
146      * <p>
147      * This routine drains the RowSource.  The performance characteristics
148      * depends on the number of rows inserted and the parameters to the 
149      * constructor.  
150      * <p>
151      * If the number of rows is <= "max_inmemory_rowcnt", then the rows are
152      * inserted into a java.util.Hashtable.  In this case no 
153      * TransactionController is necessary, a "null" tc is valid.
154      * <p>
155      * If the number of rows is > "max_inmemory_rowcnt", then the rows will
156      * be all placed in some sort of Access temporary file on disk.  This 
157      * case requires a valid TransactionController.
158      *
159      * @param tc                An open TransactionController to be used if the
160      *                          hash table needs to overflow to disk.
161      *
162      * @param row_source        RowSource to read rows from.
163      *
164      * @param key_column_numbers The column numbers of the columns in the
165      *                          scan result row to be the key to the Hashtable.
166      *                          "0" is the first column in the scan result
167      *                          row (which may be different than the first
168      *                          row in the table of the scan).
169      *
170      * @param remove_duplicates Should the Hashtable automatically remove
171      *                          duplicates, or should it create the Vector of
172      *                          duplicates?
173      *
174      * @param estimated_rowcnt  The estimated number of rows in the hash table.
175      *                          Pass in -1 if there is no estimate.
176      *
177      * @param max_inmemory_rowcnt
178      *                          The maximum number of rows to insert into the 
179      *                          inmemory Hash table before overflowing to disk.
180      *                          Pass in -1 if there is no maximum.
181      *
182      * @param initialCapacity   If not "-1" used to initialize the java 
183      *                          Hashtable.
184      *
185      * @param loadFactor        If not "-1" used to initialize the java 
186      *                          Hashtable.
187    *
188    * @param skipNullKeyColumns  Skip rows with a null key column, if true.
189      *
190      * @param keepAfterCommit If true the hash table is kept after a commit,
191      *                        if false the hash table is dropped on the next commit.
192      *
193      *
194    * @exception  StandardException  Standard exception policy.
195      **/
196     public BackingStoreHashtable(
197     TransactionController   tc,
198     RowSource               row_source,
199     int[]                   key_column_numbers,
200     boolean                 remove_duplicates,
201     long                    estimated_rowcnt,
202     long                    max_inmemory_rowcnt,
203     int                     initialCapacity,
204     float                   loadFactor,
205   boolean          skipNullKeyColumns,
206     boolean                 keepAfterCommit)
207         throws StandardException
208     {
209         this.key_column_numbers    = key_column_numbers;
210         this.remove_duplicates    = remove_duplicates;
211     this.row_source         = row_source;
212     this.skipNullKeyColumns     = skipNullKeyColumns;
213         this.max_inmemory_rowcnt = max_inmemory_rowcnt;
214         if( max_inmemory_rowcnt > 0)
215             max_inmemory_size = Long.MAX_VALUE;
216         else
217             max_inmemory_size = Runtime.getRuntime().totalMemory()/100;
218         this.tc = tc;
219         this.keepAfterCommit = keepAfterCommit;
220 
221         Object[] row;
222 
223         // use passed in capacity and loadfactor if not -1, you must specify
224         // capacity if you want to specify loadfactor.
225         if (initialCapacity != -1)
226         {
227             hash_table = 
228                 ((loadFactor == -1) ? 
229                      new Hashtable(initialCapacity) : 
230                      new Hashtable(initialCapacity, loadFactor));
231         }
232         else
233         {
234             hash_table = 
235                 ((estimated_rowcnt <= 0) ? 
236                      new Hashtable() : new Hashtable((int) estimated_rowcnt));
237         }
238 
239         if (row_source != null)
240         {
241             boolean needsToClone = row_source.needsToClone();
242 
243             while ((row = getNextRowFromRowSource()) != null)
244             {
245 
246                 if (needsToClone)
247                 {
248                     row = cloneRow(row);
249                 }
250 
251                 Object key = 
252                     KeyHasher.buildHashKey(row, key_column_numbers);
253 
254                 add_row_to_hash_table(hash_table, key, row);
255             }
256         }
257     }
258 
259     /**************************************************************************
260      * Private/Protected methods of This class:
261      **************************************************************************
262      */
263 
264   /**
265    * Call method to either get next row or next row with non-null
266    * key columns.
267    *
268      *
269    * @exception  StandardException  Standard exception policy.
270    */
271   private Object[] getNextRowFromRowSource()
272     throws StandardException
273   {
274     Object[] row = row_source.getNextRowFromRowSource();
275 
276     if (skipNullKeyColumns)
277     {
278       while (row != null)
279       {
280         // Are any key columns null?
281         int index = 0;
282         for ( ; index < key_column_numbers.length; index++)
283         {
284           if (SanityManager.DEBUG)
285           {
286             if (! (row[key_column_numbers[index]] instanceof Storable))
287             {
288               SanityManager.THROWASSERT(
289                 "row[key_column_numbers[index]] expected to be Storable, not " +
290                 row[key_column_numbers[index]].getClass().getName());
291             }
292           }
293           Storable storable = (Storable) row[key_column_numbers[index]];
294           if (storable.isNull())
295           {
296             break;
297           }
298         }
299         // No null key columns
300         if (index == key_column_numbers.length)
301         {
302           return row;
303         }
304         // 1 or more null key columns
305         row = row_source.getNextRowFromRowSource();
306       }
307     }
308     return row;
309   }
310 
311     /**
312      * Return a cloned copy of the row.
313      *
314    * @return The cloned row row to use.
315      *
316    * @exception  StandardException  Standard exception policy.
317      **/
318     static Object[] cloneRow(Object[] old_row)
319         throws StandardException
320     {
321         Object[] new_row = new DataValueDescriptor[old_row.length];
322 
323     // the only difference between getClone and cloneObject is cloneObject does
324     // not objectify a stream.  We use getClone here.  Beetle 4896.
325         for (int i = 0; i < old_row.length; i++)
326         {
327             if( old_row[i] != null)
328                 new_row[i] = ((DataValueDescriptor) old_row[i]).getClone();
329         }
330 
331         return(new_row);
332     }
333 
334     /**
335      * Do the work to add one row to the hash table.
336      * <p>
337      *
338      * @param row               Row to add to the hash table.
339      * @param hash_table        The java HashTable to load into.
340      *
341    * @exception  StandardException  Standard exception policy.
342      **/
343     private void add_row_to_hash_table(
344     Hashtable   hash_table,
345     Object      key,
346     Object[]    row)
347     throws StandardException
348     {
349         if( spillToDisk( hash_table, key, row))
350             return;
351         
352         Object  duplicate_value = null;
353 
354         if ((duplicate_value = hash_table.put(key, row)) == null)
355             doSpaceAccounting( row, false);
356         else
357         {
358             if (!remove_duplicates)
359             {
360                 Vector row_vec;
361 
362                 // inserted a duplicate
363                 if ((duplicate_value instanceof Vector))
364                 {
365                     doSpaceAccounting( row, false);
366                     row_vec = (Vector) duplicate_value;
367                 }
368                 else
369                 {
370                     // allocate vector to hold duplicates
371                     row_vec = new Vector(2);
372 
373                     // insert original row into vector
374                     row_vec.addElement(duplicate_value);
375                     doSpaceAccounting( row, true);
376                 }
377 
378                 // insert new row into vector
379                 row_vec.addElement(row);
380 
381                 // store vector of rows back into hash table,
382                 // overwriting the duplicate key that was 
383                 // inserted.
384                 hash_table.put(key, row_vec);
385             }
386         }
387 
388         row = null;
389     }
390 
391     private void doSpaceAccounting( Object[] row,
392                                     boolean firstDuplicate)
393     {
394         inmemory_rowcnt++;
395         if( max_inmemory_rowcnt <= 0)
396         {
397             for( int i = 0; i < row.length; i++)
398             {
399                 if( row[i] instanceof DataValueDescriptor)
400                     max_inmemory_size -= ((DataValueDescriptor) row[i]).estimateMemoryUsage();
401                 max_inmemory_size -= ClassSize.refSize;
402             }
403             max_inmemory_size -= ClassSize.refSize;
404             if( firstDuplicate)
405                 max_inmemory_size -= vectorSize;
406         }
407     } // end of doSpaceAccounting
408 
409     /**
410      * Determine whether a new row should be spilled to disk and, if so, do it.
411      *
412      * @param hash_table The in-memory hash table
413      * @param key The row's key
414      * @param row
415      *
416      * @return true if the row was spilled to disk, false if not
417      *
418      * @exception  StandardException  Standard exception policy.
419      */
420     private boolean spillToDisk( Hashtable   hash_table,
421                                  Object      key,
422                                  Object[]    row)
423     throws StandardException
424     {
425         // Once we have started spilling all new rows will go to disk, even if we have freed up some
426         // memory by moving duplicates to disk. This simplifies handling of duplicates and accounting.
427         if( diskHashtable == null)
428         {
429             if( max_inmemory_rowcnt > 0)
430             {
431                 if( inmemory_rowcnt < max_inmemory_rowcnt)
432                     return false; // Do not spill
433             }
434             else if( max_inmemory_size > 0)
435                 return false;
436             // Want to start spilling
437             if( ! (row instanceof DataValueDescriptor[]))
438             {
439                 if( SanityManager.DEBUG)
440                     SanityManager.THROWASSERT( "BackingStoreHashtable row is not DataValueDescriptor[]");
441                 // Do not know how to put it on disk
442                 return false;
443             }
444             diskHashtable = new DiskHashtable( tc,
445                                                (DataValueDescriptor[]) row,
446                                                key_column_numbers,
447                                                remove_duplicates,
448                                                keepAfterCommit);
449         }
450         
451         Object duplicateValue = hash_table.get( key);
452         if( duplicateValue != null)
453         {
454             if( remove_duplicates)
455                 return true; // a degenerate case of spilling
456             // If we are keeping duplicates then move all the duplicates from memory to disk
457             // This simplifies finding duplicates: they are either all in memory or all on disk.
458             if( duplicateValue instanceof Vector)
459             {
460                 Vector duplicateVec = (Vector) duplicateValue;
461                 for( int i = duplicateVec.size() - 1; i >= 0; i--)
462                 {
463                     Object[] dupRow = (Object[]) duplicateVec.elementAt(i);
464                     diskHashtable.put( key, dupRow);
465                 }
466             }
467             else
468                 diskHashtable.put( key, (Object []) duplicateValue);
469             hash_table.remove( key);
470         }
471         diskHashtable.put( key, row);
472         return true;
473     } // end of spillToDisk
474     /**************************************************************************
475      * Public Methods of This class:
476      **************************************************************************
477      */
478 
479     /**
480      * Close the BackingStoreHashtable.
481      * <p>
482      * Perform any necessary cleanup after finishing with the hashtable.  Will
483      * deallocate/dereference objects as necessary.  If the table has gone
484      * to disk this will drop any on disk files used to support the hash table.
485      * <p>
486      *
487    * @exception  StandardException  Standard exception policy.
488      **/
489     public void close() 
490     throws StandardException
491     {
492         hash_table = null;
493         if( diskHashtable != null)
494         {
495             diskHashtable.close();
496             diskHashtable = null;
497         }
498         return;
499     }
500 
501     /**
502      * Return an Enumeration that can be used to scan entire table.
503      * <p>
504      * RESOLVE - is it worth it to support this routine when we have a
505      *           disk overflow hash table?
506      *
507    * @return The Enumeration.
508      *
509    * @exception  StandardException  Standard exception policy.
510      **/
511     public Enumeration elements()
512         throws StandardException
513     {
514         if( diskHashtable == null)
515             return(hash_table.elements());
516         return new BackingStoreHashtableEnumeration();
517     }
518 
519     /**
520      * get data associated with given key.
521      * <p>
522      * There are 2 different types of objects returned from this routine.
523      * <p>
524    * In both cases, the key value is either the object stored in 
525      * row[key_column_numbers[0]], if key_column_numbers.length is 1, 
526      * otherwise it is a KeyHasher containing
527    * the objects stored in row[key_column_numbers[0, 1, ...]].
528      * For every qualifying unique row value an entry is placed into the 
529      * Hashtable.
530      * <p>
531      * For row values with duplicates, the value of the data is a Vector of
532      * rows.
533      * <p>
534      * The caller will have to call "instanceof" on the data value
535      * object if duplicates are expected, to determine if the data value
536      * of the Hashtable entry is a row or is a Vector of rows.
537      * <p>
538      * The BackingStoreHashtable "owns" the objects returned from the get()
539      * routine.  They remain valid until the next access to the 
540      * BackingStoreHashtable.  If the client needs to keep references to these
541      * objects, it should clone copies of the objects.  A valid 
542      * BackingStoreHashtable can place all rows into a disk based conglomerate,
543      * declare a row buffer and then reuse that row buffer for every get()
544      * call.
545      *
546    * @return The value to which the key is mapped in this hashtable; 
547      *         null if the key is not mapped to any value in this hashtable.
548      *
549      * @param key    The key to hash on.
550      *
551    * @exception  StandardException  Standard exception policy.
552      **/
553     public Object get(Object key)
554     throws StandardException
555     {
556         Object obj = hash_table.get(key);
557         if( diskHashtable == null || obj != null)
558             return obj;
559         return diskHashtable.get( key);
560     }
561 
562     /**
563      * Return runtime stats to caller by adding them to prop.
564      * <p>
565      *
566      * @param prop   The set of properties to append to.
567      *
568    * @exception  StandardException  Standard exception policy.
569      **/
570     public void getAllRuntimeStats(Properties   prop)
571     throws StandardException
572     {
573         if (auxillary_runtimestats != null)
574             org.apache.derby.iapi.util.PropertyUtil.copyProperties(auxillary_runtimestats, prop);
575     }
576 
577     /**
578      * remove a row from the hash table.
579      * <p>
580      * a remove of a duplicate removes the entire duplicate list.
581      *
582      * @param key          The key of the row to remove.
583      *
584    * @exception  StandardException  Standard exception policy.
585      **/
586     public Object remove(
587     Object      key)
588     throws StandardException
589     {
590         Object obj = hash_table.remove(key);
591         if( obj != null || diskHashtable == null)
592             return obj;
593         return diskHashtable.remove(key);
594     }
595 
596     /**
597      * Set the auxillary runtime stats.
598      * <p>
599      * getRuntimeStats() will return both the auxillary stats and any
600      * BackingStoreHashtable() specific stats.  Note that each call to
601      * setAuxillaryRuntimeStats() overwrites the Property set that was
602      * set previously.
603      *
604      * @param prop   The set of properties to append from.
605      *
606    * @exception  StandardException  Standard exception policy.
607      **/
608     public void setAuxillaryRuntimeStats(Properties   prop)
609     throws StandardException
610     {
611         auxillary_runtimestats = prop;
612     }
613 
614     /**
615      * Put a row into the hash table.
616      * <p>
617      * The in memory hash table will need to keep a reference to the row
618      * after the put call has returned.  If "needsToClone" is true then the
619      * hash table will make a copy of the row and put that, else if 
620      * "needsToClone" is false then the hash table will keep a reference to
621      * the row passed in and no copy will be made.
622      * <p>
623      * If rouine returns false, then no reference is kept to the duplicate
624      * row which was rejected (thus allowing caller to reuse the object).
625      *
626      * @param needsToClone does this routine have to make a copy of the row,
627      *                     in order to keep a reference to it after return?
628      * @param row          The row to insert into the table.
629      *
630    * @return true if row was inserted into the hash table.  Returns
631      *              false if the BackingStoreHashtable is eliminating 
632      *              duplicates, and the row being inserted is a duplicate,
633    *        or if we are skipping rows with 1 or more null key columns
634    *        and we find a null key column.
635      *
636    * @exception  StandardException  Standard exception policy.
637      **/
638     public boolean put(
639     boolean     needsToClone,
640     Object[]    row)
641     throws StandardException
642     {
643     // Are any key columns null?
644     if (skipNullKeyColumns)
645     {
646       int index = 0;
647       for ( ; index < key_column_numbers.length; index++)
648       {
649         if (SanityManager.DEBUG)
650         {
651           if (! (row[key_column_numbers[index]] instanceof Storable))
652           {
653             SanityManager.THROWASSERT(
654               "row[key_column_numbers[index]] expected to be Storable, not " +
655               row[key_column_numbers[index]].getClass().getName());
656           }
657         }
658         Storable storable = (Storable) row[key_column_numbers[index]];
659         if (storable.isNull())
660         {
661           return false;
662         }
663       }
664     }
665 
666         if (needsToClone)
667         {
668             row = cloneRow(row);
669         }
670 
671         Object key = KeyHasher.buildHashKey(row, key_column_numbers);
672 
673         if ((remove_duplicates) && (get(key) != null))
674         {
675             return(false);
676         }
677         else
678         {
679             add_row_to_hash_table(hash_table, key, row);
680             return(true);
681         }
682     }
683 
684     /**
685      * Return number of unique rows in the hash table.
686      * <p>
687      *
688    * @return The number of unique rows in the hash table.
689      *
690    * @exception  StandardException  Standard exception policy.
691      **/
692     public int size()
693     throws StandardException
694     {
695         if( diskHashtable == null)
696             return(hash_table.size());
697         return hash_table.size() + diskHashtable.size();
698     }
699 
700     private class BackingStoreHashtableEnumeration implements Enumeration
701     {
702         private Enumeration memoryEnumeration;
703         private Enumeration diskEnumeration;
704 
705         BackingStoreHashtableEnumeration()
706         {
707             memoryEnumeration = hash_table.elements();
708             if( diskHashtable != null)
709             {
710                 try
711                 {
712                     diskEnumeration = diskHashtable.elements();
713                 }
714                 catch( StandardException se)
715                 {
716                     diskEnumeration = null;
717                 }
718             }
719         }
720         
721         public boolean hasMoreElements()
722         {
723             if( memoryEnumeration != null)
724             {
725                 if( memoryEnumeration.hasMoreElements())
726                     return true;
727                 memoryEnumeration = null;
728             }
729             if( diskEnumeration == null)
730                 return false;
731             return diskEnumeration.hasMoreElements();
732         }
733 
734         public Object nextElement() throws NoSuchElementException
735         {
736             if( memoryEnumeration != null)
737             {
738                 if( memoryEnumeration.hasMoreElements())
739                     return memoryEnumeration.nextElement();
740                 memoryEnumeration = null;
741             }
742             return diskEnumeration.nextElement();
743         }
744     } // end of class BackingStoreHashtableEnumeration
745 }