Source code: org/apache/derby/iapi/store/access/BackingStoreHashtable.java
1 /*
2
3 Derby - Class org.apache.derby.iapi.store.access.BackingStoreHashtable
4
5 Copyright 1999, 2004 The Apache Software Foundation or its licensors, as applicable.
6
7 Licensed under the Apache License, Version 2.0 (the "License");
8 you may not use this file except in compliance with the License.
9 You may obtain a copy of the License at
10
11 http://www.apache.org/licenses/LICENSE-2.0
12
13 Unless required by applicable law or agreed to in writing, software
14 distributed under the License is distributed on an "AS IS" BASIS,
15 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 See the License for the specific language governing permissions and
17 limitations under the License.
18
19 */
20
21 package org.apache.derby.iapi.store.access;
22
23 import org.apache.derby.iapi.services.sanity.SanityManager;
24
25 import org.apache.derby.iapi.services.io.Storable;
26
27 import org.apache.derby.iapi.error.StandardException;
28
29 import org.apache.derby.iapi.types.CloneableObject;
30 import org.apache.derby.iapi.types.DataValueDescriptor;
31
32 import org.apache.derby.iapi.services.cache.ClassSize;
33
34 import java.util.Enumeration;
35 import java.util.Hashtable;
36 import java.util.Properties;
37 import java.util.Vector;
38 import java.util.NoSuchElementException;
39
40 /**
41 A BackingStoreHashtable is a utility class which will store a set of rows into
42 an in memory hash table, or overflow the hash table to a tempory on disk
43 structure.
44 <p>
45 All rows must contain the same number of columns, and the column at position
46 N of all the rows must have the same format id. If the BackingStoreHashtable needs to be
47 overflowed to disk, then an arbitrary row will be chosen and used as a template
48 for creating the underlying overflow container.
49
50 <p>
51 The hash table will be built logically as follows (actual implementation
52 may differ). The important points are that the hash value is the standard
53 java hash value on the row[key_column_numbers[0], if key_column_numbers.length is 1,
54 or row[key_column_numbers[0, 1, ...]] if key_column_numbers.length > 1,
55 and that duplicate detection is done by the standard java duplicate detection provided by
56 java.util.Hashtable.
57 <p>
58 import java.util.Hashtable;
59
60 hash_table = new Hashtable();
61
62 Object[] row;
63 boolean needsToClone = rowSource.needsToClone();
64
65 while((row = rowSource.getNextRowFromRowSource()) != null)
66 {
67 if (needsToClone)
68 row = clone_row_from_row(row);
69
70 Object key = KeyHasher.buildHashKey(row, key_column_numbers);
71
72 if ((duplicate_value =
73 hash_table.put(key, row)) != null)
74 {
75 Vector row_vec;
76
77 // inserted a duplicate
78 if ((duplicate_value instanceof vector))
79 {
80 row_vec = (Vector) duplicate_value;
81 }
82 else
83 {
84 // allocate vector to hold duplicates
85 row_vec = new Vector(2);
86
87 // insert original row into vector
88 row_vec.addElement(duplicate_value);
89
90 // put the vector as the data rather than the row
91 hash_table.put(key, row_vec);
92 }
93
94 // insert new row into vector
95 row_vec.addElement(row);
96 }
97 }
98
99 **/
100
101 public class BackingStoreHashtable
102 {
103
104 /**************************************************************************
105 * Fields of the class
106 **************************************************************************
107 */
108 private TransactionController tc;
109 private Hashtable hash_table;
110 private int[] key_column_numbers;
111 private boolean remove_duplicates;
112 private boolean skipNullKeyColumns;
113 private Properties auxillary_runtimestats;
114 private RowSource row_source;
115 /* If max_inmemory_rowcnt > 0 then use that to decide when to spill to disk.
116 * Otherwise compute max_inmemory_size based on the JVM memory size when the BackingStoreHashtable
117 * is constructed and use that to decide when to spill to disk.
118 */
119 private long max_inmemory_rowcnt;
120 private long inmemory_rowcnt;
121 private long max_inmemory_size;
122 private boolean keepAfterCommit;
123
124 private static int vectorSize; // The estimated number of bytes used by Vector(0)
125 static {
126 try
127 {
128 vectorSize = ClassSize.estimateBase( java.util.Vector.class);
129 }
130 catch( SecurityException se)
131 {
132 vectorSize = 4*ClassSize.refSize;
133 }
134 };
135
136 private DiskHashtable diskHashtable;
137
138 /**************************************************************************
139 * Constructors for This class:
140 **************************************************************************
141 */
142 private BackingStoreHashtable(){}
143
144 /**
145 * Create the BackingStoreHashtable from a row source.
146 * <p>
147 * This routine drains the RowSource. The performance characteristics
148 * depends on the number of rows inserted and the parameters to the
149 * constructor.
150 * <p>
151 * If the number of rows is <= "max_inmemory_rowcnt", then the rows are
152 * inserted into a java.util.Hashtable. In this case no
153 * TransactionController is necessary, a "null" tc is valid.
154 * <p>
155 * If the number of rows is > "max_inmemory_rowcnt", then the rows will
156 * be all placed in some sort of Access temporary file on disk. This
157 * case requires a valid TransactionController.
158 *
159 * @param tc An open TransactionController to be used if the
160 * hash table needs to overflow to disk.
161 *
162 * @param row_source RowSource to read rows from.
163 *
164 * @param key_column_numbers The column numbers of the columns in the
165 * scan result row to be the key to the Hashtable.
166 * "0" is the first column in the scan result
167 * row (which may be different than the first
168 * row in the table of the scan).
169 *
170 * @param remove_duplicates Should the Hashtable automatically remove
171 * duplicates, or should it create the Vector of
172 * duplicates?
173 *
174 * @param estimated_rowcnt The estimated number of rows in the hash table.
175 * Pass in -1 if there is no estimate.
176 *
177 * @param max_inmemory_rowcnt
178 * The maximum number of rows to insert into the
179 * inmemory Hash table before overflowing to disk.
180 * Pass in -1 if there is no maximum.
181 *
182 * @param initialCapacity If not "-1" used to initialize the java
183 * Hashtable.
184 *
185 * @param loadFactor If not "-1" used to initialize the java
186 * Hashtable.
187 *
188 * @param skipNullKeyColumns Skip rows with a null key column, if true.
189 *
190 * @param keepAfterCommit If true the hash table is kept after a commit,
191 * if false the hash table is dropped on the next commit.
192 *
193 *
194 * @exception StandardException Standard exception policy.
195 **/
196 public BackingStoreHashtable(
197 TransactionController tc,
198 RowSource row_source,
199 int[] key_column_numbers,
200 boolean remove_duplicates,
201 long estimated_rowcnt,
202 long max_inmemory_rowcnt,
203 int initialCapacity,
204 float loadFactor,
205 boolean skipNullKeyColumns,
206 boolean keepAfterCommit)
207 throws StandardException
208 {
209 this.key_column_numbers = key_column_numbers;
210 this.remove_duplicates = remove_duplicates;
211 this.row_source = row_source;
212 this.skipNullKeyColumns = skipNullKeyColumns;
213 this.max_inmemory_rowcnt = max_inmemory_rowcnt;
214 if( max_inmemory_rowcnt > 0)
215 max_inmemory_size = Long.MAX_VALUE;
216 else
217 max_inmemory_size = Runtime.getRuntime().totalMemory()/100;
218 this.tc = tc;
219 this.keepAfterCommit = keepAfterCommit;
220
221 Object[] row;
222
223 // use passed in capacity and loadfactor if not -1, you must specify
224 // capacity if you want to specify loadfactor.
225 if (initialCapacity != -1)
226 {
227 hash_table =
228 ((loadFactor == -1) ?
229 new Hashtable(initialCapacity) :
230 new Hashtable(initialCapacity, loadFactor));
231 }
232 else
233 {
234 hash_table =
235 ((estimated_rowcnt <= 0) ?
236 new Hashtable() : new Hashtable((int) estimated_rowcnt));
237 }
238
239 if (row_source != null)
240 {
241 boolean needsToClone = row_source.needsToClone();
242
243 while ((row = getNextRowFromRowSource()) != null)
244 {
245
246 if (needsToClone)
247 {
248 row = cloneRow(row);
249 }
250
251 Object key =
252 KeyHasher.buildHashKey(row, key_column_numbers);
253
254 add_row_to_hash_table(hash_table, key, row);
255 }
256 }
257 }
258
259 /**************************************************************************
260 * Private/Protected methods of This class:
261 **************************************************************************
262 */
263
264 /**
265 * Call method to either get next row or next row with non-null
266 * key columns.
267 *
268 *
269 * @exception StandardException Standard exception policy.
270 */
271 private Object[] getNextRowFromRowSource()
272 throws StandardException
273 {
274 Object[] row = row_source.getNextRowFromRowSource();
275
276 if (skipNullKeyColumns)
277 {
278 while (row != null)
279 {
280 // Are any key columns null?
281 int index = 0;
282 for ( ; index < key_column_numbers.length; index++)
283 {
284 if (SanityManager.DEBUG)
285 {
286 if (! (row[key_column_numbers[index]] instanceof Storable))
287 {
288 SanityManager.THROWASSERT(
289 "row[key_column_numbers[index]] expected to be Storable, not " +
290 row[key_column_numbers[index]].getClass().getName());
291 }
292 }
293 Storable storable = (Storable) row[key_column_numbers[index]];
294 if (storable.isNull())
295 {
296 break;
297 }
298 }
299 // No null key columns
300 if (index == key_column_numbers.length)
301 {
302 return row;
303 }
304 // 1 or more null key columns
305 row = row_source.getNextRowFromRowSource();
306 }
307 }
308 return row;
309 }
310
311 /**
312 * Return a cloned copy of the row.
313 *
314 * @return The cloned row row to use.
315 *
316 * @exception StandardException Standard exception policy.
317 **/
318 static Object[] cloneRow(Object[] old_row)
319 throws StandardException
320 {
321 Object[] new_row = new DataValueDescriptor[old_row.length];
322
323 // the only difference between getClone and cloneObject is cloneObject does
324 // not objectify a stream. We use getClone here. Beetle 4896.
325 for (int i = 0; i < old_row.length; i++)
326 {
327 if( old_row[i] != null)
328 new_row[i] = ((DataValueDescriptor) old_row[i]).getClone();
329 }
330
331 return(new_row);
332 }
333
334 /**
335 * Do the work to add one row to the hash table.
336 * <p>
337 *
338 * @param row Row to add to the hash table.
339 * @param hash_table The java HashTable to load into.
340 *
341 * @exception StandardException Standard exception policy.
342 **/
343 private void add_row_to_hash_table(
344 Hashtable hash_table,
345 Object key,
346 Object[] row)
347 throws StandardException
348 {
349 if( spillToDisk( hash_table, key, row))
350 return;
351
352 Object duplicate_value = null;
353
354 if ((duplicate_value = hash_table.put(key, row)) == null)
355 doSpaceAccounting( row, false);
356 else
357 {
358 if (!remove_duplicates)
359 {
360 Vector row_vec;
361
362 // inserted a duplicate
363 if ((duplicate_value instanceof Vector))
364 {
365 doSpaceAccounting( row, false);
366 row_vec = (Vector) duplicate_value;
367 }
368 else
369 {
370 // allocate vector to hold duplicates
371 row_vec = new Vector(2);
372
373 // insert original row into vector
374 row_vec.addElement(duplicate_value);
375 doSpaceAccounting( row, true);
376 }
377
378 // insert new row into vector
379 row_vec.addElement(row);
380
381 // store vector of rows back into hash table,
382 // overwriting the duplicate key that was
383 // inserted.
384 hash_table.put(key, row_vec);
385 }
386 }
387
388 row = null;
389 }
390
391 private void doSpaceAccounting( Object[] row,
392 boolean firstDuplicate)
393 {
394 inmemory_rowcnt++;
395 if( max_inmemory_rowcnt <= 0)
396 {
397 for( int i = 0; i < row.length; i++)
398 {
399 if( row[i] instanceof DataValueDescriptor)
400 max_inmemory_size -= ((DataValueDescriptor) row[i]).estimateMemoryUsage();
401 max_inmemory_size -= ClassSize.refSize;
402 }
403 max_inmemory_size -= ClassSize.refSize;
404 if( firstDuplicate)
405 max_inmemory_size -= vectorSize;
406 }
407 } // end of doSpaceAccounting
408
409 /**
410 * Determine whether a new row should be spilled to disk and, if so, do it.
411 *
412 * @param hash_table The in-memory hash table
413 * @param key The row's key
414 * @param row
415 *
416 * @return true if the row was spilled to disk, false if not
417 *
418 * @exception StandardException Standard exception policy.
419 */
420 private boolean spillToDisk( Hashtable hash_table,
421 Object key,
422 Object[] row)
423 throws StandardException
424 {
425 // Once we have started spilling all new rows will go to disk, even if we have freed up some
426 // memory by moving duplicates to disk. This simplifies handling of duplicates and accounting.
427 if( diskHashtable == null)
428 {
429 if( max_inmemory_rowcnt > 0)
430 {
431 if( inmemory_rowcnt < max_inmemory_rowcnt)
432 return false; // Do not spill
433 }
434 else if( max_inmemory_size > 0)
435 return false;
436 // Want to start spilling
437 if( ! (row instanceof DataValueDescriptor[]))
438 {
439 if( SanityManager.DEBUG)
440 SanityManager.THROWASSERT( "BackingStoreHashtable row is not DataValueDescriptor[]");
441 // Do not know how to put it on disk
442 return false;
443 }
444 diskHashtable = new DiskHashtable( tc,
445 (DataValueDescriptor[]) row,
446 key_column_numbers,
447 remove_duplicates,
448 keepAfterCommit);
449 }
450
451 Object duplicateValue = hash_table.get( key);
452 if( duplicateValue != null)
453 {
454 if( remove_duplicates)
455 return true; // a degenerate case of spilling
456 // If we are keeping duplicates then move all the duplicates from memory to disk
457 // This simplifies finding duplicates: they are either all in memory or all on disk.
458 if( duplicateValue instanceof Vector)
459 {
460 Vector duplicateVec = (Vector) duplicateValue;
461 for( int i = duplicateVec.size() - 1; i >= 0; i--)
462 {
463 Object[] dupRow = (Object[]) duplicateVec.elementAt(i);
464 diskHashtable.put( key, dupRow);
465 }
466 }
467 else
468 diskHashtable.put( key, (Object []) duplicateValue);
469 hash_table.remove( key);
470 }
471 diskHashtable.put( key, row);
472 return true;
473 } // end of spillToDisk
474 /**************************************************************************
475 * Public Methods of This class:
476 **************************************************************************
477 */
478
479 /**
480 * Close the BackingStoreHashtable.
481 * <p>
482 * Perform any necessary cleanup after finishing with the hashtable. Will
483 * deallocate/dereference objects as necessary. If the table has gone
484 * to disk this will drop any on disk files used to support the hash table.
485 * <p>
486 *
487 * @exception StandardException Standard exception policy.
488 **/
489 public void close()
490 throws StandardException
491 {
492 hash_table = null;
493 if( diskHashtable != null)
494 {
495 diskHashtable.close();
496 diskHashtable = null;
497 }
498 return;
499 }
500
501 /**
502 * Return an Enumeration that can be used to scan entire table.
503 * <p>
504 * RESOLVE - is it worth it to support this routine when we have a
505 * disk overflow hash table?
506 *
507 * @return The Enumeration.
508 *
509 * @exception StandardException Standard exception policy.
510 **/
511 public Enumeration elements()
512 throws StandardException
513 {
514 if( diskHashtable == null)
515 return(hash_table.elements());
516 return new BackingStoreHashtableEnumeration();
517 }
518
519 /**
520 * get data associated with given key.
521 * <p>
522 * There are 2 different types of objects returned from this routine.
523 * <p>
524 * In both cases, the key value is either the object stored in
525 * row[key_column_numbers[0]], if key_column_numbers.length is 1,
526 * otherwise it is a KeyHasher containing
527 * the objects stored in row[key_column_numbers[0, 1, ...]].
528 * For every qualifying unique row value an entry is placed into the
529 * Hashtable.
530 * <p>
531 * For row values with duplicates, the value of the data is a Vector of
532 * rows.
533 * <p>
534 * The caller will have to call "instanceof" on the data value
535 * object if duplicates are expected, to determine if the data value
536 * of the Hashtable entry is a row or is a Vector of rows.
537 * <p>
538 * The BackingStoreHashtable "owns" the objects returned from the get()
539 * routine. They remain valid until the next access to the
540 * BackingStoreHashtable. If the client needs to keep references to these
541 * objects, it should clone copies of the objects. A valid
542 * BackingStoreHashtable can place all rows into a disk based conglomerate,
543 * declare a row buffer and then reuse that row buffer for every get()
544 * call.
545 *
546 * @return The value to which the key is mapped in this hashtable;
547 * null if the key is not mapped to any value in this hashtable.
548 *
549 * @param key The key to hash on.
550 *
551 * @exception StandardException Standard exception policy.
552 **/
553 public Object get(Object key)
554 throws StandardException
555 {
556 Object obj = hash_table.get(key);
557 if( diskHashtable == null || obj != null)
558 return obj;
559 return diskHashtable.get( key);
560 }
561
562 /**
563 * Return runtime stats to caller by adding them to prop.
564 * <p>
565 *
566 * @param prop The set of properties to append to.
567 *
568 * @exception StandardException Standard exception policy.
569 **/
570 public void getAllRuntimeStats(Properties prop)
571 throws StandardException
572 {
573 if (auxillary_runtimestats != null)
574 org.apache.derby.iapi.util.PropertyUtil.copyProperties(auxillary_runtimestats, prop);
575 }
576
577 /**
578 * remove a row from the hash table.
579 * <p>
580 * a remove of a duplicate removes the entire duplicate list.
581 *
582 * @param key The key of the row to remove.
583 *
584 * @exception StandardException Standard exception policy.
585 **/
586 public Object remove(
587 Object key)
588 throws StandardException
589 {
590 Object obj = hash_table.remove(key);
591 if( obj != null || diskHashtable == null)
592 return obj;
593 return diskHashtable.remove(key);
594 }
595
596 /**
597 * Set the auxillary runtime stats.
598 * <p>
599 * getRuntimeStats() will return both the auxillary stats and any
600 * BackingStoreHashtable() specific stats. Note that each call to
601 * setAuxillaryRuntimeStats() overwrites the Property set that was
602 * set previously.
603 *
604 * @param prop The set of properties to append from.
605 *
606 * @exception StandardException Standard exception policy.
607 **/
608 public void setAuxillaryRuntimeStats(Properties prop)
609 throws StandardException
610 {
611 auxillary_runtimestats = prop;
612 }
613
614 /**
615 * Put a row into the hash table.
616 * <p>
617 * The in memory hash table will need to keep a reference to the row
618 * after the put call has returned. If "needsToClone" is true then the
619 * hash table will make a copy of the row and put that, else if
620 * "needsToClone" is false then the hash table will keep a reference to
621 * the row passed in and no copy will be made.
622 * <p>
623 * If rouine returns false, then no reference is kept to the duplicate
624 * row which was rejected (thus allowing caller to reuse the object).
625 *
626 * @param needsToClone does this routine have to make a copy of the row,
627 * in order to keep a reference to it after return?
628 * @param row The row to insert into the table.
629 *
630 * @return true if row was inserted into the hash table. Returns
631 * false if the BackingStoreHashtable is eliminating
632 * duplicates, and the row being inserted is a duplicate,
633 * or if we are skipping rows with 1 or more null key columns
634 * and we find a null key column.
635 *
636 * @exception StandardException Standard exception policy.
637 **/
638 public boolean put(
639 boolean needsToClone,
640 Object[] row)
641 throws StandardException
642 {
643 // Are any key columns null?
644 if (skipNullKeyColumns)
645 {
646 int index = 0;
647 for ( ; index < key_column_numbers.length; index++)
648 {
649 if (SanityManager.DEBUG)
650 {
651 if (! (row[key_column_numbers[index]] instanceof Storable))
652 {
653 SanityManager.THROWASSERT(
654 "row[key_column_numbers[index]] expected to be Storable, not " +
655 row[key_column_numbers[index]].getClass().getName());
656 }
657 }
658 Storable storable = (Storable) row[key_column_numbers[index]];
659 if (storable.isNull())
660 {
661 return false;
662 }
663 }
664 }
665
666 if (needsToClone)
667 {
668 row = cloneRow(row);
669 }
670
671 Object key = KeyHasher.buildHashKey(row, key_column_numbers);
672
673 if ((remove_duplicates) && (get(key) != null))
674 {
675 return(false);
676 }
677 else
678 {
679 add_row_to_hash_table(hash_table, key, row);
680 return(true);
681 }
682 }
683
684 /**
685 * Return number of unique rows in the hash table.
686 * <p>
687 *
688 * @return The number of unique rows in the hash table.
689 *
690 * @exception StandardException Standard exception policy.
691 **/
692 public int size()
693 throws StandardException
694 {
695 if( diskHashtable == null)
696 return(hash_table.size());
697 return hash_table.size() + diskHashtable.size();
698 }
699
700 private class BackingStoreHashtableEnumeration implements Enumeration
701 {
702 private Enumeration memoryEnumeration;
703 private Enumeration diskEnumeration;
704
705 BackingStoreHashtableEnumeration()
706 {
707 memoryEnumeration = hash_table.elements();
708 if( diskHashtable != null)
709 {
710 try
711 {
712 diskEnumeration = diskHashtable.elements();
713 }
714 catch( StandardException se)
715 {
716 diskEnumeration = null;
717 }
718 }
719 }
720
721 public boolean hasMoreElements()
722 {
723 if( memoryEnumeration != null)
724 {
725 if( memoryEnumeration.hasMoreElements())
726 return true;
727 memoryEnumeration = null;
728 }
729 if( diskEnumeration == null)
730 return false;
731 return diskEnumeration.hasMoreElements();
732 }
733
734 public Object nextElement() throws NoSuchElementException
735 {
736 if( memoryEnumeration != null)
737 {
738 if( memoryEnumeration.hasMoreElements())
739 return memoryEnumeration.nextElement();
740 memoryEnumeration = null;
741 }
742 return diskEnumeration.nextElement();
743 }
744 } // end of class BackingStoreHashtableEnumeration
745 }