1 package org.apache.lucene.index;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import org.apache.lucene.analysis.Analyzer;
21 import org.apache.lucene.document.Document;
22 import org.apache.lucene.index.DocumentsWriter.IndexingChain;
23 import org.apache.lucene.search.Similarity;
24 import org.apache.lucene.search.Query;
25 import org.apache.lucene.store.Directory;
26 import org.apache.lucene.store.Lock;
27 import org.apache.lucene.store.LockObtainFailedException;
28 import org.apache.lucene.store.AlreadyClosedException;
29 import org.apache.lucene.store.BufferedIndexInput;
30 import org.apache.lucene.util.Constants;
31 import org.apache.lucene.util.ThreadInterruptedException;
32
33 import java.io.IOException;
34 import java.io.Closeable;
35 import java.io.PrintStream;
36 import java.util.List;
37 import java.util.Collection;
38 import java.util.ArrayList;
39 import java.util.HashMap;
40 import java.util.Set;
41 import java.util.HashSet;
42 import java.util.LinkedList;
43 import java.util.Iterator;
44 import java.util.Map;
45
46 /**
47 An <code>IndexWriter</code> creates and maintains an index.
48
49 <p>The <code>create</code> argument to the {@link
50 #IndexWriter(Directory, Analyzer, boolean, MaxFieldLength) constructor} determines
51 whether a new index is created, or whether an existing index is
52 opened. Note that you can open an index with <code>create=true</code>
53 even while readers are using the index. The old readers will
54 continue to search the "point in time" snapshot they had opened,
55 and won't see the newly created index until they re-open. There are
56 also {@link #IndexWriter(Directory, Analyzer, MaxFieldLength) constructors}
57 with no <code>create</code> argument which will create a new index
58 if there is not already an index at the provided path and otherwise
59 open the existing index.</p>
60
61 <p>In either case, documents are added with {@link #addDocument(Document)
62 addDocument} and removed with {@link #deleteDocuments(Term)} or {@link
63 #deleteDocuments(Query)}. A document can be updated with {@link
64 #updateDocument(Term, Document) updateDocument} (which just deletes
65 and then adds the entire document). When finished adding, deleting
66 and updating documents, {@link #close() close} should be called.</p>
67
68 <a name="flush"></a>
69 <p>These changes are buffered in memory and periodically
70 flushed to the {@link Directory} (during the above method
71 calls). A flush is triggered when there are enough
72 buffered deletes (see {@link #setMaxBufferedDeleteTerms})
73 or enough added documents since the last flush, whichever
74 is sooner. For the added documents, flushing is triggered
75 either by RAM usage of the documents (see {@link
76 #setRAMBufferSizeMB}) or the number of added documents.
77 The default is to flush when RAM usage hits 16 MB. For
78 best indexing speed you should flush by RAM usage with a
79 large RAM buffer. Note that flushing just moves the
80 internal buffered state in IndexWriter into the index, but
81 these changes are not visible to IndexReader until either
82 {@link #commit()} or {@link #close} is called. A flush may
83 also trigger one or more segment merges which by default
84 run with a background thread so as not to block the
85 addDocument calls (see <a href="#mergePolicy">below</a>
86 for changing the {@link MergeScheduler}).</p>
87
88 <p>If an index will not have more documents added for a while and optimal search
89 performance is desired, then either the full {@link #optimize() optimize}
90 method or partial {@link #optimize(int)} method should be
91 called before the index is closed.</p>
92
93 <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open
94 another <code>IndexWriter</code> on the same directory will lead to a
95 {@link LockObtainFailedException}. The {@link LockObtainFailedException}
96 is also thrown if an IndexReader on the same directory is used to delete documents
97 from the index.</p>
98
99 <a name="deletionPolicy"></a>
100 <p>Expert: <code>IndexWriter</code> allows an optional
101 {@link IndexDeletionPolicy} implementation to be
102 specified. You can use this to control when prior commits
103 are deleted from the index. The default policy is {@link
104 KeepOnlyLastCommitDeletionPolicy} which removes all prior
105 commits as soon as a new commit is done (this matches
106 behavior before 2.2). Creating your own policy can allow
107 you to explicitly keep previous "point in time" commits
108 alive in the index for some time, to allow readers to
109 refresh to the new commit without having the old commit
110 deleted out from under them. This is necessary on
111 filesystems like NFS that do not support "delete on last
112 close" semantics, which Lucene's "point in time" search
113 normally relies on. </p>
114
115 <a name="mergePolicy"></a> <p>Expert:
116 <code>IndexWriter</code> allows you to separately change
117 the {@link MergePolicy} and the {@link MergeScheduler}.
118 The {@link MergePolicy} is invoked whenever there are
119 changes to the segments in the index. Its role is to
120 select which merges to do, if any, and return a {@link
121 MergePolicy.MergeSpecification} describing the merges. It
122 also selects merges to do for optimize(). (The default is
  {@link LogByteSizeMergePolicy}.)  Then, the {@link
124 MergeScheduler} is invoked with the requested merges and
125 it decides when and how to run the merges. The default is
126 {@link ConcurrentMergeScheduler}. </p>
127
128 <a name="OOME"></a><p><b>NOTE</b>: if you hit an
129 OutOfMemoryError then IndexWriter will quietly record this
130 fact and block all future segment commits. This is a
131 defensive measure in case any internal state (buffered
  documents and deletions) was corrupted.  Any subsequent
133 calls to {@link #commit()} will throw an
134 IllegalStateException. The only course of action is to
135 call {@link #close()}, which internally will call {@link
136 #rollback()}, to undo any changes to the index since the
137 last commit. You can also just call {@link #rollback()}
138 directly.</p>
139
  <a name="thread-safety"></a><p><b>NOTE</b>:
  <code>IndexWriter</code> instances are completely thread
142 safe, meaning multiple threads can call any of its
143 methods, concurrently. If your application requires
144 external synchronization, you should <b>not</b>
145 synchronize on the <code>IndexWriter</code> instance as
146 this may cause deadlock; use your own (non-Lucene) objects
147 instead. </p>
148
149 <p><b>NOTE</b>: If you call
150 <code>Thread.interrupt()</code> on a thread that's within
151 IndexWriter, IndexWriter will try to catch this (eg, if
152 it's in a wait() or Thread.sleep()), and will then throw
153 the unchecked exception {@link ThreadInterruptedException}
154 and <b>clear</b> the interrupt status on the thread.</p>
155 */
156
157 /*
158 * Clarification: Check Points (and commits)
159 * IndexWriter writes new index files to the directory without writing a new segments_N
160 * file which references these new files. It also means that the state of
161 * the in memory SegmentInfos object is different than the most recent
162 * segments_N file written to the directory.
163 *
164 * Each time the SegmentInfos is changed, and matches the (possibly
165 * modified) directory files, we have a new "check point".
166 * If the modified/new SegmentInfos is written to disk - as a new
167 * (generation of) segments_N file - this check point is also an
168 * IndexCommit.
169 *
170 * A new checkpoint always replaces the previous checkpoint and
171 * becomes the new "front" of the index. This allows the IndexFileDeleter
172 * to delete files that are referenced only by stale checkpoints.
173 * (files that were created since the last commit, but are no longer
174 * referenced by the "front" of the index). For this, IndexFileDeleter
175 * keeps track of the last non commit checkpoint.
176 */
177 public class IndexWriter implements Closeable {
178
179 /**
180 * Default value for the write lock timeout (1,000).
181 * @see #setDefaultWriteLockTimeout
182 */
183 public static long WRITE_LOCK_TIMEOUT = 1000;
184
185 private long writeLockTimeout = WRITE_LOCK_TIMEOUT;
186
187 /**
188 * Name of the write lock in the index.
189 */
190 public static final String WRITE_LOCK_NAME = "write.lock";
191
192 /**
193 * Value to denote a flush trigger is disabled
194 */
195 public final static int DISABLE_AUTO_FLUSH = -1;
196
197 /**
198 * Disabled by default (because IndexWriter flushes by RAM usage
199 * by default). Change using {@link #setMaxBufferedDocs(int)}.
200 */
201 public final static int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;
202
203 /**
204 * Default value is 16 MB (which means flush when buffered
205 * docs consume 16 MB RAM). Change using {@link #setRAMBufferSizeMB}.
206 */
207 public final static double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
208
209 /**
210 * Disabled by default (because IndexWriter flushes by RAM usage
211 * by default). Change using {@link #setMaxBufferedDeleteTerms(int)}.
212 */
213 public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;
214
215 /**
216 * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}.
217 */
218 public final static int DEFAULT_MAX_FIELD_LENGTH = 10000;
219
220 /**
221 * Default value is 128. Change using {@link #setTermIndexInterval(int)}.
222 */
223 public final static int DEFAULT_TERM_INDEX_INTERVAL = 128;
224
225 /**
226 * Absolute hard maximum length for a term. If a term
227 * arrives from the analyzer longer than this length, it
228 * is skipped and a message is printed to infoStream, if
229 * set (see {@link #setInfoStream}).
230 */
231 public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH;
232
233 // The normal read buffer size defaults to 1024, but
234 // increasing this during merging seems to yield
235 // performance gains. However we don't want to increase
236 // it too much because there are quite a few
237 // BufferedIndexInputs created during merging. See
238 // LUCENE-888 for details.
239 private final static int MERGE_READ_BUFFER_SIZE = 4096;
240
241 // Used for printing messages
242 private static Object MESSAGE_ID_LOCK = new Object();
243 private static int MESSAGE_ID = 0;
244 private int messageID = -1;
245 volatile private boolean hitOOM;
246
247 private Directory directory; // where this index resides
248 private Analyzer analyzer; // how to analyze text
249
250 private Similarity similarity = Similarity.getDefault(); // how to normalize
251
252 private volatile long changeCount; // increments every time a change is completed
253 private long lastCommitChangeCount; // last changeCount that was committed
254
255 private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
256 private HashMap<SegmentInfo,Integer> rollbackSegments;
257
258 volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
259 volatile long pendingCommitChangeCount;
260
261 private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
262 private int localFlushedDocCount; // saved docWriter.getFlushedDocCount during local transaction
263
264 private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
265
266 private DocumentsWriter docWriter;
267 private IndexFileDeleter deleter;
268
269 private Set<SegmentInfo> segmentsToOptimize = new HashSet<SegmentInfo>(); // used by optimize to note those needing optimization
270
271 private Lock writeLock;
272
273 private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
274
275 private boolean closed;
276 private boolean closing;
277
278 // Holds all SegmentInfo instances currently involved in
279 // merges
280 private HashSet<SegmentInfo> mergingSegments = new HashSet<SegmentInfo>();
281
282 private MergePolicy mergePolicy = new LogByteSizeMergePolicy(this);
283 private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
284 private LinkedList<MergePolicy.OneMerge> pendingMerges = new LinkedList<MergePolicy.OneMerge>();
285 private Set<MergePolicy.OneMerge> runningMerges = new HashSet<MergePolicy.OneMerge>();
286 private List<MergePolicy.OneMerge> mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
287 private long mergeGen;
288 private boolean stopMerges;
289
290 private int flushCount;
291 private int flushDeletesCount;
292
293 // Used to only allow one addIndexes to proceed at once
294 // TODO: use ReadWriteLock once we are on 5.0
295 private int readCount; // count of how many threads are holding read lock
296 private Thread writeThread; // non-null if any thread holds write lock
297 final ReaderPool readerPool = new ReaderPool();
298 private int upgradeCount;
299
300 // This is a "write once" variable (like the organic dye
301 // on a DVD-R that may or may not be heated by a laser and
302 // then cooled to permanently record the event): it's
303 // false, until getReader() is called for the first time,
304 // at which point it's switched to true and never changes
305 // back to false. Once this is true, we hold open and
306 // reuse SegmentReader instances internally for applying
307 // deletes, doing merges, and reopening near real-time
308 // readers.
309 private volatile boolean poolReaders;
310
311 /**
312 * Expert: returns a readonly reader, covering all
313 * committed as well as un-committed changes to the index.
314 * This provides "near real-time" searching, in that
315 * changes made during an IndexWriter session can be
316 * quickly made available for searching without closing
317 * the writer nor calling {@link #commit}.
318 *
319 * <p>Note that this is functionally equivalent to calling
   * {@link #commit} and then using {@link IndexReader#open} to
   * open a new reader.  But the turnaround time of this
322 * method should be faster since it avoids the potentially
323 * costly {@link #commit}.</p>
324 *
325 * <p>You must close the {@link IndexReader} returned by
326 * this method once you are done using it.</p>
327 *
328 * <p>It's <i>near</i> real-time because there is no hard
329 * guarantee on how quickly you can get a new reader after
330 * making changes with IndexWriter. You'll have to
331 * experiment in your situation to determine if it's
332 * fast enough. As this is a new and experimental
333 * feature, please report back on your findings so we can
334 * learn, improve and iterate.</p>
335 *
336 * <p>The resulting reader supports {@link
337 * IndexReader#reopen}, but that call will simply forward
338 * back to this method (though this may change in the
339 * future).</p>
340 *
341 * <p>The very first time this method is called, this
342 * writer instance will make every effort to pool the
343 * readers that it opens for doing merges, applying
344 * deletes, etc. This means additional resources (RAM,
345 * file descriptors, CPU time) will be consumed.</p>
346 *
347 * <p>For lower latency on reopening a reader, you should
348 * call {@link #setMergedSegmentWarmer} to
349 * pre-warm a newly merged segment before it's committed
350 * to the index. This is important for minimizing
351 * index-to-search delay after a large merge. </p>
352 *
353 * <p>If an addIndexes* call is running in another thread,
354 * then this reader will only search those segments from
355 * the foreign index that have been successfully copied
   * over, so far.</p>
357 *
358 * <p><b>NOTE</b>: Once the writer is closed, any
359 * outstanding readers may continue to be used. However,
360 * if you attempt to reopen any of those readers, you'll
361 * hit an {@link AlreadyClosedException}.</p>
362 *
363 * <p><b>NOTE:</b> This API is experimental and might
364 * change in incompatible ways in the next release.</p>
365 *
366 * @return IndexReader that covers entire index plus all
367 * changes made so far by this IndexWriter instance
368 *
369 * @throws IOException
370 */
371 public IndexReader getReader() throws IOException {
372 return getReader(IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
373 }
374
375 /** Expert: like {@link #getReader}, except you can
376 * specify which termInfosIndexDivisor should be used for
377 * any newly opened readers.
378 * @param termInfosIndexDivisor Subsamples which indexed
379 * terms are loaded into RAM. This has the same effect as {@link
380 * IndexWriter#setTermIndexInterval} except that setting
381 * must be done at indexing time while this setting can be
382 * set per reader. When set to N, then one in every
383 * N*termIndexInterval terms in the index is loaded into
384 * memory. By setting this to a value > 1 you can reduce
385 * memory usage, at the expense of higher latency when
386 * loading a TermInfo. The default value is 1. Set this
387 * to -1 to skip loading the terms index entirely. */
388 public IndexReader getReader(int termInfosIndexDivisor) throws IOException {
389 if (infoStream != null) {
390 message("flush at getReader");
391 }
392
393 // Do this up front before flushing so that the readers
394 // obtained during this flush are pooled, the first time
395 // this method is called:
396 poolReaders = true;
397
398 flush(true, true, false);
399
400 // Prevent segmentInfos from changing while opening the
401 // reader; in theory we could do similar retry logic,
402 // just like we do when loading segments_N
403 synchronized(this) {
404 applyDeletes();
405 return new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor);
406 }
407 }
408
409 /** Holds shared SegmentReader instances. IndexWriter uses
410 * SegmentReaders for 1) applying deletes, 2) doing
411 * merges, 3) handing out a real-time reader. This pool
412 * reuses instances of the SegmentReaders in all these
413 * places if it is in "near real-time mode" (getReader()
414 * has been called on this instance). */
415
416 class ReaderPool {
417
418 private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>();
419
420 /** Forcefully clear changes for the specified segments,
421 * and remove from the pool. This is called on successful merge. */
422 synchronized void clear(SegmentInfos infos) throws IOException {
423 if (infos == null) {
424 for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) {
425 ent.getValue().hasChanges = false;
426 }
427 } else {
428 for (final SegmentInfo info: infos) {
429 if (readerMap.containsKey(info)) {
430 readerMap.get(info).hasChanges = false;
431 }
432 }
433 }
434 }
435
436 // used only by asserts
437 public synchronized boolean infoIsLive(SegmentInfo info) {
438 int idx = segmentInfos.indexOf(info);
439 assert idx != -1;
440 assert segmentInfos.get(idx) == info;
441 return true;
442 }
443
444 public synchronized SegmentInfo mapToLive(SegmentInfo info) {
445 int idx = segmentInfos.indexOf(info);
446 if (idx != -1) {
447 info = segmentInfos.get(idx);
448 }
449 return info;
450 }
451
452 /**
453 * Release the segment reader (i.e. decRef it and close if there
454 * are no more references.
455 * @param sr
456 * @throws IOException
457 */
458 public synchronized void release(SegmentReader sr) throws IOException {
459 release(sr, false);
460 }
461
462 /**
463 * Release the segment reader (i.e. decRef it and close if there
464 * are no more references.
465 * @param sr
466 * @throws IOException
467 */
468 public synchronized void release(SegmentReader sr, boolean drop) throws IOException {
469
470 final boolean pooled = readerMap.containsKey(sr.getSegmentInfo());
471
472 assert !pooled | readerMap.get(sr.getSegmentInfo()) == sr;
473
474 // Drop caller's ref
475 sr.decRef();
476
477 if (pooled && (drop || (!poolReaders && sr.getRefCount() == 1))) {
478
479 // We are the last ref to this reader; since we're
480 // not pooling readers, we release it:
481 readerMap.remove(sr.getSegmentInfo());
482
483 assert !sr.hasChanges || Thread.holdsLock(IndexWriter.this);
484
485 // Drop our ref -- this will commit any pending
486 // changes to the dir
487 boolean success = false;
488 try {
489 sr.close();
490 success = true;
491 } finally {
492 if (!success && sr.hasChanges) {
493 // Abandon the changes & retry closing:
494 sr.hasChanges = false;
495 try {
496 sr.close();
497 } catch (Throwable ignore) {
498 // Keep throwing original exception
499 }
500 }
501 }
502 }
503 }
504
505 /** Remove all our references to readers, and commits
506 * any pending changes. */
507 synchronized void close() throws IOException {
508 Iterator<Map.Entry<SegmentInfo,SegmentReader>> iter = readerMap.entrySet().iterator();
509 while (iter.hasNext()) {
510
511 Map.Entry<SegmentInfo,SegmentReader> ent = iter.next();
512
513 SegmentReader sr = ent.getValue();
514 if (sr.hasChanges) {
515 assert infoIsLive(sr.getSegmentInfo());
516 sr.startCommit();
517 boolean success = false;
518 try {
519 sr.doCommit(null);
520 success = true;
521 } finally {
522 if (!success) {
523 sr.rollbackCommit();
524 }
525 }
526 }
527
528 iter.remove();
529
530 // NOTE: it is allowed that this decRef does not
531 // actually close the SR; this can happen when a
532 // near real-time reader is kept open after the
533 // IndexWriter instance is closed
534 sr.decRef();
535 }
536 }
537
538 /**
539 * Commit all segment reader in the pool.
540 * @throws IOException
541 */
542 synchronized void commit() throws IOException {
543 for (Map.Entry<SegmentInfo,SegmentReader> ent : readerMap.entrySet()) {
544
545 SegmentReader sr = ent.getValue();
546 if (sr.hasChanges) {
547 assert infoIsLive(sr.getSegmentInfo());
548 sr.startCommit();
549 boolean success = false;
550 try {
551 sr.doCommit(null);
552 success = true;
553 } finally {
554 if (!success) {
555 sr.rollbackCommit();
556 }
557 }
558 }
559 }
560 }
561
562 /**
563 * Returns a ref to a clone. NOTE: this clone is not
564 * enrolled in the pool, so you should simply close()
565 * it when you're done (ie, do not call release()).
566 */
567 public synchronized SegmentReader getReadOnlyClone(SegmentInfo info, boolean doOpenStores, int termInfosIndexDivisor) throws IOException {
568 SegmentReader sr = get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor);
569 try {
570 return (SegmentReader) sr.clone(true);
571 } finally {
572 sr.decRef();
573 }
574 }
575
576 /**
577 * Obtain a SegmentReader from the readerPool. The reader
578 * must be returned by calling {@link #release(SegmentReader)}
579 * @see #release(SegmentReader)
580 * @param info
581 * @param doOpenStores
582 * @throws IOException
583 */
584 public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores) throws IOException {
585 return get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
586 }
587 /**
588 * Obtain a SegmentReader from the readerPool. The reader
589 * must be returned by calling {@link #release(SegmentReader)}
590 *
591 * @see #release(SegmentReader)
592 * @param info
593 * @param doOpenStores
594 * @param readBufferSize
595 * @param termsIndexDivisor
596 * @throws IOException
597 */
598 public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores, int readBufferSize, int termsIndexDivisor) throws IOException {
599
600 if (poolReaders) {
601 readBufferSize = BufferedIndexInput.BUFFER_SIZE;
602 }
603
604 SegmentReader sr = readerMap.get(info);
605 if (sr == null) {
606 // TODO: we may want to avoid doing this while
607 // synchronized
608 // Returns a ref, which we xfer to readerMap:
609 sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
610 readerMap.put(info, sr);
611 } else {
612 if (doOpenStores) {
613 sr.openDocStores();
614 }
615 if (termsIndexDivisor != -1 && !sr.termsIndexLoaded()) {
616 // If this reader was originally opened because we
617 // needed to merge it, we didn't load the terms
618 // index. But now, if the caller wants the terms
619 // index (eg because it's doing deletes, or an NRT
620 // reader is being opened) we ask the reader to
621 // load its terms index.
622 sr.loadTermsIndex(termsIndexDivisor);
623 }
624 }
625
626 // Return a ref to our caller
627 sr.incRef();
628 return sr;
629 }
630
631 // Returns a ref
632 public synchronized SegmentReader getIfExists(SegmentInfo info) throws IOException {
633 SegmentReader sr = readerMap.get(info);
634 if (sr != null) {
635 sr.incRef();
636 }
637 return sr;
638 }
639 }
640
641 /**
642 * Obtain the number of deleted docs for a pooled reader.
643 * If the reader isn't being pooled, the segmentInfo's
644 * delCount is returned.
645 */
646 public int numDeletedDocs(SegmentInfo info) throws IOException {
647 SegmentReader reader = readerPool.getIfExists(info);
648 try {
649 if (reader != null) {
650 return reader.numDeletedDocs();
651 } else {
652 return info.getDelCount();
653 }
654 } finally {
655 if (reader != null) {
656 readerPool.release(reader);
657 }
658 }
659 }
660
661 synchronized void acquireWrite() {
662 assert writeThread != Thread.currentThread();
663 while(writeThread != null || readCount > 0)
664 doWait();
665
666 // We could have been closed while we were waiting:
667 ensureOpen();
668
669 writeThread = Thread.currentThread();
670 }
671
672 synchronized void releaseWrite() {
673 assert Thread.currentThread() == writeThread;
674 writeThread = null;
675 notifyAll();
676 }
677
678 synchronized void acquireRead() {
679 final Thread current = Thread.currentThread();
680 while(writeThread != null && writeThread != current)
681 doWait();
682
683 readCount++;
684 }
685
686 // Allows one readLock to upgrade to a writeLock even if
687 // there are other readLocks as long as all other
688 // readLocks are also blocked in this method:
689 synchronized void upgradeReadToWrite() {
690 assert readCount > 0;
691 upgradeCount++;
692 while(readCount > upgradeCount || writeThread != null) {
693 doWait();
694 }
695
696 writeThread = Thread.currentThread();
697 readCount--;
698 upgradeCount--;
699 }
700
701 synchronized void releaseRead() {
702 readCount--;
703 assert readCount >= 0;
704 notifyAll();
705 }
706
707 synchronized final boolean isOpen(boolean includePendingClose) {
708 return !(closed || (includePendingClose && closing));
709 }
710
711 /**
712 * Used internally to throw an {@link
713 * AlreadyClosedException} if this IndexWriter has been
714 * closed.
   * @throws AlreadyClosedException if this IndexWriter is closed (or, when
   *  <code>includePendingClose</code> is true, in the process of closing)
716 */
717 protected synchronized final void ensureOpen(boolean includePendingClose) throws AlreadyClosedException {
718 if (!isOpen(includePendingClose)) {
719 throw new AlreadyClosedException("this IndexWriter is closed");
720 }
721 }
722
723 protected synchronized final void ensureOpen() throws AlreadyClosedException {
724 ensureOpen(true);
725 }
726
727 /**
728 * Prints a message to the infoStream (if non-null),
729 * prefixed with the identifying information for this
730 * writer and the thread that's calling it.
731 */
732 public void message(String message) {
733 if (infoStream != null)
734 infoStream.println("IW " + messageID + " [" + Thread.currentThread().getName() + "]: " + message);
735 }
736
737 private synchronized void setMessageID(PrintStream infoStream) {
738 if (infoStream != null && messageID == -1) {
739 synchronized(MESSAGE_ID_LOCK) {
740 messageID = MESSAGE_ID++;
741 }
742 }
743 this.infoStream = infoStream;
744 }
745
746 /**
747 * Casts current mergePolicy to LogMergePolicy, and throws
748 * an exception if the mergePolicy is not a LogMergePolicy.
749 */
750 private LogMergePolicy getLogMergePolicy() {
751 if (mergePolicy instanceof LogMergePolicy)
752 return (LogMergePolicy) mergePolicy;
753 else
754 throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
755 }
756
757 /** <p>Get the current setting of whether newly flushed
758 * segments will use the compound file format. Note that
759 * this just returns the value previously set with
760 * setUseCompoundFile(boolean), or the default value
761 * (true). You cannot use this to query the status of
762 * previously flushed segments.</p>
763 *
764 * <p>Note that this method is a convenience method: it
765 * just calls mergePolicy.getUseCompoundFile as long as
766 * mergePolicy is an instance of {@link LogMergePolicy}.
767 * Otherwise an IllegalArgumentException is thrown.</p>
768 *
769 * @see #setUseCompoundFile(boolean)
770 */
771 public boolean getUseCompoundFile() {
772 return getLogMergePolicy().getUseCompoundFile();
773 }
774
775 /** <p>Setting to turn on usage of a compound file. When on,
776 * multiple files for each segment are merged into a
777 * single file when a new segment is flushed.</p>
778 *
779 * <p>Note that this method is a convenience method: it
780 * just calls mergePolicy.setUseCompoundFile as long as
781 * mergePolicy is an instance of {@link LogMergePolicy}.
782 * Otherwise an IllegalArgumentException is thrown.</p>
783 */
784 public void setUseCompoundFile(boolean value) {
785 getLogMergePolicy().setUseCompoundFile(value);
786 getLogMergePolicy().setUseCompoundDocStore(value);
787 }
788
789 /** Expert: Set the Similarity implementation used by this IndexWriter.
790 *
791 * @see Similarity#setDefault(Similarity)
792 */
793 public void setSimilarity(Similarity similarity) {
794 ensureOpen();
795 this.similarity = similarity;
796 docWriter.setSimilarity(similarity);
797 }
798
799 /** Expert: Return the Similarity implementation used by this IndexWriter.
800 *
801 * <p>This defaults to the current value of {@link Similarity#getDefault()}.
802 */
803 public Similarity getSimilarity() {
804 ensureOpen();
805 return this.similarity;
806 }
807
808 /** Expert: Set the interval between indexed terms. Large values cause less
809 * memory to be used by IndexReader, but slow random-access to terms. Small
810 * values cause more memory to be used by an IndexReader, and speed
811 * random-access to terms.
812 *
813 * This parameter determines the amount of computation required per query
814 * term, regardless of the number of documents that contain that term. In
815 * particular, it is the maximum number of other terms that must be
816 * scanned before a term is located and its frequency and position information
817 * may be processed. In a large index with user-entered query terms, query
818 * processing time is likely to be dominated not by term lookup but rather
819 * by the processing of frequency and positional data. In a small index
820 * or when many uncommon query terms are generated (e.g., by wildcard
821 * queries) term lookup may become a dominant cost.
822 *
823 * In particular, <code>numUniqueTerms/interval</code> terms are read into
824 * memory by an IndexReader, and, on average, <code>interval/2</code> terms
825 * must be scanned for each random term access.
826 *
827 * @see #DEFAULT_TERM_INDEX_INTERVAL
828 */
829 public void setTermIndexInterval(int interval) {
830 ensureOpen();
831 this.termIndexInterval = interval;
832 }
833
834 /** Expert: Return the interval between indexed terms.
835 *
836 * @see #setTermIndexInterval(int)
837 */
838 public int getTermIndexInterval() {
839 // We pass false because this method is called by SegmentMerger while we are in the process of closing
840 ensureOpen(false);
841 return termIndexInterval;
842 }
843
844 /**
845 * Constructs an IndexWriter for the index in <code>d</code>.
846 * Text will be analyzed with <code>a</code>. If <code>create</code>
847 * is true, then a new, empty index will be created in
848 * <code>d</code>, replacing the index already there, if any.
849 *
850 * @param d the index directory
851 * @param a the analyzer to use
852 * @param create <code>true</code> to create the index or overwrite
853 * the existing one; <code>false</code> to append to the existing
854 * index
855 * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
856 * via the MaxFieldLength constructor.
857 * @throws CorruptIndexException if the index is corrupt
858 * @throws LockObtainFailedException if another writer
859 * has this index open (<code>write.lock</code> could not
860 * be obtained)
861 * @throws IOException if the directory cannot be read/written to, or
862 * if it does not exist and <code>create</code> is
863 * <code>false</code> or if there is any other low-level
864 * IO error
865 */
866 public IndexWriter(Directory d, Analyzer a, boolean create, MaxFieldLength mfl)
867 throws CorruptIndexException, LockObtainFailedException, IOException {
868 init(d, a, create, null, mfl.getLimit(), null, null);
869 }
870
871 /**
872 * Constructs an IndexWriter for the index in
873 * <code>d</code>, first creating it if it does not
874 * already exist. Text will be analyzed with
875 * <code>a</code>.
876 *
877 * @param d the index directory
878 * @param a the analyzer to use
879 * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
880 * via the MaxFieldLength constructor.
881 * @throws CorruptIndexException if the index is corrupt
882 * @throws LockObtainFailedException if another writer
883 * has this index open (<code>write.lock</code> could not
884 * be obtained)
885 * @throws IOException if the directory cannot be
886 * read/written to or if there is any other low-level
887 * IO error
888 */
889 public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl)
890 throws CorruptIndexException, LockObtainFailedException, IOException {
891 init(d, a, null, mfl.getLimit(), null, null);
892 }
893
894 /**
895 * Expert: constructs an IndexWriter with a custom {@link
896 * IndexDeletionPolicy}, for the index in <code>d</code>,
897 * first creating it if it does not already exist. Text
898 * will be analyzed with <code>a</code>.
899 *
900 * @param d the index directory
901 * @param a the analyzer to use
902 * @param deletionPolicy see <a href="#deletionPolicy">above</a>
903 * @param mfl whether or not to limit field lengths
904 * @throws CorruptIndexException if the index is corrupt
905 * @throws LockObtainFailedException if another writer
906 * has this index open (<code>write.lock</code> could not
907 * be obtained)
908 * @throws IOException if the directory cannot be
909 * read/written to or if there is any other low-level
910 * IO error
911 */
912 public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
913 throws CorruptIndexException, LockObtainFailedException, IOException {
914 init(d, a, deletionPolicy, mfl.getLimit(), null, null);
915 }
916
917 /**
918 * Expert: constructs an IndexWriter with a custom {@link
919 * IndexDeletionPolicy}, for the index in <code>d</code>.
920 * Text will be analyzed with <code>a</code>. If
921 * <code>create</code> is true, then a new, empty index
922 * will be created in <code>d</code>, replacing the index
923 * already there, if any.
924 *
925 * @param d the index directory
926 * @param a the analyzer to use
927 * @param create <code>true</code> to create the index or overwrite
928 * the existing one; <code>false</code> to append to the existing
929 * index
930 * @param deletionPolicy see <a href="#deletionPolicy">above</a>
931 * @param mfl {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}, whether or not to limit field lengths. Value is in number of terms/tokens
932 * @throws CorruptIndexException if the index is corrupt
933 * @throws LockObtainFailedException if another writer
934 * has this index open (<code>write.lock</code> could not
935 * be obtained)
936 * @throws IOException if the directory cannot be read/written to, or
937 * if it does not exist and <code>create</code> is
938 * <code>false</code> or if there is any other low-level
939 * IO error
940 */
941 public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
942 throws CorruptIndexException, LockObtainFailedException, IOException {
943 init(d, a, create, deletionPolicy, mfl.getLimit(), null, null);
944 }
945
946 /**
947 * Expert: constructs an IndexWriter with a custom {@link
948 * IndexDeletionPolicy} and {@link IndexingChain},
949 * for the index in <code>d</code>.
950 * Text will be analyzed with <code>a</code>. If
951 * <code>create</code> is true, then a new, empty index
952 * will be created in <code>d</code>, replacing the index
953 * already there, if any.
954 *
955 * @param d the index directory
956 * @param a the analyzer to use
957 * @param create <code>true</code> to create the index or overwrite
958 * the existing one; <code>false</code> to append to the existing
959 * index
960 * @param deletionPolicy see <a href="#deletionPolicy">above</a>
961 * @param mfl whether or not to limit field lengths, value is in number of terms/tokens. See {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}.
962 * @param indexingChain the {@link DocConsumer} chain to be used to
963 * process documents
964 * @param commit which commit to open
965 * @throws CorruptIndexException if the index is corrupt
966 * @throws LockObtainFailedException if another writer
967 * has this index open (<code>write.lock</code> could not
968 * be obtained)
969 * @throws IOException if the directory cannot be read/written to, or
970 * if it does not exist and <code>create</code> is
971 * <code>false</code> or if there is any other low-level
972 * IO error
973 */
974 IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain, IndexCommit commit)
975 throws CorruptIndexException, LockObtainFailedException, IOException {
976 init(d, a, create, deletionPolicy, mfl.getLimit(), indexingChain, commit);
977 }
978
979 /**
980 * Expert: constructs an IndexWriter on specific commit
981 * point, with a custom {@link IndexDeletionPolicy}, for
982 * the index in <code>d</code>. Text will be analyzed
983 * with <code>a</code>.
984 *
 * <p> This is only meaningful if you've used a {@link
 * IndexDeletionPolicy} in the past that keeps more than
 * just the last commit.
988 *
989 * <p>This operation is similar to {@link #rollback()},
990 * except that method can only rollback what's been done
991 * with the current instance of IndexWriter since its last
992 * commit, whereas this method can rollback to an
993 * arbitrary commit point from the past, assuming the
994 * {@link IndexDeletionPolicy} has preserved past
995 * commits.
996 *
997 * @param d the index directory
998 * @param a the analyzer to use
999 * @param deletionPolicy see <a href="#deletionPolicy">above</a>
1000 * @param mfl whether or not to limit field lengths, value is in number of terms/tokens. See {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}.
1001 * @param commit which commit to open
1002 * @throws CorruptIndexException if the index is corrupt
1003 * @throws LockObtainFailedException if another writer
1004 * has this index open (<code>write.lock</code> could not
1005 * be obtained)
 * @throws IOException if the directory cannot be read/written to, or
 *  if the index does not exist (this constructor never creates one),
 *  or if there is any other low-level IO error
1010 */
1011 public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit)
1012 throws CorruptIndexException, LockObtainFailedException, IOException {
1013 init(d, a, false, deletionPolicy, mfl.getLimit(), null, commit);
1014 }
1015
1016 private void init(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy,
1017 int maxFieldLength, IndexingChain indexingChain, IndexCommit commit)
1018 throws CorruptIndexException, LockObtainFailedException, IOException {
1019 if (IndexReader.indexExists(d)) {
1020 init(d, a, false, deletionPolicy, maxFieldLength, indexingChain, commit);
1021 } else {
1022 init(d, a, true, deletionPolicy, maxFieldLength, indexingChain, commit);
1023 }
1024 }
1025
1026 private void init(Directory d, Analyzer a, final boolean create,
1027 IndexDeletionPolicy deletionPolicy, int maxFieldLength,
1028 IndexingChain indexingChain, IndexCommit commit)
1029 throws CorruptIndexException, LockObtainFailedException, IOException {
1030
1031 directory = d;
1032 analyzer = a;
1033 setMessageID(defaultInfoStream);
1034 this.maxFieldLength = maxFieldLength;
1035
1036 if (indexingChain == null)
1037 indexingChain = DocumentsWriter.DefaultIndexingChain;
1038
1039 if (create) {
1040 // Clear the write lock in case it's leftover:
1041 directory.clearLock(WRITE_LOCK_NAME);
1042 }
1043
1044 Lock writeLock = directory.makeLock(WRITE_LOCK_NAME);
1045 if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
1046 throw new LockObtainFailedException("Index locked for write: " + writeLock);
1047 this.writeLock = writeLock; // save it
1048
1049 try {
1050 if (create) {
1051 // Try to read first. This is to allow create
1052 // against an index that's currently open for
1053 // searching. In this case we write the next
1054 // segments_N file with no segments:
1055 boolean doCommit;
1056 try {
1057 segmentInfos.read(directory);
1058 segmentInfos.clear();
1059 doCommit = false;
1060 } catch (IOException e) {
1061 // Likely this means it's a fresh directory
1062 doCommit = true;
1063 }
1064
1065 if (doCommit) {
1066 // Only commit if there is no segments file in
1067 // this dir already.
1068 segmentInfos.commit(directory);
1069 synced.addAll(segmentInfos.files(directory, true));
1070 } else {
1071 // Record that we have a change (zero out all
1072 // segments) pending:
1073 changeCount++;
1074 }
1075 } else {
1076 segmentInfos.read(directory);
1077
1078 if (commit != null) {
1079 // Swap out all segments, but, keep metadata in
1080 // SegmentInfos, like version & generation, to
1081 // preserve write-once. This is important if
1082 // readers are open against the future commit
1083 // points.
1084 if (commit.getDirectory() != directory)
1085 throw new IllegalArgumentException("IndexCommit's directory doesn't match my directory");
1086 SegmentInfos oldInfos = new SegmentInfos();
1087 oldInfos.read(directory, commit.getSegmentsFileName());
1088 segmentInfos.replace(oldInfos);
1089 changeCount++;
1090 if (infoStream != null)
1091 message("init: loaded commit \"" + commit.getSegmentsFileName() + "\"");
1092 }
1093
1094 // We assume that this segments_N was previously
1095 // properly sync'd:
1096 synced.addAll(segmentInfos.files(directory, true));
1097 }
1098
1099 setRollbackSegmentInfos(segmentInfos);
1100
1101 docWriter = new DocumentsWriter(directory, this, indexingChain);
1102 docWriter.setInfoStream(infoStream);
1103 docWriter.setMaxFieldLength(maxFieldLength);
1104
1105 // Default deleter (for backwards compatibility) is
1106 // KeepOnlyLastCommitDeleter:
1107 deleter = new IndexFileDeleter(directory,
1108 deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
1109 segmentInfos, infoStream, docWriter);
1110
1111 if (deleter.startingCommitDeleted)
1112 // Deletion policy deleted the "head" commit point.
1113 // We have to mark ourself as changed so that if we
1114 // are closed w/o any further changes we write a new
1115 // segments_N file.
1116 changeCount++;
1117
1118 pushMaxBufferedDocs();
1119
1120 if (infoStream != null) {
1121 message("init: create=" + create);
1122 messageState();
1123 }
1124
1125 } catch (IOException e) {
1126 this.writeLock.release();
1127 this.writeLock = null;
1128 throw e;
1129 }
1130 }
1131
1132 private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
1133 rollbackSegmentInfos = (SegmentInfos) infos.clone();
1134 assert !rollbackSegmentInfos.hasExternalSegments(directory);
1135 rollbackSegments = new HashMap<SegmentInfo,Integer>();
1136 final int size = rollbackSegmentInfos.size();
1137 for(int i=0;i<size;i++)
1138 rollbackSegments.put(rollbackSegmentInfos.info(i), Integer.valueOf(i));
1139 }
1140
1141 /**
1142 * Expert: set the merge policy used by this writer.
1143 */
1144 public void setMergePolicy(MergePolicy mp) {
1145 ensureOpen();
1146 if (mp == null)
1147 throw new NullPointerException("MergePolicy must be non-null");
1148
1149 if (mergePolicy != mp)
1150 mergePolicy.close();
1151 mergePolicy = mp;
1152 pushMaxBufferedDocs();
1153 if (infoStream != null)
1154 message("setMergePolicy " + mp);
1155 }
1156
1157 /**
1158 * Expert: returns the current MergePolicy in use by this writer.
1159 * @see #setMergePolicy
1160 */
1161 public MergePolicy getMergePolicy() {
1162 ensureOpen();
1163 return mergePolicy;
1164 }
1165
1166 /**
1167 * Expert: set the merge scheduler used by this writer.
1168 */
1169 synchronized public void setMergeScheduler(MergeScheduler mergeScheduler) throws CorruptIndexException, IOException {
1170 ensureOpen();
1171 if (mergeScheduler == null)
1172 throw new NullPointerException("MergeScheduler must be non-null");
1173
1174 if (this.mergeScheduler != mergeScheduler) {
1175 finishMerges(true);
1176 this.mergeScheduler.close();
1177 }
1178 this.mergeScheduler = mergeScheduler;
1179 if (infoStream != null)
1180 message("setMergeScheduler " + mergeScheduler);
1181 }
1182
1183 /**
 * Expert: returns the current MergeScheduler in use by this
 * writer.
 * @see #setMergeScheduler
1187 */
1188 public MergeScheduler getMergeScheduler() {
1189 ensureOpen();
1190 return mergeScheduler;
1191 }
1192
1193 /** <p>Determines the largest segment (measured by
1194 * document count) that may be merged with other segments.
1195 * Small values (e.g., less than 10,000) are best for
1196 * interactive indexing, as this limits the length of
1197 * pauses while indexing to a few seconds. Larger values
1198 * are best for batched indexing and speedier
1199 * searches.</p>
1200 *
1201 * <p>The default value is {@link Integer#MAX_VALUE}.</p>
1202 *
1203 * <p>Note that this method is a convenience method: it
1204 * just calls mergePolicy.setMaxMergeDocs as long as
1205 * mergePolicy is an instance of {@link LogMergePolicy}.
1206 * Otherwise an IllegalArgumentException is thrown.</p>
1207 *
1208 * <p>The default merge policy ({@link
1209 * LogByteSizeMergePolicy}) also allows you to set this
1210 * limit by net size (in MB) of the segment, using {@link
1211 * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
1212 */
1213 public void setMaxMergeDocs(int maxMergeDocs) {
1214 getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
1215 }
1216
1217 /**
1218 * <p>Returns the largest segment (measured by document
1219 * count) that may be merged with other segments.</p>
1220 *
1221 * <p>Note that this method is a convenience method: it
1222 * just calls mergePolicy.getMaxMergeDocs as long as
1223 * mergePolicy is an instance of {@link LogMergePolicy}.
1224 * Otherwise an IllegalArgumentException is thrown.</p>
1225 *
1226 * @see #setMaxMergeDocs
1227 */
1228 public int getMaxMergeDocs() {
1229 return getLogMergePolicy().getMaxMergeDocs();
1230 }
1231
1232 /**
1233 * The maximum number of terms that will be indexed for a single field in a
1234 * document. This limits the amount of memory required for indexing, so that
1235 * collections with very large files will not crash the indexing process by
1236 * running out of memory. This setting refers to the number of running terms,
1237 * not to the number of different terms.<p/>
1238 * <strong>Note:</strong> this silently truncates large documents, excluding from the
1239 * index all terms that occur further in the document. If you know your source
 * documents are large, be sure to set this value high enough to accommodate
1241 * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
1242 * is your memory, but you should anticipate an OutOfMemoryError.<p/>
1243 * By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms
1244 * will be indexed for a field.
1245 */
1246 public void setMaxFieldLength(int maxFieldLength) {
1247 ensureOpen();
1248 this.maxFieldLength = maxFieldLength;
1249 docWriter.setMaxFieldLength(maxFieldLength);
1250 if (infoStream != null)
1251 message("setMaxFieldLength " + maxFieldLength);
1252 }
1253
1254 /**
1255 * Returns the maximum number of terms that will be
1256 * indexed for a single field in a document.
1257 * @see #setMaxFieldLength
1258 */
1259 public int getMaxFieldLength() {
1260 ensureOpen();
1261 return maxFieldLength;
1262 }
1263
1264 /** Determines the minimal number of documents required
1265 * before the buffered in-memory documents are flushed as
 * a new Segment.  Large values generally give faster
1267 * indexing.
1268 *
1269 * <p>When this is set, the writer will flush every
1270 * maxBufferedDocs added documents. Pass in {@link
1271 * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
1272 * to number of buffered documents. Note that if flushing
1273 * by RAM usage is also enabled, then the flush will be
1274 * triggered by whichever comes first.</p>
1275 *
1276 * <p>Disabled by default (writer flushes by RAM usage).</p>
1277 *
1278 * @throws IllegalArgumentException if maxBufferedDocs is
1279 * enabled but smaller than 2, or it disables maxBufferedDocs
1280 * when ramBufferSize is already disabled
1281 * @see #setRAMBufferSizeMB
1282 */
1283 public void setMaxBufferedDocs(int maxBufferedDocs) {
1284 ensureOpen();
1285 if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
1286 throw new IllegalArgumentException(
1287 "maxBufferedDocs must at least be 2 when enabled");
1288 if (maxBufferedDocs == DISABLE_AUTO_FLUSH
1289 && getRAMBufferSizeMB() == DISABLE_AUTO_FLUSH)
1290 throw new IllegalArgumentException(
1291 "at least one of ramBufferSize and maxBufferedDocs must be enabled");
1292 docWriter.setMaxBufferedDocs(maxBufferedDocs);
1293 pushMaxBufferedDocs();
1294 if (infoStream != null)
1295 message("setMaxBufferedDocs " + maxBufferedDocs);
1296 }
1297
1298 /**
1299 * If we are flushing by doc count (not by RAM usage), and
1300 * using LogDocMergePolicy then push maxBufferedDocs down
1301 * as its minMergeDocs, to keep backwards compatibility.
1302 */
1303 private void pushMaxBufferedDocs() {
1304 if (docWriter.getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) {
1305 final MergePolicy mp = mergePolicy;
1306 if (mp instanceof LogDocMergePolicy) {
1307 LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
1308 final int maxBufferedDocs = docWriter.getMaxBufferedDocs();
1309 if (lmp.getMinMergeDocs() != maxBufferedDocs) {
1310 if (infoStream != null)
1311 message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
1312 lmp.setMinMergeDocs(maxBufferedDocs);
1313 }
1314 }
1315 }
1316 }
1317
1318 /**
1319 * Returns the number of buffered added documents that will
1320 * trigger a flush if enabled.
1321 * @see #setMaxBufferedDocs
1322 */
1323 public int getMaxBufferedDocs() {
1324 ensureOpen();
1325 return docWriter.getMaxBufferedDocs();
1326 }
1327
1328 /** Determines the amount of RAM that may be used for
1329 * buffering added documents and deletions before they are
1330 * flushed to the Directory. Generally for faster
1331 * indexing performance it's best to flush by RAM usage
1332 * instead of document count and use as large a RAM buffer
1333 * as you can.
1334 *
1335 * <p>When this is set, the writer will flush whenever
1336 * buffered documents and deletions use this much RAM.
1337 * Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
1338 * triggering a flush due to RAM usage. Note that if
1339 * flushing by document count is also enabled, then the
1340 * flush will be triggered by whichever comes first.</p>
1341 *
1342 * <p> <b>NOTE</b>: the account of RAM usage for pending
1343 * deletions is only approximate. Specifically, if you
1344 * delete by Query, Lucene currently has no way to measure
 * the RAM usage of individual Queries so the accounting
1346 * will under-estimate and you should compensate by either
1347 * calling commit() periodically yourself, or by using
1348 * {@link #setMaxBufferedDeleteTerms} to flush by count
1349 * instead of RAM usage (each buffered delete Query counts
1350 * as one).
1351 *
1352 * <p> <b>NOTE</b>: because IndexWriter uses
1353 * <code>int</code>s when managing its internal storage,
1354 * the absolute maximum value for this setting is somewhat
1355 * less than 2048 MB. The precise limit depends on
1356 * various factors, such as how large your documents are,
1357 * how many fields have norms, etc., so it's best to set
1358 * this value comfortably under 2048.</p>
1359 *
1360 * <p> The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
1361 *
1362 * @throws IllegalArgumentException if ramBufferSize is
1363 * enabled but non-positive, or it disables ramBufferSize
1364 * when maxBufferedDocs is already disabled
1365 */
1366 public void setRAMBufferSizeMB(double mb) {
1367 if (mb > 2048.0) {
1368 throw new IllegalArgumentException("ramBufferSize " + mb + " is too large; should be comfortably less than 2048");
1369 }
1370 if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0)
1371 throw new IllegalArgumentException(
1372 "ramBufferSize should be > 0.0 MB when enabled");
1373 if (mb == DISABLE_AUTO_FLUSH && getMaxBufferedDocs() == DISABLE_AUTO_FLUSH)
1374 throw new IllegalArgumentException(
1375 "at least one of ramBufferSize and maxBufferedDocs must be enabled");
1376 docWriter.setRAMBufferSizeMB(mb);
1377 if (infoStream != null)
1378 message("setRAMBufferSizeMB " + mb);
1379 }
1380
1381 /**
1382 * Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
1383 */
1384 public double getRAMBufferSizeMB() {
1385 return docWriter.getRAMBufferSizeMB();
1386 }
1387
1388 /**
1389 * <p>Determines the minimal number of delete terms required before the buffered
1390 * in-memory delete terms are applied and flushed. If there are documents
1391 * buffered in memory at the time, they are merged and a new segment is
1392 * created.</p>
1393
1394 * <p>Disabled by default (writer flushes by RAM usage).</p>
1395 *
1396 * @throws IllegalArgumentException if maxBufferedDeleteTerms
1397 * is enabled but smaller than 1
1398 * @see #setRAMBufferSizeMB
1399 */
1400 public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
1401 ensureOpen();
1402 if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH
1403 && maxBufferedDeleteTerms < 1)
1404 throw new IllegalArgumentException(
1405 "maxBufferedDeleteTerms must at least be 1 when enabled");
1406 docWriter.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
1407 if (infoStream != null)
1408 message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
1409 }
1410
1411 /**
1412 * Returns the number of buffered deleted terms that will
1413 * trigger a flush if enabled.
1414 * @see #setMaxBufferedDeleteTerms
1415 */
1416 public int getMaxBufferedDeleteTerms() {
1417 ensureOpen();
1418 return docWriter.getMaxBufferedDeleteTerms();
1419 }
1420
1421 /** Determines how often segment indices are merged by addDocument(). With
1422 * smaller values, less RAM is used while indexing, and searches on
1423 * unoptimized indices are faster, but indexing speed is slower. With larger
1424 * values, more RAM is used during indexing, and while searches on unoptimized
1425 * indices are slower, indexing is faster. Thus larger values (> 10) are best
1426 * for batch index creation, and smaller values (< 10) for indices that are
1427 * interactively maintained.
1428 *
1429 * <p>Note that this method is a convenience method: it
1430 * just calls mergePolicy.setMergeFactor as long as
1431 * mergePolicy is an instance of {@link LogMergePolicy}.
1432 * Otherwise an IllegalArgumentException is thrown.</p>
1433 *
1434 * <p>This must never be less than 2. The default value is 10.
1435 */
1436 public void setMergeFactor(int mergeFactor) {
1437 getLogMergePolicy().setMergeFactor(mergeFactor);
1438 }
1439
1440 /**
1441 * <p>Returns the number of segments that are merged at
1442 * once and also controls the total number of segments
1443 * allowed to accumulate in the index.</p>
1444 *
1445 * <p>Note that this method is a convenience method: it
1446 * just calls mergePolicy.getMergeFactor as long as
1447 * mergePolicy is an instance of {@link LogMergePolicy}.
1448 * Otherwise an IllegalArgumentException is thrown.</p>
1449 *
1450 * @see #setMergeFactor
1451 */
1452 public int getMergeFactor() {
1453 return getLogMergePolicy().getMergeFactor();
1454 }
1455
1456 /** If non-null, this will be the default infoStream used
1457 * by a newly instantiated IndexWriter.
1458 * @see #setInfoStream
1459 */
1460 public static void setDefaultInfoStream(PrintStream infoStream) {
1461 IndexWriter.defaultInfoStream = infoStream;
1462 }
1463
1464 /**
1465 * Returns the current default infoStream for newly
1466 * instantiated IndexWriters.
1467 * @see #setDefaultInfoStream
1468 */
1469 public static PrintStream getDefaultInfoStream() {
1470 return IndexWriter.defaultInfoStream;
1471 }
1472
1473 /** If non-null, information about merges, deletes and a
1474 * message when maxFieldLength is reached will be printed
1475 * to this.
1476 */
1477 public void setInfoStream(PrintStream infoStream) {
1478 ensureOpen();
1479 setMessageID(infoStream);
1480 docWriter.setInfoStream(infoStream);
1481 deleter.setInfoStream(infoStream);
1482 if (infoStream != null)
1483 messageState();
1484 }
1485
1486 private void messageState() {
1487 message("setInfoStream: dir=" + directory +
1488 " mergePolicy=" + mergePolicy +
1489 " mergeScheduler=" + mergeScheduler +
1490 " ramBufferSizeMB=" + docWriter.getRAMBufferSizeMB() +
1491 " maxBufferedDocs=" + docWriter.getMaxBufferedDocs() +
1492 " maxBuffereDeleteTerms=" + docWriter.getMaxBufferedDeleteTerms() +
1493 " maxFieldLength=" + maxFieldLength +
1494 " index=" + segString());
1495 }
1496
1497 /**
1498 * Returns the current infoStream in use by this writer.
1499 * @see #setInfoStream
1500 */
1501 public PrintStream getInfoStream() {
1502 ensureOpen();
1503 return infoStream;
1504 }
1505
/** Returns true if verbose output is enabled (i.e., infoStream != null). */
1507 public boolean verbose() {
1508 return infoStream != null;
1509 }
1510
1511 /**
 * Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter.
 * @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
1514 */
1515 public void setWriteLockTimeout(long writeLockTimeout) {
1516 ensureOpen();
1517 this.writeLockTimeout = writeLockTimeout;
1518 }
1519
1520 /**
1521 * Returns allowed timeout when acquiring the write lock.
1522 * @see #setWriteLockTimeout
1523 */
1524 public long getWriteLockTimeout() {
1525 ensureOpen();
1526 return writeLockTimeout;
1527 }
1528
1529 /**
1530 * Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
1531 * milliseconds).
1532 */
1533 public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
1534 IndexWriter.WRITE_LOCK_TIMEOUT = writeLockTimeout;
1535 }
1536
1537 /**
1538 * Returns default write lock timeout for newly
1539 * instantiated IndexWriters.
1540 * @see #setDefaultWriteLockTimeout
1541 */
1542 public static long getDefaultWriteLockTimeout() {
1543 return IndexWriter.WRITE_LOCK_TIMEOUT;
1544 }
1545
1546 /**
1547 * Commits all changes to an index and closes all
1548 * associated files. Note that this may be a costly
1549 * operation, so, try to re-use a single writer instead of
1550 * closing and opening a new one. See {@link #commit()} for
1551 * caveats about write caching done by some IO devices.
1552 *
1553 * <p> If an Exception is hit during close, eg due to disk
1554 * full or some other reason, then both the on-disk index
1555 * and the internal state of the IndexWriter instance will
1556 * be consistent. However, the close will not be complete
1557 * even though part of it (flushing buffered documents)
1558 * may have succeeded, so the write lock will still be
1559 * held.</p>
1560 *
1561 * <p> If you can correct the underlying cause (eg free up
1562 * some disk space) then you can call close() again.
1563 * Failing that, if you want to force the write lock to be
1564 * released (dangerous, because you may then lose buffered
1565 * docs in the IndexWriter instance) then you can do
1566 * something like this:</p>
1567 *
1568 * <pre>
1569 * try {
1570 * writer.close();
1571 * } finally {
1572 * if (IndexWriter.isLocked(directory)) {
1573 * IndexWriter.unlock(directory);
1574 * }
1575 * }
1576 * </pre>
1577 *
 * <p>after which, you must be certain not to use the writer
 * instance anymore.</p>
1580 *
1581 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
1582 * you should immediately close the writer, again. See <a
1583 * href="#OOME">above</a> for details.</p>
1584 *
1585 * @throws CorruptIndexException if the index is corrupt
1586 * @throws IOException if there is a low-level IO error
1587 */
1588 public void close() throws CorruptIndexException, IOException {
1589 close(true);
1590 }
1591
1592 /**
1593 * Closes the index with or without waiting for currently
1594 * running merges to finish. This is only meaningful when
1595 * using a MergeScheduler that runs merges in background
1596 * threads.
1597 *
1598 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
1599 * you should immediately close the writer, again. See <a
1600 * href="#OOME">above</a> for details.</p>
1601 *
1602 * <p><b>NOTE</b>: it is dangerous to always call
1603 * close(false), especially when IndexWriter is not open
1604 * for very long, because this can result in "merge
1605 * starvation" whereby long merges will never have a
1606 * chance to finish. This will cause too many segments in
1607 * your index over time.</p>
1608 *
1609 * @param waitForMerges if true, this call will block
1610 * until all merges complete; else, it will ask all
1611 * running merges to abort, wait until those merges have
1612 * finished (which should be at most a few seconds), and
1613 * then return.
1614 */
1615 public void close(boolean waitForMerges) throws CorruptIndexException, IOException {
1616
1617 // Ensure that only one thread actually gets to do the closing:
1618 if (shouldClose()) {
1619 // If any methods have hit OutOfMemoryError, then abort
1620 // on close, in case the internal state of IndexWriter
1621 // or DocumentsWriter is corrupt
1622 if (hitOOM)
1623 rollbackInternal();
1624 else
1625 closeInternal(waitForMerges);
1626 }
1627 }
1628
1629 // Returns true if this thread should attempt to close, or
1630 // false if IndexWriter is now closed; else, waits until
1631 // another thread finishes closing
1632 synchronized private boolean shouldClose() {
1633 while(true) {
1634 if (!closed) {
1635 if (!closing) {
1636 closing = true;
1637 return true;
1638 } else {
1639 // Another thread is presently trying to close;
1640 // wait until it finishes one way (closes
1641 // successfully) or another (fails to close)
1642 doWait();
1643 }
1644 } else
1645 return false;
1646 }
1647 }
1648
/**
 * Performs the actual close: pauses the DocumentsWriter threads, closes the
 * DocumentsWriter, flushes and commits (both skipped when hitOOM is set),
 * shuts down the merge policy and merge scheduler, closes the reader pool
 * and deleter, releases the write lock and finally marks the writer closed.
 *
 * <p>Whatever the outcome, the <code>closing</code> flag is cleared and
 * <code>notifyAll()</code> is called on this writer's monitor. If the
 * close did not complete, the paused DocumentsWriter threads are resumed.
 *
 * @param waitForMerges passed to flush and finishMerges; when true the
 *  merge scheduler is also given a last chance to run pending merges
 */
private void closeInternal(boolean waitForMerges) throws CorruptIndexException, IOException {

  // Paused before entering the try, so a failure here skips the finally below.
  docWriter.pauseAllThreads();

  try {
    if (infoStream != null)
      message("now flush at close");

    docWriter.close();

    // Only allow a new merge to be triggered if we are
    // going to wait for merges:
    if (!hitOOM) {
      flush(waitForMerges, true, true);
    }

    if (waitForMerges)
      // Give merge scheduler last chance to run, in case
      // any pending merges are waiting:
      mergeScheduler.merge(this);

    mergePolicy.close();

    finishMerges(waitForMerges);
    stopMerges = true;

    mergeScheduler.close();

    if (infoStream != null)
      message("now call final commit()");

    // The final commit is skipped once an OutOfMemoryError has been recorded.
    if (!hitOOM) {
      commit(0);
    }

    if (infoStream != null)
      message("at close: " + segString());

    // docWriter is nulled here; the finally block checks for that before resuming threads.
    synchronized(this) {
      readerPool.close();
      docWriter = null;
      deleter.close();
    }

    if (writeLock != null) {
      writeLock.release();                          // release write lock
      writeLock = null;
    }
    // Only now, with the lock released, is the writer marked closed.
    synchronized(this) {
      closed = true;
    }
  } catch (OutOfMemoryError oom) {
    handleOOM(oom, "closeInternal");
  } finally {
    synchronized(this) {
      // Clear the in-progress flag and notify threads waiting on this monitor
      // (presumably those parked in shouldClose() via doWait() -- confirm).
      closing = false;
      notifyAll();
      if (!closed) {
        // Close did not complete: let indexing threads continue.
        if (docWriter != null)
          docWriter.resumeAllThreads();
        if (infoStream != null)
          message("hit exception while closing");
      }
    }
  }
}
1715
1716 /** Tells the docWriter to close its currently open shared
1717 * doc stores (stored fields & vectors files).
1718 * Return value specifices whether new doc store files are compound or not.
1719 */
1720 private synchronized boolean flushDocStores() throws IOException {
1721
1722 boolean useCompoundDocStore = false;
1723
1724 String docStoreSegment;
1725
1726 boolean success = false;
1727 try {
1728 docStoreSegment = docWriter.closeDocStore();
1729 success = true;
1730 } finally {
1731 if (!success && infoStream != null) {
1732 message("hit exception closing doc store segment");
1733 }
1734 }
1735
1736 useCompoundDocStore = mergePolicy.useCompoundDocStore(segmentInfos);
1737
1738 if (useCompoundDocStore && docStoreSegment != null && docWriter.closedFiles().size() != 0) {
1739 // Now build compound doc store file
1740
1741 if (infoStream != null) {
1742 message("create compound file " + docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
1743 }
1744
1745 success = false;
1746
1747 final int numSegments = segmentInfos.size();
1748 final String compoundFileName = docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION;
1749
1750 try {
1751 CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName);
1752 for (final String file : docWriter.closedFiles() ) {
1753 cfsWriter.addFile(file);
1754 }
1755
1756 // Perform the merge
1757 cfsWriter.close();
1758 success = true;
1759
1760 } finally {
1761 if (!success) {
1762 if (infoStream != null)
1763 message("hit exception building compound file doc store for segment " + docStoreSegment);
1764 deleter.deleteFile(compoundFileName);
1765 docWriter.abort();
1766 }
1767 }
1768
1769 for(int i=0;i<numSegments;i++) {
1770 SegmentInfo si = segmentInfos.info(i);
1771 if (si.getDocStoreOffset() != -1 &&
1772 si.getDocStoreSegment().equals(docStoreSegment))
1773 si.setDocStoreIsCompoundFile(true);
1774 }
1775
1776 checkpoint();
1777
1778 // In case the files we just merged into a CFS were
1779 // not previously checkpointed:
1780 deleter.deleteNewFiles(docWriter.closedFiles());
1781 }
1782
1783 return useCompoundDocStore;
1784 }
1785
1786 /** Returns the Directory used by this index. */
1787 public Directory getDirectory() {
1788 // Pass false because the flush during closing calls getDirectory
1789 ensureOpen(false);
1790 return directory;
1791 }
1792
1793 /** Returns the analyzer used by this index. */
1794 public Analyzer getAnalyzer() {
1795 ensureOpen();
1796 return analyzer;
1797 }
1798
1799 /** Returns total number of docs in this index, including
1800 * docs not yet flushed (still in the RAM buffer),
1801 * not counting deletions.
1802 * @see #numDocs */
1803 public synchronized int maxDoc() {
1804 int count;
1805 if (docWriter != null)
1806 count = docWriter.getNumDocsInRAM();
1807 else
1808 count = 0;
1809
1810 for (int i = 0; i < segmentInfos.size(); i++)
1811 count += segmentInfos.info(i).docCount;
1812 return count;
1813 }
1814
1815 /** Returns total number of docs in this index, including
1816 * docs not yet flushed (still in the RAM buffer), and
1817 * including deletions. <b>NOTE:</b> buffered deletions
1818 * are not counted. If you really need these to be
1819 * counted you should call {@link #commit()} first.
1820 * @see #numDocs */
1821 public synchronized int numDocs() throws IOException {
1822 int count;
1823 if (docWriter != null)
1824 count = docWriter.getNumDocsInRAM();
1825 else
1826 count = 0;
1827
1828 for (int i = 0; i < segmentInfos.size(); i++) {
1829 final SegmentInfo info = segmentInfos.info(i);
1830 count += info.docCount - info.getDelCount();
1831 }
1832 return count;
1833 }
1834
1835 public synchronized boolean hasDeletions() throws IOException {
1836 ensureOpen();
1837 if (docWriter.hasDeletes())
1838 return true;
1839 for (int i = 0; i < segmentInfos.size(); i++)
1840 if (segmentInfos.info(i).hasDeletions())
1841 return true;
1842 return false;
1843 }
1844
1845 /**
1846 * The maximum number of terms that will be indexed for a single field in a
1847 * document. This limits the amount of memory required for indexing, so that
1848 * collections with very large files will not crash the indexing process by
1849 * running out of memory.<p/>
1850 * Note that this effectively truncates large documents, excluding from the
1851 * index terms that occur further in the document. If you know your source
1852 * documents are large, be sure to set this value high enough to accommodate
1853 * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
1854 * is your memory, but you should anticipate an OutOfMemoryError.<p/>
1855 * By default, no more than 10,000 terms will be indexed for a field.
1856 *
1857 * @see MaxFieldLength
1858 */
1859 private int maxFieldLength;
1860
1861 /**
1862 * Adds a document to this index. If the document contains more than
1863 * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
1864 * discarded.
1865 *
1866 * <p> Note that if an Exception is hit (for example disk full)
1867 * then the index will be consistent, but this document
1868 * may not have been added. Furthermore, it's possible
1869 * the index will have one segment in non-compound format
1870 * even when using compound files (when a merge has
1871 * partially succeeded).</p>
1872 *
1873 * <p> This method periodically flushes pending documents
1874 * to the Directory (see <a href="#flush">above</a>), and
1875 * also periodically triggers segment merges in the index
1876 * according to the {@link MergePolicy} in use.</p>
1877 *
1878 * <p>Merges temporarily consume space in the
1879 * directory. The amount of space required is up to 1X the
1880 * size of all segments being merged, when no
1881 * readers/searchers are open against the index, and up to
1882 * 2X the size of all segments being merged when
1883 * readers/searchers are open against the index (see
1884 * {@link #optimize()} for details). The sequence of
1885 * primitive merge operations performed is governed by the
1886 * merge policy.
1887 *
1888 * <p>Note that each term in the document can be no longer
1889 * than 16383 characters, otherwise an
1890 * IllegalArgumentException will be thrown.</p>
1891 *
1892 * <p>Note that it's possible to create an invalid Unicode
1893 * string in java if a UTF16 surrogate pair is malformed.
1894 * In this case, the invalid characters are silently
1895 * replaced with the Unicode replacement character
1896 * U+FFFD.</p>
1897 *
1898 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
1899 * you should immediately close the writer. See <a
1900 * href="#OOME">above</a> for details.</p>
1901 *
1902 * @throws CorruptIndexException if the index is corrupt
1903 * @throws IOException if there is a low-level IO error
1904 */
1905 public void addDocument(Document doc) throws CorruptIndexException, IOException {
1906 addDocument(doc, analyzer);
1907 }
1908
1909 /**
1910 * Adds a document to this index, using the provided analyzer instead of the
1911 * value of {@link #getAnalyzer()}. If the document contains more than
1912 * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
1913 * discarded.
1914 *
1915 * <p>See {@link #addDocument(Document)} for details on
1916 * index and IndexWriter state after an Exception, and
1917 * flushing/merging temporary free space requirements.</p>
1918 *
1919 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
1920 * you should immediately close the writer. See <a
1921 * href="#OOME">above</a> for details.</p>
1922 *
1923 * @throws CorruptIndexException if the index is corrupt
1924 * @throws IOException if there is a low-level IO error
1925 */
1926 public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
1927 ensureOpen();
1928 boolean doFlush = false;
1929 boolean success = false;
1930 try {
1931 try {
1932 doFlush = docWriter.addDocument(doc, analyzer);
1933 success = true;
1934 } finally {
1935 if (!success) {
1936
1937 if (infoStream != null)
1938 message("hit exception adding document");
1939
1940 synchronized (this) {
1941 // If docWriter has some aborted files that were
1942 // never incref'd, then we clean them up here
1943 if (docWriter != null) {
1944 final Collection<String> files = docWriter.abortedFiles();
1945 if (files != null)
1946 deleter.deleteNewFiles(files);
1947 }
1948 }
1949 }
1950 }
1951 if (doFlush)
1952 flush(true, false, false);
1953 } catch (OutOfMemoryError oom) {
1954 handleOOM(oom, "addDocument");
1955 }
1956 }
1957
1958 /**
1959 * Deletes the document(s) containing <code>term</code>.
1960 *
1961 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
1962 * you should immediately close the writer. See <a
1963 * href="#OOME">above</a> for details.</p>
1964 *
1965 * @param term the term to identify the documents to be deleted
1966 * @throws CorruptIndexException if the index is corrupt
1967 * @throws IOException if there is a low-level IO error
1968 */
1969 public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
1970 ensureOpen();
1971 try {
1972 boolean doFlush = docWriter.bufferDeleteTerm(term);
1973 if (doFlush)
1974 flush(true, false, false);
1975 } catch (OutOfMemoryError oom) {
1976 handleOOM(oom, "deleteDocuments(Term)");
1977 }
1978 }
1979
1980 /**
1981 * Deletes the document(s) containing any of the
1982 * terms. All deletes are flushed at the same time.
1983 *
1984 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
1985 * you should immediately close the writer. See <a
1986 * href="#OOME">above</a> for details.</p>
1987 *
1988 * @param terms array of terms to identify the documents
1989 * to be deleted
1990 * @throws CorruptIndexException if the index is corrupt
1991 * @throws IOException if there is a low-level IO error
1992 */
1993 public void deleteDocuments(Term... terms) throws CorruptIndexException, IOException {
1994 ensureOpen();
1995 try {
1996 boolean doFlush = docWriter.bufferDeleteTerms(terms);
1997 if (doFlush)
1998 flush(true, false, false);
1999 } catch (OutOfMemoryError oom) {
2000 handleOOM(oom, "deleteDocuments(Term..)");
2001 }
2002 }
2003
2004 /**
2005 * Deletes the document(s) matching the provided query.
2006 *
2007 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2008 * you should immediately close the writer. See <a
2009 * href="#OOME">above</a> for details.</p>
2010 *
2011 * @param query the query to identify the documents to be deleted
2012 * @throws CorruptIndexException if the index is corrupt
2013 * @throws IOException if there is a low-level IO error
2014 */
2015 public void deleteDocuments(Query query) throws CorruptIndexException, IOException {
2016 ensureOpen();
2017 boolean doFlush = docWriter.bufferDeleteQuery(query);
2018 if (doFlush)
2019 flush(true, false, false);
2020 }
2021
2022 /**
2023 * Deletes the document(s) matching any of the provided queries.
2024 * All deletes are flushed at the same time.
2025 *
2026 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2027 * you should immediately close the writer. See <a
2028 * href="#OOME">above</a> for details.</p>
2029 *
2030 * @param queries array of queries to identify the documents
2031 * to be deleted
2032 * @throws CorruptIndexException if the index is corrupt
2033 * @throws IOException if there is a low-level IO error
2034 */
2035 public void deleteDocuments(Query... queries) throws CorruptIndexException, IOException {
2036 ensureOpen();
2037 boolean doFlush = docWriter.bufferDeleteQueries(queries);
2038 if (doFlush)
2039 flush(true, false, false);
2040 }
2041
2042 /**
2043 * Updates a document by first deleting the document(s)
2044 * containing <code>term</code> and then adding the new
2045 * document. The delete and then add are atomic as seen
2046 * by a reader on the same index (flush may happen only after
2047 * the add).
2048 *
2049 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2050 * you should immediately close the writer. See <a
2051 * href="#OOME">above</a> for details.</p>
2052 *
2053 * @param term the term to identify the document(s) to be
2054 * deleted
2055 * @param doc the document to be added
2056 * @throws CorruptIndexException if the index is corrupt
2057 * @throws IOException if there is a low-level IO error
2058 */
2059 public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException {
2060 ensureOpen();
2061 updateDocument(term, doc, getAnalyzer());
2062 }
2063
2064 /**
2065 * Updates a document by first deleting the document(s)
2066 * containing <code>term</code> and then adding the new
2067 * document. The delete and then add are atomic as seen
2068 * by a reader on the same index (flush may happen only after
2069 * the add).
2070 *
2071 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2072 * you should immediately close the writer. See <a
2073 * href="#OOME">above</a> for details.</p>
2074 *
2075 * @param term the term to identify the document(s) to be
2076 * deleted
2077 * @param doc the document to be added
2078 * @param analyzer the analyzer to use when analyzing the document
2079 * @throws CorruptIndexException if the index is corrupt
2080 * @throws IOException if there is a low-level IO error
2081 */
2082 public void updateDocument(Term term, Document doc, Analyzer analyzer)
2083 throws CorruptIndexException, IOException {
2084 ensureOpen();
2085 try {
2086 boolean doFlush = false;
2087 boolean success = false;
2088 try {
2089 doFlush = docWriter.updateDocument(term, doc, analyzer);
2090 success = true;
2091 } finally {
2092 if (!success) {
2093
2094 if (infoStream != null)
2095 message("hit exception updating document");
2096
2097 synchronized (this) {
2098 // If docWriter has some aborted files that were
2099 // never incref'd, then we clean them up here
2100 final Collection<String> files = docWriter.abortedFiles();
2101 if (files != null)
2102 deleter.deleteNewFiles(files);
2103 }
2104 }
2105 }
2106 if (doFlush)
2107 flush(true, false, false);
2108 } catch (OutOfMemoryError oom) {
2109 handleOOM(oom, "updateDocument");
2110 }
2111 }
2112
2113 // for test purpose
2114 final synchronized int getSegmentCount(){
2115 return segmentInfos.size();
2116 }
2117
2118 // for test purpose
2119 final synchronized int getNumBufferedDocuments(){
2120 return docWriter.getNumDocsInRAM();
2121 }
2122
2123 // for test purpose
2124 final synchronized int getDocCount(int i) {
2125 if (i >= 0 && i < segmentInfos.size()) {
2126 return segmentInfos.info(i).docCount;
2127 } else {
2128 return -1;
2129 }
2130 }
2131
2132 // for test purpose
2133 final synchronized int getFlushCount() {
2134 return flushCount;
2135 }
2136
2137 // for test purpose
2138 final synchronized int getFlushDeletesCount() {
2139 return flushDeletesCount;
2140 }
2141
2142 final String newSegmentName() {
2143 // Cannot synchronize on IndexWriter because that causes
2144 // deadlock
2145 synchronized(segmentInfos) {
2146 // Important to increment changeCount so that the
2147 // segmentInfos is written on close. Otherwise we
2148 // could close, re-open and re-return the same segment
2149 // name that was previously returned which can cause
2150 // problems at least with ConcurrentMergeScheduler.
2151 changeCount++;
2152 return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
2153 }
2154 }
2155
2156 /** If non-null, information about merges will be printed to this.
2157 */
2158 private PrintStream infoStream = null;
2159 private static PrintStream defaultInfoStream = null;
2160
2161 /**
2162 * Requests an "optimize" operation on an index, priming the index
2163 * for the fastest available search. Traditionally this has meant
2164 * merging all segments into a single segment as is done in the
2165 * default merge policy, but individual merge policies may implement
2166 * optimize in different ways.
2167 *
2168 * <p>It is recommended that this method be called upon completion of indexing. In
2169 * environments with frequent updates, optimize is best done during low volume times, if at all.
2170 *
2171 * </p>
2172 * <p>See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion. </p>
2173 *
2174 * <p>Note that optimize requires 2X the index size free
2175 * space in your Directory. For example, if your index
2176 * size is 10 MB then you need 20 MB free for optimize to
2177 * complete.</p>
2178 *
2179 * <p>If some but not all readers re-open while an
2180 * optimize is underway, this will cause > 2X temporary
2181 * space to be consumed as those new readers will then
2182 * hold open the partially optimized segments at that
2183 * time. It is best not to re-open readers while optimize
2184 * is running.</p>
2185 *
2186 * <p>The actual temporary usage could be much less than
2187 * these figures (it depends on many factors).</p>
2188 *
2189 * <p>In general, once the optimize completes, the total size of the
2190 * index will be less than the size of the starting index.
2191 * It could be quite a bit smaller (if there were many
2192 * pending deletes) or just slightly smaller.</p>
2193 *
2194 * <p>If an Exception is hit during optimize(), for example
2195 * due to disk full, the index will not be corrupt and no
2196 * documents will have been lost. However, it may have
2197 * been partially optimized (some segments were merged but
2198 * not all), and it's possible that one of the segments in
2199 * the index will be in non-compound format even when
2200 * using compound file format. This will occur when the
2201 * Exception is hit during conversion of the segment into
2202 * compound format.</p>
2203 *
2204 * <p>This call will optimize those segments present in
2205 * the index when the call started. If other threads are
2206 * still adding documents and flushing segments, those
2207 * newly created segments will not be optimized unless you
2208 * call optimize again.</p>
2209 *
2210 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2211 * you should immediately close the writer. See <a
2212 * href="#OOME">above</a> for details.</p>
2213 *
2214 * @throws CorruptIndexException if the index is corrupt
2215 * @throws IOException if there is a low-level IO error
2216 * @see LogMergePolicy#findMergesForOptimize
2217 */
2218 public void optimize() throws CorruptIndexException, IOException {
2219 optimize(true);
2220 }
2221
2222 /**
2223 * Optimize the index down to <= maxNumSegments. If
2224 * maxNumSegments==1 then this is the same as {@link
2225 * #optimize()}.
2226 *
2227 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2228 * you should immediately close the writer. See <a
2229 * href="#OOME">above</a> for details.</p>
2230 *
2231 * @param maxNumSegments maximum number of segments left
2232 * in the index after optimization finishes
2233 */
2234 public void optimize(int maxNumSegments) throws CorruptIndexException, IOException {
2235 optimize(maxNumSegments, true);
2236 }
2237
2238 /** Just like {@link #optimize()}, except you can specify
2239 * whether the call should block until the optimize
2240 * completes. This is only meaningful with a
2241 * {@link MergeScheduler} that is able to run merges in
2242 * background threads.
2243 *
2244 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2245 * you should immediately close the writer. See <a
2246 * href="#OOME">above</a> for details.</p>
2247 */
2248 public void optimize(boolean doWait) throws CorruptIndexException, IOException {
2249 optimize(1, doWait);
2250 }
2251
2252 /** Just like {@link #optimize(int)}, except you can
2253 * specify whether the call should block until the
2254 * optimize completes. This is only meaningful with a
2255 * {@link MergeScheduler} that is able to run merges in
2256 * background threads.
2257 *
2258 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2259 * you should immediately close the writer. See <a
2260 * href="#OOME">above</a> for details.</p>
2261 */
2262 public void optimize(int maxNumSegments, boolean doWait) throws CorruptIndexException, IOException {
2263 ensureOpen();
2264
2265 if (maxNumSegments < 1)
2266 throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);
2267
2268 if (infoStream != null)
2269 message("optimize: index now " + segString());
2270
2271 flush(true, false, true);
2272
2273 synchronized(this) {
2274 resetMergeExceptions();
2275 segmentsToOptimize = new HashSet<SegmentInfo>();
2276 final int numSegments = segmentInfos.size();
2277 for(int i=0;i<numSegments;i++)
2278 segmentsToOptimize.add(segmentInfos.info(i));
2279
2280 // Now mark all pending & running merges as optimize
2281 // merge:
2282 for(final MergePolicy.OneMerge merge : pendingMerges) {
2283 merge.optimize = true;
2284 merge.maxNumSegmentsOptimize = maxNumSegments;
2285 }
2286
2287 for ( final MergePolicy.OneMerge merge: runningMerges ) {
2288 merge.optimize = true;
2289 merge.maxNumSegmentsOptimize = maxNumSegments;
2290 }
2291 }
2292
2293 maybeMerge(maxNumSegments, true);
2294
2295 if (doWait) {
2296 synchronized(this) {
2297 while(true) {
2298
2299 if (hitOOM) {
2300 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete optimize");
2301 }
2302
2303 if (mergeExceptions.size() > 0) {
2304 // Forward any exceptions in background merge
2305 // threads to the current thread:
2306 final int size = mergeExceptions.size();
2307 for(int i=0;i<size;i++) {
2308 final MergePolicy.OneMerge merge = mergeExceptions.get(i);
2309 if (merge.optimize) {
2310 IOException err = new IOException("background merge hit exception: " + merge.segString(directory));
2311 final Throwable t = merge.getException();
2312 if (t != null)
2313 err.initCause(t);
2314 throw err;
2315 }
2316 }
2317 }
2318
2319 if (optimizeMergesPending())
2320 doWait();
2321 else
2322 break;
2323 }
2324 }
2325
2326 // If close is called while we are still
2327 // running, throw an exception so the calling
2328 // thread will know the optimize did not
2329 // complete
2330 ensureOpen();
2331 }
2332
2333 // NOTE: in the ConcurrentMergeScheduler case, when
2334 // doWait is false, we can return immediately while
2335 // background threads accomplish the optimization
2336 }
2337
2338 /** Returns true if any merges in pendingMerges or
2339 * runningMerges are optimization merges. */
2340 private synchronized boolean optimizeMergesPending() {
2341 for (final MergePolicy.OneMerge merge : pendingMerges) {
2342 if (merge.optimize)
2343 return true;
2344 }
2345
2346 for (final MergePolicy.OneMerge merge : runningMerges) {
2347 if (merge.optimize)
2348 return true;
2349 }
2350
2351 return false;
2352 }
2353
2354 /** Just like {@link #expungeDeletes()}, except you can
2355 * specify whether the call should block until the
2356 * operation completes. This is only meaningful with a
2357 * {@link MergeScheduler} that is able to run merges in
2358 * background threads.
2359 *
2360 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2361 * you should immediately close the writer. See <a
2362 * href="#OOME">above</a> for details.</p>
2363 */
2364 public void expungeDeletes(boolean doWait)
2365 throws CorruptIndexException, IOException {
2366 ensureOpen();
2367
2368 if (infoStream != null)
2369 message("expungeDeletes: index now " + segString());
2370
2371 MergePolicy.MergeSpecification spec;
2372
2373 synchronized(this) {
2374 spec = mergePolicy.findMergesToExpungeDeletes(segmentInfos);
2375 if (spec != null) {
2376 final int numMerges = spec.merges.size();
2377 for(int i=0;i<numMerges;i++)
2378 registerMerge(spec.merges.get(i));
2379 }
2380 }
2381
2382 mergeScheduler.merge(this);
2383
2384 if (spec != null && doWait) {
2385 final int numMerges = spec.merges.size();
2386 synchronized(this) {
2387 boolean running = true;
2388 while(running) {
2389
2390 if (hitOOM) {
2391 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete expungeDeletes");
2392 }
2393
2394 // Check each merge that MergePolicy asked us to
2395 // do, to see if any of them are still running and
2396 // if any of them have hit an exception.
2397 running = false;
2398 for(int i=0;i<numMerges;i++) {
2399 final MergePolicy.OneMerge merge = spec.merges.get(i);
2400 if (pendingMerges.contains(merge) || runningMerges.contains(merge))
2401 running = true;
2402 Throwable t = merge.getException();
2403 if (t != null) {
2404 IOException ioe = new IOException("background merge hit exception: " + merge.segString(directory));
2405 ioe.initCause(t);
2406 throw ioe;
2407 }
2408 }
2409
2410 // If any of our merges are still running, wait:
2411 if (running)
2412 doWait();
2413 }
2414 }
2415 }
2416
2417 // NOTE: in the ConcurrentMergeScheduler case, when
2418 // doWait is false, we can return immediately while
2419 // background threads accomplish the optimization
2420 }
2421
2422
2423 /** Expunges all deletes from the index. When an index
2424 * has many document deletions (or updates to existing
2425 * documents), it's best to either call optimize or
2426 * expungeDeletes to remove all unused data in the index
2427 * associated with the deleted documents. To see how
2428 * many deletions you have pending in your index, call
2429 * {@link IndexReader#numDeletedDocs}
2430 * This saves disk space and memory usage while
2431 * searching. expungeDeletes should be somewhat faster
2432 * than optimize since it does not insist on reducing the
2433 * index to a single segment (though, this depends on the
2434 * {@link MergePolicy}; see {@link
2435 * MergePolicy#findMergesToExpungeDeletes}.). Note that
2436 * this call does not first commit any buffered
2437 * documents, so you must do so yourself if necessary.
2438 * See also {@link #expungeDeletes(boolean)}
2439 *
2440 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2441 * you should immediately close the writer. See <a
2442 * href="#OOME">above</a> for details.</p>
2443 */
2444 public void expungeDeletes() throws CorruptIndexException, IOException {
2445 expungeDeletes(true);
2446 }
2447
2448 /**
2449 * Expert: asks the mergePolicy whether any merges are
2450 * necessary now and if so, runs the requested merges and
2451 * then iterate (test again if merges are needed) until no
2452 * more merges are returned by the mergePolicy.
2453 *
2454 * Explicit calls to maybeMerge() are usually not
2455 * necessary. The most common case is when merge policy
2456 * parameters have changed.
2457 *
2458 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
2459 * you should immediately close the writer. See <a
2460 * href="#OOME">above</a> for details.</p>
2461 */
2462 public final void maybeMerge() throws CorruptIndexException, IOException {
2463 maybeMerge(false);
2464 }
2465
2466 private final void maybeMerge(boolean optimize) throws CorruptIndexException, IOException {
2467 maybeMerge(1, optimize);
2468 }
2469
2470 private final void maybeMerge(int maxNumSegmentsOptimize, boolean optimize) throws CorruptIndexException, IOException {
2471 updatePendingMerges(maxNumSegmentsOptimize, optimize);
2472 mergeScheduler.merge(this);
2473 }
2474
2475 private synchronized void updatePendingMerges(int maxNumSegmentsOptimize, boolean optimize)
2476 throws CorruptIndexException, IOException {
2477 assert !optimize || maxNumSegmentsOptimize > 0;
2478
2479 if (stopMerges)
2480 return;
2481
2482 // Do not start new merges if we've hit OOME
2483 if (hitOOM) {
2484 return;
2485 }
2486
2487 final MergePolicy.MergeSpecification spec;
2488 if (optimize) {
2489 spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize);
2490
2491 if (spec != null) {
2492 final int numMerges = spec.merges.size();
2493 for(int i=0;i<numMerges;i++) {
2494 final MergePolicy.OneMerge merge = ( spec.merges.get(i));
2495 merge.optimize = true;
2496 merge.maxNumSegmentsOptimize = maxNumSegmentsOptimize;
2497 }
2498 }
2499
2500 } else
2501 spec = mergePolicy.findMerges(segmentInfos);
2502
2503 if (spec != null) {
2504 final int numMerges = spec.merges.size();
2505 for(int i=0;i<numMerges;i++)
2506 registerMerge(spec.merges.get(i));
2507 }
2508 }
2509
2510 /** Expert: the {@link MergeScheduler} calls this method
2511 * to retrieve the next merge requested by the
2512 * MergePolicy */
2513 synchronized MergePolicy.OneMerge getNextMerge() {
2514 if (pendingMerges.size() == 0)
2515 return null;
2516 else {
2517 // Advance the merge from pending to running
2518 MergePolicy.OneMerge merge = pendingMerges.removeFirst();
2519 runningMerges.add(merge);
2520 return merge;
2521 }
2522 }
2523
2524 /** Like getNextMerge() except only returns a merge if it's
2525 * external. */
2526 private synchronized MergePolicy.OneMerge getNextExternalMerge() {
2527 if (pendingMerges.size() == 0)
2528 return null;
2529 else {
2530 Iterator<MergePolicy.OneMerge> it = pendingMerges.iterator();
2531 while(it.hasNext()) {
2532 MergePolicy.OneMerge merge = it.next();
2533 if (merge.isExternal) {
2534 // Advance the merge from pending to running
2535 it.remove();
2536 runningMerges.add(merge);
2537 return merge;
2538 }
2539 }
2540
2541 // All existing merges do not involve external segments
2542 return null;
2543 }
2544 }
2545
/*
 * Begin a transaction.  During a transaction, any segment
 * merges that happen (or ram segments flushed) will not
 * write a new segments file and will not remove any files
 * that were present at the start of the transaction.  You
 * must make a matched (try/finally) call to
 * commitTransaction() or rollbackTransaction() to finish
 * the transaction.
 *
 * Note that buffered documents and delete terms are not handled
 * within the transactions, so they must be flushed before the
 * transaction is started.
 *
 * If haveReadLock is true the caller already holds the read
 * lock and it is upgraded to the write lock here; otherwise
 * the write lock is acquired directly.  If either try block
 * below throws, the lock held at that point (the caller's
 * read lock in the first, the write lock in the second) is
 * released before the exception propagates.
 */
private synchronized void startTransaction(boolean haveReadLock) throws IOException {

  boolean success = false;
  try {
    if (infoStream != null)
      message("now start transaction");

    // Precondition: nothing may be buffered in docWriter
    assert docWriter.getNumBufferedDeleteTerms() == 0 :
    "calling startTransaction with buffered delete terms not supported: numBufferedDeleteTerms=" + docWriter.getNumBufferedDeleteTerms();
    assert docWriter.getNumDocsInRAM() == 0 :
    "calling startTransaction with buffered documents not supported: numDocsInRAM=" + docWriter.getNumDocsInRAM();

    ensureOpen();

    // If a transaction is trying to roll back (because
    // addIndexes hit an exception) then wait here until
    // that's done:
    synchronized(this) {
      while(stopMerges)
        doWait();
    }
    success = true;
  } finally {
    // Release the read lock if our caller held it, on
    // hitting an exception
    if (!success && haveReadLock)
      releaseRead();
  }

  // From here on we hold the write lock
  if (haveReadLock) {
    upgradeReadToWrite();
  } else {
    acquireWrite();
  }

  success = false;
  try {
    // Snapshot the current segments so rollbackTransaction()
    // can restore them
    localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();

    assert !hasExternalSegments();

    // Saved so rollbackTransaction() can restore the count
    localFlushedDocCount = docWriter.getFlushedDocCount();

    // We must "protect" our files at this point from
    // deletion in case we need to rollback:
    deleter.incRef(segmentInfos, false);

    success = true;
  } finally {
    // On failure give the write lock back (finishAddIndexes
    // calls releaseWrite)
    if (!success)
      finishAddIndexes();
  }
}
2612
/*
 * Rolls back the transaction and restores state to where
 * we were at the start: the flushed doc count and the
 * SegmentInfo entries saved by startTransaction() are put
 * back, running merges are aborted, the write lock is
 * released and the deleter is asked to clean up files that
 * are no longer referenced.
 */
private synchronized void rollbackTransaction() throws IOException {

  if (infoStream != null)
    message("now rollback transaction");

  // Restore the flushed doc count recorded in startTransaction
  if (docWriter != null) {
    docWriter.setFlushedDocCount(localFlushedDocCount);
  }

  // Must finish merges before rolling back segmentInfos
  // so merges don't hit exceptions on trying to commit
  // themselves, don't get files deleted out from under
  // them, etc:
  finishMerges(false);

  // Keep the same segmentInfos instance but replace all
  // of its SegmentInfo instances.  This is so the next
  // attempt to commit using this instance of IndexWriter
  // will always write to a new generation ("write once").
  segmentInfos.clear();
  segmentInfos.addAll(localRollbackSegmentInfos);
  localRollbackSegmentInfos = null;

  // This must come after we rollback segmentInfos, so
  // that if a commit() kicks off it does not see the
  // segmentInfos with external segments
  finishAddIndexes();

  // Ask deleter to locate unreferenced files we had
  // created & remove them:
  deleter.checkpoint(segmentInfos, false);

  // Remove the incRef we did in startTransaction:
  deleter.decRef(segmentInfos);

  // Also ask deleter to remove any newly created files
  // that were never incref'd; this "garbage" is created
  // when a merge kicks off but aborts part way through
  // before it had a chance to incRef the files it had
  // partially created
  deleter.refresh();

  // Wake up any threads waiting on this writer's monitor
  notifyAll();

  assert !hasExternalSegments();
}
2663
2664 /*
2665 * Commits the transaction. This will write the new
2666 * segments file and remove and pending deletions we have
2667 * accumulated during the transaction
2668 */
2669 private synchronized void commitTransaction() throws IOException {
2670
2671 if (infoStream != null)
2672 message("now commit transaction");
2673
2674 // Give deleter a chance to remove files now:
2675 checkpoint();
2676
2677 // Remove the incRef we did in startTransaction.
2678 deleter.decRef(localRollbackSegmentInfos);
2679
2680 localRollbackSegmentInfos = null;
2681
2682 assert !hasExternalSegments();
2683
2684 finishAddIndexes();
2685 }
2686
2687 /**
2688 * Close the <code>IndexWriter</code> without committing
2689 * any changes that have occurred since the last commit
2690 * (or since it was opened, if commit hasn't been called).
2691 * This removes any temporary files that had been created,
2692 * after which the state of the index will be the same as
2693 * it was when commit() was last called or when this
2694 * writer was first opened. This also clears a previous
2695 * call to {@link #prepareCommit}.
2696 * @throws IOException if there is a low-level IO error
2697 */
2698 public void rollback() throws IOException {
2699 ensureOpen();
2700
2701 // Ensure that only one thread actually gets to do the closing:
2702 if (shouldClose())
2703 rollbackInternal();
2704 }
2705
/*
 * Does the actual work of rollback(): pauses indexing
 * threads, aborts merges, closes the merge policy and
 * scheduler, discards any pending commit, restores
 * segmentInfos from rollbackSegmentInfos, aborts buffered
 * documents and lets the deleter remove unreferenced files.
 * On success it finishes with closeInternal(false).  If an
 * exception is hit, indexing threads are resumed, the
 * closing flag is cleared and the exception propagates.
 */
private void rollbackInternal() throws IOException {

  boolean success = false;

  docWriter.pauseAllThreads();

  try {
    // Abort (do not wait for) pending and running merges
    finishMerges(false);

    // Must pre-close these two, in case they increment
    // changeCount so that we can then set it to false
    // before calling closeInternal
    // NOTE(review): "set it to false" presumably refers to
    // the lastCommitChangeCount = changeCount assignment
    // below -- confirm.
    mergePolicy.close();
    mergeScheduler.close();

    synchronized(this) {

      // Drop a commit that was prepared but never finished
      if (pendingCommit != null) {
        pendingCommit.rollbackCommit(directory);
        deleter.decRef(pendingCommit);
        pendingCommit = null;
        notifyAll();
      }

      // Keep the same segmentInfos instance but replace all
      // of its SegmentInfo instances.  This is so the next
      // attempt to commit using this instance of IndexWriter
      // will always write to a new generation ("write
      // once").
      segmentInfos.clear();
      segmentInfos.addAll(rollbackSegmentInfos);

      assert !hasExternalSegments();

      docWriter.abort();

      assert testPoint("rollback before checkpoint");

      // Ask deleter to locate unreferenced files & remove
      // them:
      deleter.checkpoint(segmentInfos, false);
      deleter.refresh();
    }

    // Don't bother saving any changes in our segmentInfos
    readerPool.clear(null);

    lastCommitChangeCount = changeCount;

    success = true;
  } catch (OutOfMemoryError oom) {
    handleOOM(oom, "rollbackInternal");
  } finally {
    synchronized(this) {
      if (!success) {
        // Undo the pause and the "closing" state so the
        // writer remains usable after the failure
        docWriter.resumeAllThreads();
        closing = false;
        notifyAll();
        if (infoStream != null)
          message("hit exception during rollback");
      }
    }
  }

  closeInternal(false);
}
2772
2773 /**
2774 * Delete all documents in the index.
2775 *
2776 * <p>This method will drop all buffered documents and will
2777 * remove all segments from the index. This change will not be
2778 * visible until a {@link #commit()} has been called. This method
2779 * can be rolled back using {@link #rollback()}.</p>
2780 *
2781 * <p>NOTE: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ).</p>
2782 *
2783 * <p>NOTE: this method will forcefully abort all merges
2784 * in progress. If other threads are running {@link
2785 * #optimize()} or any of the addIndexes methods, they
2786 * will receive {@link MergePolicy.MergeAbortedException}s.
2787 */
2788 public synchronized void deleteAll() throws IOException {
2789 docWriter.pauseAllThreads();
2790 try {
2791
2792 // Abort any running merges
2793 finishMerges(false);
2794
2795 // Remove any buffered docs
2796 docWriter.abort();
2797 docWriter.setFlushedDocCount(0);
2798
2799 // Remove all segments
2800 segmentInfos.clear();
2801
2802 // Ask deleter to locate unreferenced files & remove them:
2803 deleter.checkpoint(segmentInfos, false);
2804 deleter.refresh();
2805
2806 // Don't bother saving any changes in our segmentInfos
2807 readerPool.clear(null);
2808
2809 // Mark that the index has changed
2810 ++changeCount;
2811 } catch (OutOfMemoryError oom) {
2812 handleOOM(oom, "deleteAll");
2813 } finally {
2814 docWriter.resumeAllThreads();
2815 if (infoStream != null) {
2816 message("hit exception during deleteAll");
2817 }
2818 }
2819 }
2820
2821 private synchronized void finishMerges(boolean waitForMerges) throws IOException {
2822 if (!waitForMerges) {
2823
2824 stopMerges = true;
2825
2826 // Abort all pending & running merges:
2827 for (final MergePolicy.OneMerge merge : pendingMerges) {
2828 if (infoStream != null)
2829 message("now abort pending merge " + merge.segString(directory));
2830 merge.abort();
2831 mergeFinish(merge);
2832 }
2833 pendingMerges.clear();
2834
2835 for (final MergePolicy.OneMerge merge : runningMerges) {
2836 if (infoStream != null)
2837 message("now abort running merge " + merge.segString(directory));
2838 merge.abort();
2839 }
2840
2841 // Ensure any running addIndexes finishes. It's fine
2842 // if a new one attempts to start because its merges
2843 // will quickly see the stopMerges == true and abort.
2844 acquireRead();
2845 releaseRead();
2846
2847 // These merges periodically check whether they have
2848 // been aborted, and stop if so. We wait here to make
2849 // sure they all stop. It should not take very long
2850 // because the merge threads periodically check if
2851 // they are aborted.
2852 while(runningMerges.size() > 0) {
2853 if (infoStream != null)
2854 message("now wait for " + runningMerges.size() + " running merge to abort");
2855 doWait();
2856 }
2857
2858 stopMerges = false;
2859 notifyAll();
2860
2861 assert 0 == mergingSegments.size();
2862
2863 if (infoStream != null)
2864 message("all running merges have aborted");
2865
2866 } else {
2867 // waitForMerges() will ensure any running addIndexes finishes.
2868 // It's fine if a new one attempts to start because from our
2869 // caller above the call will see that we are in the
2870 // process of closing, and will throw an
2871 // AlreadyClosedException.
2872 waitForMerges();
2873 }
2874 }
2875
2876 /**
2877 * Wait for any currently outstanding merges to finish.
2878 *
2879 * <p>It is guaranteed that any merges started prior to calling this method
2880 * will have completed once this method completes.</p>
2881 */
2882 public synchronized void waitForMerges() {
2883 // Ensure any running addIndexes finishes.
2884 acquireRead();
2885 releaseRead();
2886
2887 while(pendingMerges.size() > 0 || runningMerges.size() > 0) {
2888 doWait();
2889 }
2890
2891 // sanity check
2892 assert 0 == mergingSegments.size();
2893 }
2894
/*
 * Called whenever the SegmentInfos has been updated and
 * the index files referenced exist (correctly) in the
 * index directory.  Increments changeCount and passes the
 * current segmentInfos to the deleter's checkpoint.
 */
private synchronized void checkpoint() throws IOException {
  changeCount++;
  deleter.checkpoint(segmentInfos, false);
}
2904
/* Releases the write lock (delegates to releaseWrite()). */
private void finishAddIndexes() {
  releaseWrite();
}
2908
2909 private void blockAddIndexes(boolean includePendingClose) {
2910
2911 acquireRead();
2912
2913 boolean success = false;
2914 try {
2915
2916 // Make sure we are still open since we could have
2917 // waited quite a while for last addIndexes to finish
2918 ensureOpen(includePendingClose);
2919 success = true;
2920 } finally {
2921 if (!success)
2922 releaseRead();
2923 }
2924 }
2925
/* Releases the read lock (delegates to releaseRead()); counterpart of blockAddIndexes(). */
private void resumeAddIndexes() {
  releaseRead();
}
2929
/* Replaces mergeExceptions with a fresh empty list and increments mergeGen. */
private synchronized void resetMergeExceptions() {
  mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
  mergeGen++;
}
2934
2935 private void noDupDirs(Directory... dirs) {
2936 HashSet<Directory> dups = new HashSet<Directory>();
2937 for(int i=0;i<dirs.length;i++) {
2938 if (dups.contains(dirs[i]))
2939 throw new IllegalArgumentException("Directory " + dirs[i] + " appears more than once");
2940 if (dirs[i] == directory)
2941 throw new IllegalArgumentException("Cannot add directory to itself");
2942 dups.add(dirs[i]);
2943 }
2944 }
2945
/**
 * Merges all segments from an array of indexes into this
 * index.
 *
 * <p>This may be used to parallelize batch indexing.  A large document
 * collection can be broken into sub-collections.  Each sub-collection can be
 * indexed in parallel, on a different thread, process or machine.  The
 * complete index can then be created by merging sub-collection indexes
 * with this method.
 *
 * <p><b>NOTE:</b> the index in each Directory must not be
 * changed (opened by a writer) while this method is
 * running.  This method does not acquire a write lock in
 * each input Directory, so it is up to the caller to
 * enforce this.
 *
 * <p><b>NOTE:</b> while this is running, any attempts to
 * add or delete documents (with another thread) will be
 * paused until this method completes.
 *
 * <p>This method is transactional in how Exceptions are
 * handled: it does not commit a new segments_N file until
 * all indexes are added.  This means if an Exception
 * occurs (for example disk full), then either no indexes
 * will have been added or they all will have been.</p>
 *
 * <p>Note that this requires temporary free space in the
 * Directory up to 2X the sum of all input indexes
 * (including the starting index).  If readers/searchers
 * are open against the starting index, then temporary
 * free space required will be higher by the size of the
 * starting index (see {@link #optimize()} for details).
 * </p>
 *
 * <p>Once this completes, the final size of the index
 * will be less than the sum of all input index sizes
 * (including the starting index).  It could be quite a
 * bit smaller (if there were many pending deletes) or
 * just slightly smaller.</p>
 *
 * <p>
 * This requires this index not be among those to be added.
 *
 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 * you should immediately close the writer.  See <a
 * href="#OOME">above</a> for details.</p>
 *
 * @param dirs the directories whose indexes are added; must
 *        not contain duplicates or this writer's own directory
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if there is a low-level IO error
 * @throws IllegalArgumentException if a directory appears
 *         more than once or is this writer's own directory
 */
public void addIndexesNoOptimize(Directory... dirs)
    throws CorruptIndexException, IOException {

  ensureOpen();

  noDupDirs(dirs);

  // Do not allow add docs or deletes while we are running:
  docWriter.pauseAllThreads();

  try {
    if (infoStream != null)
      message("flush at addIndexesNoOptimize");
    // startTransaction() asserts that nothing is buffered,
    // so flush first
    flush(true, false, true);

    boolean success = false;

    startTransaction(false);

    try {

      int docCount = 0;
      synchronized(this) {
        ensureOpen();

        for (int i = 0; i < dirs.length; i++) {
          if (directory == dirs[i]) {
            // cannot add this index: segments may be deleted in merge before added
            throw new IllegalArgumentException("Cannot add this index to itself");
          }

          SegmentInfos sis = new SegmentInfos();	  // read infos from dir
          sis.read(dirs[i]);
          for (int j = 0; j < sis.size(); j++) {
            SegmentInfo info = sis.info(j);
            assert !segmentInfos.contains(info): "dup info dir=" + info.dir + " name=" + info.name;
            docCount += info.docCount;
            segmentInfos.add(info);	  // add each info
          }
        }
      }

      // Notify DocumentsWriter that the flushed count just increased
      docWriter.updateFlushedDocCount(docCount);

      maybeMerge();

      ensureOpen();

      // If after merging there remain segments in the index
      // that are in a different directory, just copy these
      // over into our index.  This is necessary (before
      // finishing the transaction) to avoid leaving the
      // index in an unusable (inconsistent) state.
      resolveExternalSegments();

      ensureOpen();

      success = true;

    } finally {
      // Every startTransaction() must be matched by exactly
      // one commit or rollback
      if (success) {
        commitTransaction();
      } else {
        rollbackTransaction();
      }
    }
  } catch (OutOfMemoryError oom) {
    handleOOM(oom, "addIndexesNoOptimize");
  } finally {
    // Undo the pauseAllThreads() above
    if (docWriter != null) {
      docWriter.resumeAllThreads();
    }
  }
}
3071
/* Asks segmentInfos whether it has external segments with respect to this writer's directory. */
private boolean hasExternalSegments() {
  return segmentInfos.hasExternalSegments(directory);
}
3075
/* If any of our segments are using a directory != ours
 * then we have to either copy them over one by one, merge
 * them (if merge policy has chosen to) or wait until
 * currently running merges (in the background) complete.
 * We don't return until the SegmentInfos has no more
 * external segments.  Currently this is only used by
 * addIndexesNoOptimize().
 *
 * Throws MergePolicy.MergeAbortedException if stopMerges is
 * set while we are looping. */
private void resolveExternalSegments() throws CorruptIndexException, IOException {

  // Set once we have run at least one merge ourselves
  boolean any = false;

  boolean done = false;

  while(!done) {
    SegmentInfo info = null;
    MergePolicy.OneMerge merge = null;
    synchronized(this) {

      if (stopMerges)
        throw new MergePolicy.MergeAbortedException("rollback() was called or addIndexes* hit an unhandled exception");

      final int numSegments = segmentInfos.size();

      // Assume we are finished until an external segment is found
      done = true;
      for(int i=0;i<numSegments;i++) {
        info = segmentInfos.info(i);
        if (info.dir != directory) {
          done = false;
          // Single-segment merge covering just this external segment
          final MergePolicy.OneMerge newMerge = new MergePolicy.OneMerge(segmentInfos.range(i, 1+i), mergePolicy instanceof LogMergePolicy && getUseCompoundFile());

          // Returns true if no running merge conflicts
          // with this one (and, records this merge as
          // pending), ie, this segment is not currently
          // being merged:
          if (registerMerge(newMerge)) {
            merge = newMerge;

            // If this segment is not currently being
            // merged, then advance it to running & run
            // the merge ourself (below):
            pendingMerges.remove(merge);
            runningMerges.add(merge);
            break;
          }
        }
      }

      if (!done && merge == null)
        // We are not yet done (external segments still
        // exist in segmentInfos), yet, all such segments
        // are currently "covered" by a pending or running
        // merge.  We now try to grab any pending merge
        // that involves external segments:
        merge = getNextExternalMerge();

      if (!done && merge == null)
        // We are not yet done, and, all external segments
        // fall under merges that the merge scheduler is
        // currently running.  So, we now wait and check
        // back to see if the merge has completed.
        doWait();
    }

    // Run the chosen merge outside the synchronized block
    if (merge != null) {
      any = true;
      merge(merge);
    }
  }

  if (any)
    // Sometimes, on copying an external segment over,
    // more merges may become necessary:
    mergeScheduler.merge(this);
}
3150
/** Merges the provided indexes into this index.
 * <p>After this completes, the index is optimized. </p>
 * <p>The provided IndexReaders are not closed.</p>
 *
 * <p><b>NOTE:</b> while this is running, any attempts to
 * add or delete documents (with another thread) will be
 * paused until this method completes.
 *
 * <p>See {@link #addIndexesNoOptimize} for
 * details on transactional semantics, temporary free
 * space required in the Directory, and non-CFS segments
 * on an Exception.</p>
 *
 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 * you should immediately close the writer.  See <a
 * href="#OOME">above</a> for details.</p>
 *
 * @param readers the readers whose contents are merged into
 *        this index
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if there is a low-level IO error
 */
public void addIndexes(IndexReader... readers)
  throws CorruptIndexException, IOException {

  ensureOpen();

  // Do not allow add docs or deletes while we are running:
  docWriter.pauseAllThreads();

  // We must pre-acquire a read lock here (and upgrade to
  // write lock in startTransaction below) so that no
  // other addIndexes is allowed to start up after we have
  // flushed & optimized but before we then start our
  // transaction.  This is because the merging below
  // requires that only one segment is present in the
  // index:
  acquireRead();

  try {

    SegmentInfo info = null;
    String mergedName = null;
    SegmentMerger merger = null;

    boolean success = false;

    try {
      flush(true, false, true);
      optimize();					  // start with zero or 1 seg
      success = true;
    } finally {
      // Take care to release the read lock if we hit an
      // exception before starting the transaction
      if (!success)
        releaseRead();
    }

    // true means we already have a read lock; if this
    // call hits an exception it will release the lock it
    // holds at that point (the read lock before the
    // upgrade, the write lock after it):
    startTransaction(true);

    try {
      mergedName = newSegmentName();
      merger = new SegmentMerger(this, mergedName, null);

      SegmentReader sReader = null;
      synchronized(this) {
        if (segmentInfos.size() == 1) { // add existing index, if any
          sReader = readerPool.get(segmentInfos.info(0), true, BufferedIndexInput.BUFFER_SIZE, -1);
        }
      }

      // Re-armed: from here on it tracks the merge itself
      success = false;

      try {
        if (sReader != null)
          merger.add(sReader);

        for (int i = 0; i < readers.length; i++)      // add new indexes
          merger.add(readers[i]);

        int docCount = merger.merge();                // merge 'em

        synchronized(this) {
          segmentInfos.clear();                      // pop old infos & add new
          info = new SegmentInfo(mergedName, docCount, directory, false, true,
                                 -1, null, false, merger.hasProx());
          setDiagnostics(info, "addIndexes(IndexReader...)");
          segmentInfos.add(info);
        }

        // Notify DocumentsWriter that the flushed count just increased
        docWriter.updateFlushedDocCount(docCount);

        success = true;

      } finally {
        // Always hand the pooled reader back
        if (sReader != null) {
          readerPool.release(sReader);
        }
      }
    } finally {
      if (!success) {
        if (infoStream != null)
          message("hit exception in addIndexes during merge");
        rollbackTransaction();
      } else {
        commitTransaction();
      }
    }

    // Optionally convert the newly merged segment to a
    // compound file, inside a second transaction
    if (mergePolicy instanceof LogMergePolicy && getUseCompoundFile()) {

      List<String> files = null;

      synchronized(this) {
        // Must incRef our files so that if another thread
        // is running merge/optimize, it doesn't delete our
        // segment's files before we have a chance to
        // finish making the compound file.
        if (segmentInfos.contains(info)) {
          files = info.files();
          deleter.incRef(files);
        }
      }

      // files stays null when our segment is no longer in
      // segmentInfos, in which case there is nothing to do
      if (files != null) {

        success = false;

        startTransaction(false);

        try {
          merger.createCompoundFile(mergedName + ".cfs");
          synchronized(this) {
            info.setUseCompoundFile(true);
          }

          success = true;

        } finally {

          // Balance the incRef(files) above
          deleter.decRef(files);

          if (!success) {
            if (infoStream != null)
              message("hit exception building compound file in addIndexes during merge");

            rollbackTransaction();
          } else {
            commitTransaction();
          }
        }
      }
    }
  } catch (OutOfMemoryError oom) {
    handleOOM(oom, "addIndexes(IndexReader...)");
  } finally {
    // Undo the pauseAllThreads() above
    if (docWriter != null) {
      docWriter.resumeAllThreads();
    }
  }
}
3314
// This is called after pending added and deleted
// documents have been flushed to the Directory but before
// the change is committed (new segments_N file written).
// The implementation here is intentionally empty.
void doAfterFlush()
  throws IOException {
}
3321
/** Expert: prepare for commit.  Checks that the writer is
 * open and then calls {@link #prepareCommit(Map)} with a
 * <code>null</code> commitUserData.
 *
 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 * you should immediately close the writer.  See <a
 * href="#OOME">above</a> for details.</p>
 *
 * @see #prepareCommit(Map) */
public final void prepareCommit() throws CorruptIndexException, IOException {
  ensureOpen();
  prepareCommit(null);
}
3333
3334 /** <p>Expert: prepare for commit, specifying
3335 * commitUserData Map (String -> String). This does the
3336 * first phase of 2-phase commit. This method does all
3337 * steps necessary to commit changes since this writer
3338 * was opened: flushes pending added and deleted docs,
3339 * syncs the index files, writes most of next segments_N
3340 * file. After calling this you must call either {@link
3341 * #commit()} to finish the commit, or {@link
3342 * #rollback()} to revert the commit and undo all changes
3343 * done since the writer was opened.</p>
3344 *
3345 * You can also just call {@link #commit(Map)} directly
3346 * without prepareCommit first in which case that method
3347 * will internally call prepareCommit.
3348 *
3349 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3350 * you should immediately close the writer. See <a
3351 * href="#OOME">above</a> for details.</p>
3352 *
3353 * @param commitUserData Opaque Map (String->String)
3354 * that's recorded into the segments file in the index,
3355 * and retrievable by {@link
3356 * IndexReader#getCommitUserData}. Note that when
3357 * IndexWriter commits itself during {@link #close}, the
3358 * commitUserData is unchanged (just carried over from
3359 * the prior commit). If this is null then the previous
3360 * commitUserData is kept. Also, the commitUserData will
3361 * only "stick" if there are actually changes in the
3362 * index to commit.
3363 */
3364 public final void prepareCommit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
3365
3366 if (hitOOM) {
3367 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
3368 }
3369
3370 if (pendingCommit != null)
3371 throw new IllegalStateException("prepareCommit was already called with no corresponding call to commit");
3372
3373 if (infoStream != null)
3374 message("prepareCommit: flush");
3375
3376 flush(true, true, true);
3377
3378 startCommit(0, commitUserData);
3379 }
3380
// Used only by commit, below; lock order is commitLock -> IW
// (take commitLock first, then the IndexWriter monitor)
private final Object commitLock = new Object();
3383
/*
 * Runs startCommit(sizeInBytes, null) followed by
 * finishCommit() while holding commitLock.
 */
private void commit(long sizeInBytes) throws IOException {
  synchronized(commitLock) {
    startCommit(sizeInBytes, null);
    finishCommit();
  }
}
3390
/**
 * <p>Commits all pending changes (added and deleted
 * documents, optimizations, segment merges, added
 * indexes, etc.) to the index, and syncs all referenced
 * index files, such that a reader will see the changes
 * and the index updates will survive an OS or machine
 * crash or power loss.  Note that this does not wait for
 * any running background merges to finish.  This may be a
 * costly operation, so you should test the cost in your
 * application and do it only when really necessary.</p>
 *
 * <p> Note that this operation calls Directory.sync on
 * the index files.  That call should not return until the
 * file contents and metadata are on stable storage.  For
 * FSDirectory, this calls the OS's fsync.  But, beware:
 * some hardware devices may in fact cache writes even
 * during fsync, and return before the bits are actually
 * on stable storage, to give the appearance of faster
 * performance.  If you have such a device, and it does
 * not have a battery backup (for example) then on power
 * loss it may still lose data.  Lucene cannot guarantee
 * consistency on such devices.  </p>
 *
 * <p>This is equivalent to calling {@link #commit(Map)}
 * with a <code>null</code> commitUserData.</p>
 *
 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 * you should immediately close the writer.  See <a
 * href="#OOME">above</a> for details.</p>
 *
 * @see #prepareCommit
 * @see #commit(Map)
 */
public final void commit() throws CorruptIndexException, IOException {
  commit(null);
}
3424
3425 /** Commits all changes to the index, specifying a
3426 * commitUserData Map (String -> String). This just
3427 * calls {@link #prepareCommit(Map)} (if you didn't
3428 * already call it) and then {@link #finishCommit}.
3429 *
3430 * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
3431 * you should immediately close the writer. See <a
3432 * href="#OOME">above</a> for details.</p>
3433 */
3434 public final void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
3435
3436 ensureOpen();
3437
3438 if (infoStream != null) {
3439 message("commit: start");
3440 }
3441
3442 synchronized(commitLock) {
3443 if (infoStream != null) {
3444 message("commit: enter lock");
3445 }
3446
3447 if (pendingCommit == null) {
3448 if (infoStream != null) {
3449 message("commit: now prepare");
3450 }
3451 prepareCommit(commitUserData);
3452 } else if (infoStream != null) {
3453 message("commit: already prepared");
3454 }
3455
3456 finishCommit();
3457 }
3458 }
3459
3460 private synchronized final void finishCommit() throws CorruptIndexException, IOException {
3461
3462 if (pendingCommit != null) {
3463 try {
3464 if (infoStream != null)
3465 message("commit: pendingCommit != null");
3466 pendingCommit.finishCommit(directory);
3467 if (infoStream != null)
3468 message("commit: wrote segments file \"" + pendingCommit.getCurrentSegmentFileName() + "\"");
3469 lastCommitChangeCount = pendingCommitChangeCount;
3470 segmentInfos.updateGeneration(pendingCommit);
3471 segmentInfos.setUserData(pendingCommit.getUserData());
3472 setRollbackSegmentInfos(pendingCommit);
3473 deleter.checkpoint(pendingCommit, true);
3474 } finally {
3475 deleter.decRef(pendingCommit);
3476 pendingCommit = null;
3477 notifyAll();
3478 }
3479
3480 } else if (infoStream != null)
3481 message("commit: pendingCommit == null; skip");
3482
3483 if (infoStream != null)
3484 message("commit: done");
3485 }
3486
/**
 * Flush all in-memory buffered updates (adds and deletes)
 * to the Directory.
 * @param triggerMerge if true, we may merge segments (if
 *  deletes or docs were flushed) if necessary
 * @param flushDocStores if false we are allowed to keep
 *  doc stores open to share with the next segment
 * @param flushDeletes whether pending deletes should also
 *  be flushed
 */
protected final void flush(boolean triggerMerge, boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException {
  // We can be called during close, when closing==true, so we must pass false to ensureOpen:
  ensureOpen(false);
  // maybeMerge() only runs when doFlush reports true and the caller asked for merging
  if (doFlush(flushDocStores, flushDeletes) && triggerMerge)
    maybeMerge();
}
3503
// TODO: this method should not have to be entirely
// synchronized, ie, merges should be allowed to commit
// even while a flush is happening
//
// Delegates to doFlushInternal and, whether or not that
// throws, clears docWriter's flush-pending state afterwards.
private synchronized final boolean doFlush(boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException {
  try {
    return doFlushInternal(flushDocStores, flushDeletes);
  } finally {
    docWriter.clearFlushPending();
  }
}
3514
3515 // TODO: this method should not have to be entirely
3516 // synchronized, ie, merges should be allowed to commit
3517 // even while a flush is happening
3518 private synchronized final boolean doFlushInternal(boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException {
3519
3520 if (hitOOM) {
3521 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush");
3522 }
3523
3524 ensureOpen(false);
3525
3526 assert testPoint("startDoFlush");
3527
3528 flushCount++;
3529
3530 // If we are flushing because too many deletes
3531 // accumulated, then we should apply the deletes to free
3532 // RAM:
3533 flushDeletes |= docWriter.doApplyDeletes();
3534
3535 // Make sure no threads are actively adding a document.
3536 // Returns true if docWriter is currently aborting, in
3537 // which case we skip flushing this segment
3538 if (docWriter.pauseAllThreads()) {
3539 docWriter.resumeAllThreads();
3540 return false;
3541 }
3542
3543 try {
3544
3545 SegmentInfo newSegment = null;
3546
3547 final int numDocs = docWriter.getNumDocsInRAM();
3548
3549 // Always flush docs if there are any
3550 boolean flushDocs = numDocs > 0;
3551
3552 String docStoreSegment = docWriter.getDocStoreSegment();
3553
3554 assert docStoreSegment != null || numDocs == 0: "dss=" + docStoreSegment + " numDocs=" + numDocs;
3555
3556 if (docStoreSegment == null)
3557 flushDocStores = false;
3558
3559 int docStoreOffset = docWriter.getDocStoreOffset();
3560
3561 boolean docStoreIsCompoundFile = false;
3562
3563 if (infoStream != null) {
3564 message(" flush: segment=" + docWriter.getSegment() +
3565 " docStoreSegment=" + docWriter.getDocStoreSegment() +
3566 " docStoreOffset=" + docStoreOffset +
3567 " flushDocs=" + flushDocs +
3568 " flushDeletes=" + flushDeletes +
3569 " flushDocStores=" + flushDocStores +
3570 " numDocs=" + numDocs +
3571 " numBufDelTerms=" + docWriter.getNumBufferedDeleteTerms());
3572 message(" index before flush " + segString());
3573 }
3574
3575 // Check if the doc stores must be separately flushed
3576 // because other segments, besides the one we are about
3577 // to flush, reference it
3578 if (flushDocStores && (!flushDocs || !docWriter.getSegment().equals(docWriter.getDocStoreSegment()))) {
3579 // We must separately flush the doc store
3580 if (infoStream != null)
3581 message(" flush shared docStore segment " + docStoreSegment);
3582
3583 docStoreIsCompoundFile = flushDocStores();
3584 flushDocStores = false;
3585 }
3586
3587 String segment = docWriter.getSegment();
3588
3589 // If we are flushing docs, segment must not be null:
3590 assert segment != null || !flushDocs;
3591
3592 if (flushDocs) {
3593
3594 boolean success = false;
3595 final int flushedDocCount;
3596
3597 try {
3598 flushedDocCount = docWriter.flush(flushDocStores);
3599 success = true;
3600 } finally {
3601 if (!success) {
3602 if (infoStream != null)
3603 message("hit exception flushing segment " + segment);
3604 deleter.refresh(segment);
3605 }
3606 }
3607
3608 if (0 == docStoreOffset && flushDocStores) {
3609 // This means we are flushing private doc stores
3610 // with this segment, so it will not be shared
3611 // with other segments
3612 assert docStoreSegment != null;
3613 assert docStoreSegment.equals(segment);
3614 docStoreOffset = -1;
3615 docStoreIsCompoundFile = false;
3616 docStoreSegment = null;
3617 }
3618
3619 // Create new SegmentInfo, but do not add to our
3620 // segmentInfos until deletes are flushed
3621 // successfully.
3622 newSegment = new SegmentInfo(segment,
3623 flushedDocCount,
3624 directory, false, true,
3625 docStoreOffset, docStoreSegment,
3626 docStoreIsCompoundFile,
3627 docWriter.hasProx());
3628 setDiagnostics(newSegment, "flush");
3629 }
3630
3631 docWriter.pushDeletes();
3632
3633 if (flushDocs) {
3634 segmentInfos.add(newSegment);
3635 checkpoint();
3636 }
3637
3638 if (flushDocs && mergePolicy.useCompoundFile(segmentInfos, newSegment)) {
3639 // Now build compound file
3640 boolean success = false;
3641 try {
3642 docWriter.createCompoundFile(segment);
3643 success = true;
3644 } finally {
3645 if (!success) {
3646 if (infoStream != null)
3647 message("hit exception creating compound file for newly flushed segment " + segment);
3648 deleter.deleteFile(segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
3649 }
3650 }
3651
3652 newSegment.setUseCompoundFile(true);
3653 checkpoint();
3654 }
3655
3656 if (flushDeletes) {
3657 applyDeletes();
3658 }
3659
3660 if (flushDocs)
3661 checkpoint();
3662
3663 doAfterFlush();
3664
3665 return flushDocs;
3666
3667 } catch (OutOfMemoryError oom) {
3668 handleOOM(oom, "doFlush");
3669 // never hit
3670 return false;
3671 } finally {
3672 docWriter.resumeAllThreads();
3673 }
3674 }
3675
3676 /** Expert: Return the total size of all index files currently cached in memory.
3677 * Useful for size management with flushRamDocs()
3678 */
3679 public final long ramSizeInBytes() {
3680 ensureOpen();
3681 return docWriter.getRAMUsed();
3682 }
3683
3684 /** Expert: Return the number of documents currently
3685 * buffered in RAM. */
3686 public final synchronized int numRamDocs() {
3687 ensureOpen();
3688 return docWriter.getNumDocsInRAM();
3689 }
3690
3691 private int ensureContiguousMerge(MergePolicy.OneMerge merge) {
3692
3693 int first = segmentInfos.indexOf(merge.segments.info(0));
3694 if (first == -1)
3695 throw new MergePolicy.MergeException("could not find segment " + merge.segments.info(0).name + " in current index " + segString(), directory);
3696
3697 final int numSegments = segmentInfos.size();
3698
3699 final int numSegmentsToMerge = merge.segments.size();
3700 for(int i=0;i<numSegmentsToMerge;i++) {
3701 final SegmentInfo info = merge.segments.info(i);
3702
3703 if (first + i >= numSegments || !segmentInfos.info(first+i).equals(info)) {
3704 if (segmentInfos.indexOf(info) == -1)
3705 throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
3706 else
3707 throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.segString(directory) + " vs " + segString() + "), which IndexWriter (currently) cannot handle",
3708 directory);
3709 }
3710 }
3711
3712 return first;
3713 }
3714
3715 /** Carefully merges deletes for the segments we just
3716 * merged. This is tricky because, although merging will
3717 * clear all deletes (compacts the documents), new
3718 * deletes may have been flushed to the segments since
3719 * the merge was started. This method "carries over"
3720 * such new deletes onto the newly merged segment, and
3721 * saves the resulting deletes file (incrementing the
3722 * delete generation for merge.info). If no deletes were
3723 * flushed, no new deletes file is saved. */
3724 synchronized private void commitMergedDeletes(MergePolicy.OneMerge merge, SegmentReader mergeReader) throws IOException {
3725
3726 assert testPoint("startCommitMergeDeletes");
3727
3728 final SegmentInfos sourceSegments = merge.segments;
3729
3730 if (infoStream != null)
3731 message("commitMergeDeletes " + merge.segString(directory));
3732
3733 // Carefully merge deletes that occurred after we
3734 // started merging:
3735 int docUpto = 0;
3736 int delCount = 0;
3737
3738 for(int i=0; i < sourceSegments.size(); i++) {
3739 SegmentInfo info = sourceSegments.info(i);
3740 int docCount = info.docCount;
3741 SegmentReader previousReader = merge.readersClone[i];
3742 SegmentReader currentReader = merge.readers[i];
3743 if (previousReader.hasDeletions()) {
3744
3745 // There were deletes on this segment when the merge
3746 // started. The merge has collapsed away those
3747 // deletes, but, if new deletes were flushed since
3748 // the merge started, we must now carefully keep any
3749 // newly flushed deletes but mapping them to the new
3750 // docIDs.
3751
3752 if (currentReader.numDeletedDocs() > previousReader.numDeletedDocs()) {
3753 // This means this segment has had new deletes
3754 // committed since we started the merge, so we
3755 // must merge them:
3756 for(int j=0;j<docCount;j++) {
3757 if (previousReader.isDeleted(j))
3758 assert currentReader.isDeleted(j);
3759 else {
3760 if (currentReader.isDeleted(j)) {
3761 mergeReader.doDelete(docUpto);
3762 delCount++;
3763 }
3764 docUpto++;
3765 }
3766 }
3767 } else {
3768 docUpto += docCount - previousReader.numDeletedDocs();
3769 }
3770 } else if (currentReader.hasDeletions()) {
3771 // This segment had no deletes before but now it
3772 // does:
3773 for(int j=0; j<docCount; j++) {
3774 if (currentReader.isDeleted(j)) {
3775 mergeReader.doDelete(docUpto);
3776 delCount++;
3777 }
3778 docUpto++;
3779 }
3780 } else
3781 // No deletes before or after
3782 docUpto += info.docCount;
3783 }
3784
3785 assert mergeReader.numDeletedDocs() == delCount;
3786
3787 mergeReader.hasChanges = delCount >= 0;
3788 }
3789
/* FIXME if we want to support non-contiguous segment merges */
/**
 * Installs the outcome of a finished merge into {@code segmentInfos}:
 * the source segments are removed and {@code merge.info} is inserted
 * at the position of the first of them.
 *
 * @param merge the merge being committed; must already be registered
 * @param merger the merger that produced the new segment; supplies
 *        doc maps, delete counts and the hasProx flag
 * @param mergedDocCount number of documents in the merged segment
 * @param mergedReader reader on the merged segment, used to carry over
 *        deletes that arrived while the merge was running
 * @return {@code true} if the merge was committed, {@code false} if it
 *         was skipped because the merge had been aborted
 * @throws IllegalStateException if this writer previously hit an
 *         OutOfMemoryError
 * @throws IOException declared by the signature; presumably raised by
 *         the deleter, checkpoint or reader calls (not visible here)
 */
synchronized private boolean commitMerge(MergePolicy.OneMerge merge, SegmentMerger merger, int mergedDocCount, SegmentReader mergedReader) throws IOException {

  assert testPoint("startCommitMerge");

  if (hitOOM) {
    throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete merge");
  }

  if (infoStream != null)
    message("commitMerge: " + merge.segString(directory) + " index=" + segString());

  assert merge.registerDone;

  // If merge was explicitly aborted, or, if rollback() or
  // rollbackTransaction() had been called since our merge
  // started (which results in an unqualified
  // deleter.refresh() call that will remove any index
  // file that current segments does not reference), we
  // abort this merge
  if (merge.isAborted()) {
    if (infoStream != null)
      message("commitMerge: skipping merge " + merge.segString(directory) + ": it was aborted");

    // Hand the merged segment's name to the deleter so it can
    // refresh its view of that segment's files
    deleter.refresh(merge.info.name);
    return false;
  }

  // Position of the first source segment; also validates that the
  // source segments are still contiguous in segmentInfos
  final int start = ensureContiguousMerge(merge);

  // Carry over deletes flushed since the merge started, then let the
  // doc writer remap its buffered deletes
  commitMergedDeletes(merge, mergedReader);
  docWriter.remapDeletes(segmentInfos, merger.getDocMaps(), merger.getDelCounts(), merge, mergedDocCount);

  // Simple optimization: if the doc store we are using
  // has been closed and is in now compound format (but
  // wasn't when we started), then we will switch to the
  // compound format as well:
  final String mergeDocStoreSegment = merge.info.getDocStoreSegment();
  if (mergeDocStoreSegment != null && !merge.info.getDocStoreIsCompoundFile()) {
    final int size = segmentInfos.size();
    for(int i=0;i<size;i++) {
      final SegmentInfo info = segmentInfos.info(i);
      final String docStoreSegment = info.getDocStoreSegment();
      if (docStoreSegment != null &&
          docStoreSegment.equals(mergeDocStoreSegment) &&
          info.getDocStoreIsCompoundFile()) {
        merge.info.setDocStoreIsCompoundFile(true);
        break;
      }
    }
  }

  merge.info.setHasProx(merger.hasProx());

  // Replace the run of source segments with the single merged segment
  segmentInfos.subList(start, start + merge.segments.size()).clear();
  assert !segmentInfos.contains(merge.info);
  segmentInfos.add(start, merge.info);

  // Must note the change to segmentInfos so any commits
  // in-flight don't lose it:
  checkpoint();

  // If the merged segments had pending changes, clear
  // them so that they don't bother writing them to
  // disk, updating SegmentInfo, etc.:
  readerPool.clear(merge.segments);

  // Record the new segment in segmentsToOptimize when this merge
  // was flagged as an optimize merge
  if (merge.optimize)
    segmentsToOptimize.add(merge.info);
  return true;
}
3861
/**
 * Clears the {@code increfDone} flag of the given merge.  The flag
 * must currently be set (asserted).  In this code no file reference
 * counts are touched here; only the flag is reset.
 *
 * @param merge the merge whose flag is cleared
 */
private synchronized void decrefMergeSegments(MergePolicy.OneMerge merge) throws IOException {
  assert merge.increfDone;
  merge.increfDone = false;
}
3866
3867 final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException {
3868
3869 if (infoStream != null) {
3870 message("handleMergeException: merge=" + merge.segString(directory) + " exc=" + t);
3871 }
3872
3873 // Set the exception on the merge, so if
3874 // optimize() is waiting on us it sees the root
3875 // cause exception:
3876 merge.setException(t);
3877 addMergeException(merge);
3878
3879 if (t instanceof MergePolicy.MergeAbortedException) {
3880 // We can ignore this exception (it happens when
3881 // close(false) or rollback is called), unless the
3882 // merge involves segments from external directories,
3883 // in which case we must throw it so, for example, the
3884 // rollbackTransaction code in addIndexes* is
3885 // executed.
3886 if (merge.isExternal)
3887 throw (MergePolicy.MergeAbortedException) t;
3888 } else if (t instanceof IOException)
3889 throw (IOException) t;
3890 else if (t instanceof RuntimeException)
3891 throw (RuntimeException) t;
3892 else if (t instanceof Error)
3893 throw (Error) t;
3894 else
3895 // Should not get here
3896 throw new RuntimeException(t);
3897 }
3898
/**
 * Merges the indicated segments, replacing them in the stack with a
 * single segment.
 *
 * <p>Runs the three phases {@code mergeInit}, {@code mergeMiddle} and
 * {@code mergeSuccess}.  Any throwable raised by them is routed through
 * {@code handleMergeException}, which may swallow it or rethrow it.
 * {@code mergeFinish} is always run afterwards while holding this
 * writer's monitor.  An {@code OutOfMemoryError} escaping any of this
 * is passed to {@code handleOOM}.
 *
 * @param merge the merge to execute
 */

final void merge(MergePolicy.OneMerge merge)
  throws CorruptIndexException, IOException {

  // Stays false unless all three phases completed without throwing
  boolean success = false;

  try {
    try {
      try {
        mergeInit(merge);

        if (infoStream != null)
          message("now merge\n merge=" + merge.segString(directory) + "\n merge=" + merge + "\n index=" + segString());

        mergeMiddle(merge);
        mergeSuccess(merge);
        success = true;
      } catch (Throwable t) {
        // May swallow t (aborted, non-external merge) or rethrow it
        handleMergeException(t, merge);
      }
    } finally {
      synchronized(this) {
        mergeFinish(merge);

        if (!success) {
          if (infoStream != null)
            message("hit exception during merge");
          // The merged segment never made it into segmentInfos:
          // hand its name to the deleter
          if (merge.info != null && !segmentInfos.contains(merge.info))
            deleter.refresh(merge.info.name);
        }

        // This merge (and, generally, any change to the
        // segments) may now enable new merges, so we call
        // merge policy & update pending merges.
        if (success && !merge.isAborted() && !closed && !closing)
          updatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize);
      }
    }
  } catch (OutOfMemoryError oom) {
    handleOOM(oom, "merge");
  }
}
3945
/** Hook that's called when the specified merge is complete.
 *  This implementation is intentionally empty.
 *
 *  @param merge the merge that just completed */
void mergeSuccess(MergePolicy.OneMerge merge) {
}
3949
3950 /** Checks whether this merge involves any segments
3951 * already participating in a merge. If not, this merge
3952 * is "registered", meaning we record that its segments
3953 * are now participating in a merge, and true is
3954 * returned. Else (the merge conflicts) false is
3955 * returned. */
3956 final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException {
3957
3958 if (merge.registerDone)
3959 return true;
3960
3961 if (stopMerges) {
3962 merge.abort();
3963 throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.segString(directory));
3964 }
3965
3966 final int count = merge.segments.size();
3967 boolean isExternal = false;
3968 for(int i=0;i<count;i++) {
3969 final SegmentInfo info = merge.segments.info(i);
3970 if (mergingSegments.contains(info))
3971 return false;
3972 if (segmentInfos.indexOf(info) == -1)
3973 return false;
3974 if (info.dir != directory)
3975 isExternal = true;
3976 }
3977
3978 ensureContiguousMerge(merge);
3979
3980 pendingMerges.add(merge);
3981
3982 if (infoStream != null)
3983 message("add merge to pendingMerges: " + merge.segString(directory) + " [total " + pendingMerges.size() + " pending]");
3984
3985 merge.mergeGen = mergeGen;
3986 merge.isExternal = isExternal;
3987
3988 // OK it does not conflict; now record that this merge
3989 // is running (while synchronized) to avoid race
3990 // condition where two conflicting merges from different
3991 // threads, start
3992 for(int i=0;i<count;i++)
3993 mergingSegments.add(merge.segments.info(i));
3994
3995 // Merge is now registered
3996 merge.registerDone = true;
3997 return true;
3998 }
3999
4000 /** Does initial setup for a merge, which is fast but holds
4001 * the synchronized lock on IndexWriter instance. */
4002 final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException {
4003 boolean success = false;
4004 try {
4005 _mergeInit(merge);
4006 success = true;
4007 } finally {
4008 if (!success) {
4009 mergeFinish(merge);
4010 }
4011 }
4012 }
4013
/**
 * Does the real work of mergeInit: applies buffered deletes, decides
 * whether the doc stores (stored fields and vectors) must be merged
 * or can simply be shared, flushes first when a segment still refers
 * to the doc store currently being written, and finally creates
 * {@code merge.info} for the segment the merge will produce.
 *
 * <p>Returns without doing anything if {@code merge.info} is already
 * set (init already done) or if the merge has been aborted.
 *
 * @param merge a registered merge
 * @throws IllegalStateException if this writer previously hit an
 *         OutOfMemoryError
 */
final synchronized private void _mergeInit(MergePolicy.OneMerge merge) throws IOException {

  assert testPoint("startMergeInit");

  assert merge.registerDone;
  assert !merge.optimize || merge.maxNumSegmentsOptimize > 0;

  if (hitOOM) {
    throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge");
  }

  if (merge.info != null)
    // mergeInit already done
    return;

  if (merge.isAborted())
    return;

  applyDeletes();

  final SegmentInfos sourceSegments = merge.segments;
  final int end = sourceSegments.size();

  // Check whether this merge will allow us to skip
  // merging the doc stores (stored field & vectors).
  // This is a very substantial optimization (saves tons
  // of IO).

  Directory lastDir = directory;
  String lastDocStoreSegment = null;
  // Doc store offset the next segment must start at for the
  // segments to be contiguous in the shared doc store; -1 until
  // the first segment has been seen
  int next = -1;

  boolean mergeDocStores = false;
  boolean doFlushDocStore = false;
  final String currentDocStoreSegment = docWriter.getDocStoreSegment();

  // Test each segment to be merged: check if we need to
  // flush/merge doc stores
  for (int i = 0; i < end; i++) {
    SegmentInfo si = sourceSegments.info(i);

    // If it has deletions we must merge the doc stores
    if (si.hasDeletions())
      mergeDocStores = true;

    // If it has its own (private) doc stores we must
    // merge the doc stores
    if (-1 == si.getDocStoreOffset())
      mergeDocStores = true;

    // If it has a different doc store segment than
    // previous segments, we must merge the doc stores
    String docStoreSegment = si.getDocStoreSegment();
    if (docStoreSegment == null)
      mergeDocStores = true;
    else if (lastDocStoreSegment == null)
      lastDocStoreSegment = docStoreSegment;
    else if (!lastDocStoreSegment.equals(docStoreSegment))
      mergeDocStores = true;

    // Segments' docStoreOffsets must be in-order,
    // contiguous.  For the default merge policy now
    // this will always be the case but for an arbitrary
    // merge policy this may not be the case
    if (-1 == next)
      next = si.getDocStoreOffset() + si.docCount;
    else if (next != si.getDocStoreOffset())
      mergeDocStores = true;
    else
      next = si.getDocStoreOffset() + si.docCount;

    // If the segment comes from a different directory
    // we must merge
    if (lastDir != si.dir)
      mergeDocStores = true;

    // If the segment is referencing the current "live"
    // doc store outputs then we must flush them first
    // (only acted upon below when the doc stores are merged)
    if (si.getDocStoreOffset() != -1 && currentDocStoreSegment != null && si.getDocStoreSegment().equals(currentDocStoreSegment)) {
      doFlushDocStore = true;
    }
  }

  final int docStoreOffset;
  final String docStoreSegment;
  final boolean docStoreIsCompoundFile;

  if (mergeDocStores) {
    // The merged segment gets its own, private doc stores
    docStoreOffset = -1;
    docStoreSegment = null;
    docStoreIsCompoundFile = false;
  } else {
    // The merged segment keeps sharing the doc store of its
    // source segments, starting where the first one starts
    SegmentInfo si = sourceSegments.info(0);
    docStoreOffset = si.getDocStoreOffset();
    docStoreSegment = si.getDocStoreSegment();
    docStoreIsCompoundFile = si.getDocStoreIsCompoundFile();
  }

  if (mergeDocStores && doFlushDocStore) {
    // SegmentMerger intends to merge the doc stores
    // (stored fields, vectors), and at least one of the
    // segments to be merged refers to the currently
    // live doc stores.

    // TODO: if we know we are about to merge away these
    // newly flushed doc store files then we should not
    // make compound file out of them...
    if (infoStream != null)
      message("now flush at merge");
    doFlush(true, false);
  }

  merge.increfDone = true;

  merge.mergeDocStores = mergeDocStores;

  // Bind a new segment name here so even with
  // ConcurrentMergePolicy we keep deterministic segment
  // names.
  merge.info = new SegmentInfo(newSegmentName(), 0,
                               directory, false, true,
                               docStoreOffset,
                               docStoreSegment,
                               docStoreIsCompoundFile,
                               false);


  // Diagnostics recorded on the merged segment
  Map<String,String> details = new HashMap<String,String>();
  details.put("optimize", merge.optimize+"");
  details.put("mergeFactor", end+"");
  details.put("mergeDocStores", mergeDocStores+"");
  setDiagnostics(merge.info, "merge", details);

  // Also enroll the merged segment into mergingSegments;
  // this prevents it from getting selected for a merge
  // after our merge is done but while we are building the
  // CFS:
  mergingSegments.add(merge.info);
}
4153
4154 private void setDiagnostics(SegmentInfo info, String source) {
4155 setDiagnostics(info, source, null);
4156 }
4157
4158 private void setDiagnostics(SegmentInfo info, String source, Map<String,String> details) {
4159 Map<String,String> diagnostics = new HashMap<String,String>();
4160 diagnostics.put("source", source);
4161 diagnostics.put("lucene.version", Constants.LUCENE_VERSION);
4162 diagnostics.put("os", Constants.OS_NAME+"");
4163 diagnostics.put("os.arch", Constants.OS_ARCH+"");
4164 diagnostics.put("os.version", Constants.OS_VERSION+"");
4165 diagnostics.put("java.version", Constants.JAVA_VERSION+"");
4166 diagnostics.put("java.vendor", Constants.JAVA_VENDOR+"");
4167 if (details != null) {
4168 diagnostics.putAll(details);
4169 }
4170 info.setDiagnostics(diagnostics);
4171 }
4172
4173 /** Does fininishing for a merge, which is fast but holds
4174 * the synchronized lock on IndexWriter instance. */
4175 final synchronized void mergeFinish(MergePolicy.OneMerge merge) throws IOException {
4176
4177 // Optimize, addIndexes or finishMerges may be waiting
4178 // on merges to finish.
4179 notifyAll();
4180
4181 if (merge.increfDone)
4182 decrefMergeSegments(merge);
4183
4184 // It's possible we are called twice, eg if there was an
4185 // exception inside mergeInit
4186 if (merge.registerDone) {
4187 final SegmentInfos sourceSegments = merge.segments;
4188 final int end = sourceSegments.size();
4189 for(int i=0;i<end;i++)
4190 mergingSegments.remove(sourceSegments.info(i));
4191 mergingSegments.remove(merge.info);
4192 merge.registerDone = false;
4193 }
4194
4195 runningMerges.remove(merge);
4196 }
4197
/** Does the actual (time-consuming) work of the merge,
 *  but without holding synchronized lock on IndexWriter
 *  instance.
 *
 *  <p>Steps: open a pooled reader plus a private clone for
 *  every source segment, run the SegmentMerger, commit the
 *  result via commitMerge, release the readers, and finally
 *  (if requested by the merge) build the compound file for
 *  the merged segment.
 *
 *  @param merge an initialized merge (merge.info is read)
 *  @return the number of documents in the merged segment, or
 *          0 if the merge turned out to be aborted */
final private int mergeMiddle(MergePolicy.OneMerge merge)
  throws CorruptIndexException, IOException {

  merge.checkAborted(directory);

  final String mergedName = merge.info.name;

  SegmentMerger merger = null;

  int mergedDocCount = 0;

  SegmentInfos sourceSegments = merge.segments;
  final int numSegments = sourceSegments.size();

  if (infoStream != null)
    message("merging " + merge.segString(directory));

  merger = new SegmentMerger(this, mergedName, merge);

  merge.readers = new SegmentReader[numSegments];
  merge.readersClone = new SegmentReader[numSegments];

  // Becomes true when any cloned reader has deletions, which
  // forces the doc stores to be merged
  boolean mergeDocStores = false;

  // Names of the shared doc store segments used by the sources
  final Set<String> dss = new HashSet<String>();

  // This is try/finally to make sure merger's readers are
  // closed:
  boolean success = false;
  try {
    int totDocCount = 0;

    for (int i = 0; i < numSegments; i++) {

      final SegmentInfo info = sourceSegments.info(i);

      // Hold onto the "live" reader; we will use this to
      // commit merged deletes
      SegmentReader reader = merge.readers[i] = readerPool.get(info, merge.mergeDocStores,
                                                               MERGE_READ_BUFFER_SIZE,
                                                               -1);

      // We clone the segment readers because other
      // deletes may come in while we're merging so we
      // need readers that will not change
      SegmentReader clone = merge.readersClone[i] = (SegmentReader) reader.clone(true);
      merger.add(clone);

      if (clone.hasDeletions()) {
        mergeDocStores = true;
      }

      if (info.getDocStoreOffset() != -1) {
        dss.add(info.getDocStoreSegment());
      }

      totDocCount += clone.numDocs();
    }

    if (infoStream != null) {
      message("merge: total "+totDocCount+" docs");
    }

    merge.checkAborted(directory);

    // If deletions have arrived and it has now become
    // necessary to merge doc stores, go and open them:
    if (mergeDocStores && !merge.mergeDocStores) {
      merge.mergeDocStores = true;
      synchronized(this) {
        // One of the sources still uses the doc store that is
        // currently being written: flush it first
        if (dss.contains(docWriter.getDocStoreSegment())) {
          if (infoStream != null)
            message("now flush at mergeMiddle");
          doFlush(true, false);
        }
      }

      for(int i=0;i<numSegments;i++) {
        merge.readersClone[i].openDocStores();
      }

      // Clear DSS
      synchronized(this) {
        merge.info.setDocStore(-1, null, false);
      }
    }

    // This is where all the work happens:
    mergedDocCount = merge.info.docCount = merger.merge(merge.mergeDocStores);

    assert mergedDocCount == totDocCount;

    // TODO: in the non-realtime case, we may want to only
    // keep deletes (it's costly to open entire reader
    // when we just need deletes)

    final SegmentReader mergedReader = readerPool.get(merge.info, false, BufferedIndexInput.BUFFER_SIZE, -1);
    try {
      if (poolReaders && mergedSegmentWarmer != null) {
        mergedSegmentWarmer.warm(mergedReader);
      }
      if (!commitMerge(merge, merger, mergedDocCount, mergedReader))
        // commitMerge will return false if this merge was aborted
        return 0;
    } finally {
      synchronized(this) {
        readerPool.release(mergedReader);
      }
    }

    success = true;
  } finally {
    synchronized(this) {
      if (!success) {
        // Suppress any new exceptions so we throw the
        // original cause
        for (int i=0;i<numSegments;i++) {
          if (merge.readers[i] != null) {
            try {
              readerPool.release(merge.readers[i], true);
            } catch (Throwable t) {
              // deliberately ignored, see comment above
            }
          }

          if (merge.readersClone[i] != null) {
            try {
              merge.readersClone[i].close();
            } catch (Throwable t) {
              // deliberately ignored, see comment above
            }
            // This was a private clone and we had the only reference
            assert merge.readersClone[i].getRefCount() == 0;
          }
        }
      } else {
        // Normal path: same cleanup, but failures propagate
        for (int i=0;i<numSegments;i++) {
          if (merge.readers[i] != null) {
            readerPool.release(merge.readers[i], true);
          }

          if (merge.readersClone[i] != null) {
            merge.readersClone[i].close();
            // This was a private clone and we had the only reference
            assert merge.readersClone[i].getRefCount() == 0;
          }
        }
      }
    }
  }

  // Must checkpoint before decrefing so any newly
  // referenced files in the new merge.info are incref'd
  // first:
  synchronized(this) {
    deleter.checkpoint(segmentInfos, false);
  }
  decrefMergeSegments(merge);

  if (merge.useCompoundFile) {

    // The flag is reused here to track the compound file build
    success = false;
    final String compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION;

    try {
      merger.createCompoundFile(compoundFileName);
      success = true;
    } catch (IOException ioe) {
      synchronized(this) {
        if (merge.isAborted()) {
          // This can happen if rollback or close(false)
          // is called -- fall through to logic below to
          // remove the partially created CFS:
          success = true;
        } else
          handleMergeException(ioe, merge);
      }
    } catch (Throwable t) {
      handleMergeException(t, merge);
    } finally {
      if (!success) {
        if (infoStream != null)
          message("hit exception creating compound file during merge");
        synchronized(this) {
          deleter.deleteFile(compoundFileName);
        }
      }
    }

    if (merge.isAborted()) {
      if (infoStream != null)
        message("abort merge after building CFS");
      deleter.deleteFile(compoundFileName);
      return 0;
    }

    synchronized(this) {
      if (segmentInfos.indexOf(merge.info) == -1 || merge.isAborted()) {
        // Our segment (committed in non-compound
        // format) got merged away while we were
        // building the compound format.
        deleter.deleteFile(compoundFileName);
      } else {
        // Switch the committed segment over to the compound file
        merge.info.setUseCompoundFile(true);
        checkpoint();
      }
    }
  }

  return mergedDocCount;
}
4410
4411 synchronized void addMergeException(MergePolicy.OneMerge merge) {
4412 assert merge.getException() != null;
4413 if (!mergeExceptions.contains(merge) && mergeGen == merge.mergeGen)
4414 mergeExceptions.add(merge);
4415 }
4416
4417 // Apply buffered deletes to all segments.
4418 private final synchronized boolean applyDeletes() throws CorruptIndexException, IOException {
4419 assert testPoint("startApplyDeletes");
4420 flushDeletesCount++;
4421 SegmentInfos rollback = (SegmentInfos) segmentInfos.clone();
4422 boolean success = false;
4423 boolean changed;
4424 try {
4425 changed = docWriter.applyDeletes(segmentInfos);
4426 success = true;
4427 } finally {
4428 if (!success) {
4429 if (infoStream != null)
4430 message("hit exception flushing deletes");
4431
4432 // Carefully remove any partially written .del
4433 // files
4434 final int size = rollback.size();
4435 for(int i=0;i<size;i++) {
4436 final String newDelFileName = segmentInfos.info(i).getDelFileName();
4437 final String delFileName = rollback.info(i).getDelFileName();
4438 if (newDelFileName != null && !newDelFileName.equals(delFileName))
4439 deleter.deleteFile(newDelFileName);
4440 }
4441
4442 // Fully replace the segmentInfos since flushed
4443 // deletes could have changed any of the
4444 // SegmentInfo instances:
4445 segmentInfos.clear();
4446 segmentInfos.addAll(rollback);
4447 }
4448 }
4449
4450 if (changed)
4451 checkpoint();
4452 return changed;
4453 }
4454
4455 // For test purposes.
4456 final synchronized int getBufferedDeleteTermsSize() {
4457 return docWriter.getBufferedDeleteTerms().size();
4458 }
4459
4460 // For test purposes.
4461 final synchronized int getNumBufferedDeleteTerms() {
4462 return docWriter.getNumBufferedDeleteTerms();
4463 }
4464
4465 // utility routines for tests
4466 SegmentInfo newestSegment() {
4467 return segmentInfos.info(segmentInfos.size()-1);
4468 }
4469
4470 public synchronized String segString() {
4471 return segString(segmentInfos);
4472 }
4473
4474 private synchronized String segString(SegmentInfos infos) {
4475 StringBuilder buffer = new StringBuilder();
4476 final int count = infos.size();
4477 for(int i = 0; i < count; i++) {
4478 if (i > 0) {
4479 buffer.append(' ');
4480 }
4481 final SegmentInfo info = infos.info(i);
4482 buffer.append(info.segString(directory));
4483 if (info.dir != directory)
4484 buffer.append("**");
4485 }
4486 return buffer.toString();
4487 }
4488
// Files that have been sync'd already.  This set is also the
// monitor object: startSync, finishSync and waitForAllSynced
// all synchronize on it when touching either set.
private HashSet<String> synced = new HashSet<String>();

// Files that are now being sync'd (accessed while holding the
// monitor of synced)
private HashSet<String> syncing = new HashSet<String>();
4494
4495 private boolean startSync(String fileName, Collection<String> pending) {
4496 synchronized(synced) {
4497 if (!synced.contains(fileName)) {
4498 if (!syncing.contains(fileName)) {
4499 syncing.add(fileName);
4500 return true;
4501 } else {
4502 pending.add(fileName);
4503 return false;
4504 }
4505 } else
4506 return false;
4507 }
4508 }
4509
4510 private void finishSync(String fileName, boolean success) {
4511 synchronized(synced) {
4512 assert syncing.contains(fileName);
4513 syncing.remove(fileName);
4514 if (success)
4515 synced.add(fileName);
4516 synced.notifyAll();
4517 }
4518 }
4519
4520 /** Blocks until all files in syncing are sync'd */
4521 private boolean waitForAllSynced(Collection<String> syncing) throws IOException {
4522 synchronized(synced) {
4523 Iterator<String> it = syncing.iterator();
4524 while(it.hasNext()) {
4525 final String fileName = it.next();
4526 while(!synced.contains(fileName)) {
4527 if (!syncing.contains(fileName))
4528 // There was an error because a file that was
4529 // previously syncing failed to appear in synced
4530 return false;
4531 else
4532 try {
4533 synced.wait();
4534 } catch (InterruptedException ie) {
4535 throw new ThreadInterruptedException(ie);
4536 }
4537 }
4538 }
4539 return true;
4540 }
4541 }
4542
4543 private synchronized void doWait() {
4544 // NOTE: the callers of this method should in theory
4545 // be able to do simply wait(), but, as a defense
4546 // against thread timing hazards where notifyAll()
4547 // falls to be called, we wait for at most 1 second
4548 // and then return so caller can check if wait
4549 // conditions are satisfied:
4550 try {
4551 wait(1000);
4552 } catch (InterruptedException ie) {
4553 throw new ThreadInterruptedException(ie);
4554 }
4555 }
4556
4557 /** Walk through all files referenced by the current
4558 * segmentInfos and ask the Directory to sync each file,
4559 * if it wasn't already. If that succeeds, then we
4560 * prepare a new segments_N file but do not fully commit
4561 * it. */
4562 private void startCommit(long sizeInBytes, Map<String,String> commitUserData) throws IOException {
4563
4564 assert testPoint("startStartCommit");
4565
4566 // TODO: as of LUCENE-2095, we can simplify this method,
4567 // since only 1 thread can be in here at once
4568
4569 if (hitOOM) {
4570 throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
4571 }
4572
4573 try {
4574
4575 if (infoStream != null)
4576 message("startCommit(): start sizeInBytes=" + sizeInBytes);
4577
4578 SegmentInfos toSync = null;
4579 final long myChangeCount;
4580
4581 synchronized(this) {
4582
4583 // Wait for any running addIndexes to complete
4584 // first, then block any from running until we've
4585 // copied the segmentInfos we intend to sync:
4586 blockAddIndexes(false);
4587
4588 // On commit the segmentInfos must never
4589 // reference a segment in another directory:
4590 assert !hasExternalSegments();
4591
4592 try {
4593
4594 assert lastCommitChangeCount <= changeCount;
4595
4596 if (changeCount == lastCommitChangeCount) {
4597 if (infoStream != null)
4598 message(" skip startCommit(): no changes pending");
4599 return;
4600 }
4601
4602 // First, we clone & incref the segmentInfos we intend
4603 // to sync, then, without locking, we sync() each file
4604 // referenced by toSync, in the background. Multiple
4605 // threads can be doing this at once, if say a large
4606 // merge and a small merge finish at the same time:
4607
4608 if (infoStream != null)
4609 message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount);
4610
4611 readerPool.commit();
4612
4613 toSync = (SegmentInfos) segmentInfos.clone();
4614
4615 if (commitUserData != null)
4616 toSync.setUserData(commitUserData);
4617
4618 deleter.incRef(toSync, false);
4619 myChangeCount = changeCount;
4620
4621 Collection<String> files = toSync.files(directory, false);
4622 for(final String fileName: files) {
4623 assert directory.fileExists(fileName): "file " + fileName + " does not exist";
4624 }
4625
4626 } finally {
4627 resumeAddIndexes();
4628 }
4629 }
4630
4631 assert testPoint("midStartCommit");
4632
4633 boolean setPending = false;
4634
4635 try {
4636
4637 // Loop until all files toSync references are sync'd:
4638 while(true) {
4639
4640 final Collection<String> pending = new ArrayList<String>();
4641
4642 Iterator<String> it = toSync.files(directory, false).iterator();
4643 while(it.hasNext()) {
4644 final String fileName = it.next();
4645 if (startSync(fileName, pending)) {
4646 boolean success = false;
4647 try {
4648 // Because we incRef'd this commit point, above,
4649 // the file had better exist:
4650 assert directory.fileExists(fileName): "file '" + fileName + "' does not exist dir=" + directory;
4651 if (infoStream != null)
4652 message("now sync " + fileName);
4653 directory.sync(fileName);
4654 success = true;
4655 } finally {
4656 finishSync(fileName, success);
4657 }
4658 }
4659 }
4660
4661 // All files that I require are either synced or being
4662 // synced by other threads. If they are being synced,
4663 // we must at this point block until they are done.
4664 // If this returns false, that means an error in
4665 // another thread resulted in failing to actually
4666 // sync one of our files, so we repeat:
4667 if (waitForAllSynced(pending))
4668 break;
4669 }
4670
4671 assert testPoint("midStartCommit2");
4672
4673 synchronized(this) {
4674 // If someone saved a newer version of segments file
4675 // since I first started syncing my version, I can
4676 // safely skip saving myself since I've been
4677 // superseded:
4678
4679 while(true) {
4680 if (myChangeCount <= lastCommitChangeCount) {
4681 if (infoStream != null) {
4682 message("sync superseded by newer infos");
4683 }
4684 break;
4685 } else if (pendingCommit == null) {
4686 // My turn to commit
4687
4688 if (segmentInfos.getGeneration() > toSync.getGeneration())
4689 toSync.updateGeneration(segmentInfos);
4690
4691 boolean success = false;
4692 try {
4693
4694 // Exception here means nothing is prepared
4695 // (this method unwinds everything it did on
4696 // an exception)
4697 try {
4698 toSync.prepareCommit(directory);
4699 } finally {
4700 // Have our master segmentInfos record the
4701 // generations we just prepared. We do this
4702 // on error or success so we don't
4703 // double-write a segments_N file.
4704 segmentInfos.updateGeneration(toSync);
4705 }
4706
4707 assert pendingCommit == null;
4708 setPending = true;
4709 pendingCommit = toSync;
4710 pendingCommitChangeCount = myChangeCount;
4711 success = true;
4712 } finally {
4713 if (!success && infoStream != null)
4714 message("hit exception committing segments file");
4715 }
4716 break;
4717 } else {
4718 // Must wait for other commit to complete
4719 doWait();
4720 }
4721 }
4722 }
4723
4724 if (infoStream != null)
4725 message("done all syncs");
4726
4727 assert testPoint("midStartCommitSuccess");
4728
4729 } finally {
4730 synchronized(this) {
4731 if (!setPending)
4732 deleter.decRef(toSync);
4733 }
4734 }
4735 } catch (OutOfMemoryError oom) {
4736 handleOOM(oom, "startCommit");
4737 }
4738 assert testPoint("finishStartCommit");
4739 }
4740
4741 /**
4742 * Returns <code>true</code> iff the index in the named directory is
4743 * currently locked.
4744 * @param directory the directory to check for a lock
4745 * @throws IOException if there is a low-level IO error
4746 */
4747 public static boolean isLocked(Directory directory) throws IOException {
4748 return directory.makeLock(WRITE_LOCK_NAME).isLocked();
4749 }
4750
4751 /**
4752 * Forcibly unlocks the index in the named directory.
4753 * <P>
4754 * Caution: this should only be used by failure recovery code,
4755 * when it is known that no other process nor thread is in fact
4756 * currently accessing this index.
4757 */
4758 public static void unlock(Directory directory) throws IOException {
4759 directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
4760 }
4761
4762 /**
4763 * Specifies maximum field length (in number of tokens/terms) in {@link IndexWriter} constructors.
4764 * {@link #setMaxFieldLength(int)} overrides the value set by
4765 * the constructor.
4766 */
4767 public static final class MaxFieldLength {
4768
4769 private int limit;
4770 private String name;
4771
4772 /**
4773 * Private type-safe-enum-pattern constructor.
4774 *
4775 * @param name instance name
4776 * @param limit maximum field length
4777 */
4778 private MaxFieldLength(String name, int limit) {
4779 this.name = name;
4780 this.limit = limit;
4781 }
4782
4783 /**
4784 * Public constructor to allow users to specify the maximum field size limit.
4785 *
4786 * @param limit The maximum field length
4787 */
4788 public MaxFieldLength(int limit) {
4789 this("User-specified", limit);
4790 }
4791
4792 public int getLimit() {
4793 return limit;
4794 }
4795
4796 @Override
4797 public String toString()
4798 {
4799 return name + ":" + limit;
4800 }
4801
4802 /** Sets the maximum field length to {@link Integer#MAX_VALUE}. */
4803 public static final MaxFieldLength UNLIMITED
4804 = new MaxFieldLength("UNLIMITED", Integer.MAX_VALUE);
4805
4806 /**
4807 * Sets the maximum field length to
4808 * {@link #DEFAULT_MAX_FIELD_LENGTH}
4809 * */
4810 public static final MaxFieldLength LIMITED
4811 = new MaxFieldLength("LIMITED", DEFAULT_MAX_FIELD_LENGTH);
4812 }
4813
4814 /** If {@link #getReader} has been called (ie, this writer
4815 * is in near real-time mode), then after a merge
4816 * completes, this class can be invoked to warm the
4817 * reader on the newly merged segment, before the merge
4818 * commits. This is not required for near real-time
4819 * search, but will reduce search latency on opening a
4820 * new near real-time reader after a merge completes.
4821 *
4822 * <p><b>NOTE:</b> This API is experimental and might
4823 * change in incompatible ways in the next release.</p>
4824 *
4825 * <p><b>NOTE</b>: warm is called before any deletes have
4826 * been carried over to the merged segment. */
4827 public static abstract class IndexReaderWarmer {
4828 public abstract void warm(IndexReader reader) throws IOException;
4829 }
4830
4831 private IndexReaderWarmer mergedSegmentWarmer;
4832
4833 /** Set the merged segment warmer. See {@link
4834 * IndexReaderWarmer}. */
4835 public void setMergedSegmentWarmer(IndexReaderWarmer warmer) {
4836 mergedSegmentWarmer = warmer;
4837 }
4838
4839 /** Returns the current merged segment warmer. See {@link
4840 * IndexReaderWarmer}. */
4841 public IndexReaderWarmer getMergedSegmentWarmer() {
4842 return mergedSegmentWarmer;
4843 }
4844
4845 private void handleOOM(OutOfMemoryError oom, String location) {
4846 if (infoStream != null) {
4847 message("hit OutOfMemoryError inside " + location);
4848 }
4849 hitOOM = true;
4850 throw oom;
4851 }
4852
4853 // Used only by assert for testing. Current points:
4854 // startDoFlush
4855 // startCommitMerge
4856 // startStartCommit
4857 // midStartCommit
4858 // midStartCommit2
4859 // midStartCommitSuccess
4860 // finishStartCommit
4861 // startCommitMergeDeletes
4862 // startMergeInit
4863 // startApplyDeletes
4864 // DocumentsWriter.ThreadState.init start
4865 boolean testPoint(String name) {
4866 return true;
4867 }
4868
4869 synchronized boolean nrtIsCurrent(SegmentInfos infos) {
4870 if (!infos.equals(segmentInfos)) {
4871 // if any structural changes (new segments), we are
4872 // stale
4873 return false;
4874 } else {
4875 return !docWriter.anyChanges();
4876 }
4877 }
4878
4879 synchronized boolean isClosed() {
4880 return closed;
4881 }
4882 }