    1   package org.apache.lucene.index;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import org.apache.lucene.analysis.Analyzer;
   21   import org.apache.lucene.document.Document;
   22   import org.apache.lucene.index.DocumentsWriter.IndexingChain;
   23   import org.apache.lucene.search.Similarity;
   24   import org.apache.lucene.search.Query;
   25   import org.apache.lucene.store.Directory;
   26   import org.apache.lucene.store.Lock;
   27   import org.apache.lucene.store.LockObtainFailedException;
   28   import org.apache.lucene.store.AlreadyClosedException;
   29   import org.apache.lucene.store.BufferedIndexInput;
   30   import org.apache.lucene.util.Constants;
   31   import org.apache.lucene.util.ThreadInterruptedException;
   32   
   33   import java.io.IOException;
   34   import java.io.Closeable;
   35   import java.io.PrintStream;
   36   import java.util.List;
   37   import java.util.Collection;
   38   import java.util.ArrayList;
   39   import java.util.HashMap;
   40   import java.util.Set;
   41   import java.util.HashSet;
   42   import java.util.LinkedList;
   43   import java.util.Iterator;
   44   import java.util.Map;
   45   
   46   /**
   47     An <code>IndexWriter</code> creates and maintains an index.
   48   
   49     <p>The <code>create</code> argument to the {@link
   50     #IndexWriter(Directory, Analyzer, boolean, MaxFieldLength) constructor} determines 
   51     whether a new index is created, or whether an existing index is
   52     opened.  Note that you can open an index with <code>create=true</code>
   53     even while readers are using the index.  The old readers will 
   54     continue to search the "point in time" snapshot they had opened, 
   55     and won't see the newly created index until they re-open.  There are
   56     also {@link #IndexWriter(Directory, Analyzer, MaxFieldLength) constructors}
   57     with no <code>create</code> argument which will create a new index
   58     if there is not already an index at the provided path and otherwise 
   59     open the existing index.</p>
   60   
   61     <p>In either case, documents are added with {@link #addDocument(Document)
   62     addDocument} and removed with {@link #deleteDocuments(Term)} or {@link
   63     #deleteDocuments(Query)}. A document can be updated with {@link
   64     #updateDocument(Term, Document) updateDocument} (which just deletes
   65     and then adds the entire document). When finished adding, deleting 
   66     and updating documents, {@link #close() close} should be called.</p>
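
           <p>A minimal sketch of that usage (assuming <code>dir</code> is a
           {@link Directory} and <code>analyzer</code> is an {@link Analyzer}
           already in scope, with error handling omitted):</p>

           <pre>
           IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
           Document doc = new Document();
           doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
           doc.add(new Field("body", "some text", Field.Store.NO, Field.Index.ANALYZED));
           writer.addDocument(doc);                          // add the document
           writer.updateDocument(new Term("id", "1"), doc);  // delete-then-add by term
           writer.deleteDocuments(new Term("id", "1"));      // delete by term
           writer.close();                                   // commit pending changes and release the write lock
           </pre>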
   67   
   68     <a name="flush"></a>
   69     <p>These changes are buffered in memory and periodically
   70     flushed to the {@link Directory} (during the above method
   71     calls).  A flush is triggered when there are enough
   72     buffered deletes (see {@link #setMaxBufferedDeleteTerms})
   73     or enough added documents since the last flush, whichever
   74     is sooner.  For the added documents, flushing is triggered
   75     either by RAM usage of the documents (see {@link
   76     #setRAMBufferSizeMB}) or the number of added documents.
   77     The default is to flush when RAM usage hits 16 MB.  For
   78     best indexing speed you should flush by RAM usage with a
   79     large RAM buffer.  Note that flushing just moves the
   80     internal buffered state in IndexWriter into the index, but
   81     these changes are not visible to IndexReader until either
   82     {@link #commit()} or {@link #close} is called.  A flush may
   83     also trigger one or more segment merges which by default
   84     run with a background thread so as not to block the
   85     addDocument calls (see <a href="#mergePolicy">below</a>
   86     for changing the {@link MergeScheduler}).</p>
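
           <p>For example, a sketch of flushing by RAM usage with a larger
           buffer and then making the changes visible (assuming the
           <code>writer</code> and <code>doc</code> from the example above):</p>

           <pre>
           writer.setRAMBufferSizeMB(48.0);   // flush when buffered documents use roughly 48 MB of RAM
           writer.addDocument(doc);           // buffered in RAM; flushed to the Directory as needed
           writer.commit();                   // only now do readers see the flushed changes
           </pre>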
   87   
   88     <p>If an index will not have more documents added for a while and optimal search
   89     performance is desired, then either the full {@link #optimize() optimize}
   90     method or partial {@link #optimize(int)} method should be
   91     called before the index is closed.</p>
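
           <p>For example (a sketch):</p>

           <pre>
           writer.optimize();          // full optimize, down to a single segment
           // or: writer.optimize(5);  // partial optimize, down to at most 5 segments
           writer.close();
           </pre>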
   92   
   93     <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open
   94     another <code>IndexWriter</code> on the same directory will lead to a
   95     {@link LockObtainFailedException}. The {@link LockObtainFailedException}
   96     is also thrown if an IndexReader on the same directory is used to delete documents
   97     from the index.</p>
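
           <p>A sketch of handling that failure (assuming <code>dir</code> and
           <code>analyzer</code> as above; other IOExceptions are not handled here):</p>

           <pre>
           try {
             IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
             // ... use writer ...
           } catch (LockObtainFailedException e) {
             // another IndexWriter (or a deleting IndexReader) holds write.lock on this directory
           }
           </pre>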
   98     
   99     <a name="deletionPolicy"></a>
  100     <p>Expert: <code>IndexWriter</code> allows an optional
  101     {@link IndexDeletionPolicy} implementation to be
  102     specified.  You can use this to control when prior commits
  103     are deleted from the index.  The default policy is {@link
  104     KeepOnlyLastCommitDeletionPolicy} which removes all prior
  105     commits as soon as a new commit is done (this matches
  106     behavior before 2.2).  Creating your own policy can allow
  107     you to explicitly keep previous "point in time" commits
  108     alive in the index for some time, to allow readers to
  109     refresh to the new commit without having the old commit
  110     deleted out from under them.  This is necessary on
  111     filesystems like NFS that do not support "delete on last
  112     close" semantics, which Lucene's "point in time" search
  113     normally relies on. </p>
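
           <p>For instance, {@link SnapshotDeletionPolicy} wraps another policy
           and lets you take a snapshot of the current commit so its files are
           not deleted until the snapshot is released; any custom
           {@link IndexDeletionPolicy} is passed the same way (a sketch):</p>

           <pre>
           IndexDeletionPolicy policy = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
           IndexWriter writer = new IndexWriter(dir, analyzer, policy, IndexWriter.MaxFieldLength.LIMITED);
           </pre>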
  114   
  115     <a name="mergePolicy"></a> <p>Expert:
  116     <code>IndexWriter</code> allows you to separately change
  117     the {@link MergePolicy} and the {@link MergeScheduler}.
  118     The {@link MergePolicy} is invoked whenever there are
  119     changes to the segments in the index.  Its role is to
  120     select which merges to do, if any, and return a {@link
  121     MergePolicy.MergeSpecification} describing the merges.  It
  122     also selects merges to do for optimize().  (The default is
   123     {@link LogByteSizeMergePolicy}.)  Then, the {@link
  124     MergeScheduler} is invoked with the requested merges and
  125     it decides when and how to run the merges.  The default is
  126     {@link ConcurrentMergeScheduler}. </p>
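
           <p>A sketch of changing both (the values are illustrative only, and
           exceptions from {@link #setMergeScheduler} are not handled):</p>

           <pre>
           LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy(writer);
           mergePolicy.setMaxMergeMB(256.0);                      // don't merge segments larger than 256 MB
           writer.setMergePolicy(mergePolicy);
           writer.setMergeScheduler(new SerialMergeScheduler());  // run merges in the calling thread
           </pre>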
  127   
  128     <a name="OOME"></a><p><b>NOTE</b>: if you hit an
  129     OutOfMemoryError then IndexWriter will quietly record this
  130     fact and block all future segment commits.  This is a
  131     defensive measure in case any internal state (buffered
  132     documents and deletions) were corrupted.  Any subsequent
  133     calls to {@link #commit()} will throw an
  134     IllegalStateException.  The only course of action is to
  135     call {@link #close()}, which internally will call {@link
  136     #rollback()}, to undo any changes to the index since the
  137     last commit.  You can also just call {@link #rollback()}
  138     directly.</p>
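
           <p>A sketch of reacting to that condition:</p>

           <pre>
           try {
             writer.addDocument(doc);
           } catch (OutOfMemoryError oom) {
             writer.close();   // internally calls rollback(), discarding changes since the last commit
             throw oom;
           }
           </pre>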
  139   
   140     <a name="thread-safety"></a><p><b>NOTE</b>:
   141     <code>IndexWriter</code> instances are completely thread
   142     safe, meaning multiple threads can call any of its
   143     methods concurrently.  If your application requires
  144     external synchronization, you should <b>not</b>
  145     synchronize on the <code>IndexWriter</code> instance as
  146     this may cause deadlock; use your own (non-Lucene) objects
  147     instead. </p>
  148     
  149     <p><b>NOTE</b>: If you call
  150     <code>Thread.interrupt()</code> on a thread that's within
  151     IndexWriter, IndexWriter will try to catch this (eg, if
  152     it's in a wait() or Thread.sleep()), and will then throw
  153     the unchecked exception {@link ThreadInterruptedException}
  154     and <b>clear</b> the interrupt status on the thread.</p>
  155   */
  156   
  157   /*
  158    * Clarification: Check Points (and commits)
  159    * IndexWriter writes new index files to the directory without writing a new segments_N
  160    * file which references these new files. It also means that the state of 
  161    * the in memory SegmentInfos object is different than the most recent
  162    * segments_N file written to the directory.
  163    * 
  164    * Each time the SegmentInfos is changed, and matches the (possibly 
  165    * modified) directory files, we have a new "check point". 
  166    * If the modified/new SegmentInfos is written to disk - as a new 
  167    * (generation of) segments_N file - this check point is also an 
  168    * IndexCommit.
  169    * 
  170    * A new checkpoint always replaces the previous checkpoint and 
  171    * becomes the new "front" of the index. This allows the IndexFileDeleter 
   172    * to delete files that are referenced only by stale checkpoints
   173    * (files that were created since the last commit, but are no longer
   174    * referenced by the "front" of the index). For this, IndexFileDeleter
   175    * keeps track of the last non-commit checkpoint.
  176    */
  177   public class IndexWriter implements Closeable {
  178   
  179     /**
   180      * Default value for the write lock timeout (1,000 milliseconds).
  181      * @see #setDefaultWriteLockTimeout
  182      */
  183     public static long WRITE_LOCK_TIMEOUT = 1000;
  184   
  185     private long writeLockTimeout = WRITE_LOCK_TIMEOUT;
  186   
  187     /**
  188      * Name of the write lock in the index.
  189      */
  190     public static final String WRITE_LOCK_NAME = "write.lock";
  191   
  192     /**
  193      * Value to denote a flush trigger is disabled
  194      */
  195     public final static int DISABLE_AUTO_FLUSH = -1;
  196   
  197     /**
  198      * Disabled by default (because IndexWriter flushes by RAM usage
  199      * by default). Change using {@link #setMaxBufferedDocs(int)}.
  200      */
  201     public final static int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;
  202   
  203     /**
  204      * Default value is 16 MB (which means flush when buffered
  205      * docs consume 16 MB RAM).  Change using {@link #setRAMBufferSizeMB}.
  206      */
  207     public final static double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
  208   
  209     /**
  210      * Disabled by default (because IndexWriter flushes by RAM usage
  211      * by default). Change using {@link #setMaxBufferedDeleteTerms(int)}.
  212      */
  213     public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;
  214   
  215     /**
  216      * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}.
  217      */
  218     public final static int DEFAULT_MAX_FIELD_LENGTH = 10000;
  219   
  220     /**
  221      * Default value is 128. Change using {@link #setTermIndexInterval(int)}.
  222      */
  223     public final static int DEFAULT_TERM_INDEX_INTERVAL = 128;
  224   
  225     /**
  226      * Absolute hard maximum length for a term.  If a term
  227      * arrives from the analyzer longer than this length, it
  228      * is skipped and a message is printed to infoStream, if
  229      * set (see {@link #setInfoStream}).
  230      */
  231     public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH;
  232   
  233     // The normal read buffer size defaults to 1024, but
  234     // increasing this during merging seems to yield
  235     // performance gains.  However we don't want to increase
  236     // it too much because there are quite a few
  237     // BufferedIndexInputs created during merging.  See
  238     // LUCENE-888 for details.
  239     private final static int MERGE_READ_BUFFER_SIZE = 4096;
  240   
  241     // Used for printing messages
  242     private static Object MESSAGE_ID_LOCK = new Object();
  243     private static int MESSAGE_ID = 0;
  244     private int messageID = -1;
  245     volatile private boolean hitOOM;
  246   
  247     private Directory directory;  // where this index resides
  248     private Analyzer analyzer;    // how to analyze text
  249   
  250     private Similarity similarity = Similarity.getDefault(); // how to normalize
  251   
  252     private volatile long changeCount; // increments every time a change is completed
  253     private long lastCommitChangeCount; // last changeCount that was committed
  254   
  255     private SegmentInfos rollbackSegmentInfos;      // segmentInfos we will fallback to if the commit fails
  256     private HashMap<SegmentInfo,Integer> rollbackSegments;
  257   
  258     volatile SegmentInfos pendingCommit;            // set when a commit is pending (after prepareCommit() & before commit())
  259     volatile long pendingCommitChangeCount;
  260   
  261     private SegmentInfos localRollbackSegmentInfos;      // segmentInfos we will fallback to if the commit fails
  262     private int localFlushedDocCount;               // saved docWriter.getFlushedDocCount during local transaction
  263   
  264     private SegmentInfos segmentInfos = new SegmentInfos();       // the segments
  265   
  266     private DocumentsWriter docWriter;
  267     private IndexFileDeleter deleter;
  268   
  269     private Set<SegmentInfo> segmentsToOptimize = new HashSet<SegmentInfo>();           // used by optimize to note those needing optimization
  270   
  271     private Lock writeLock;
  272   
  273     private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
  274   
  275     private boolean closed;
  276     private boolean closing;
  277   
  278     // Holds all SegmentInfo instances currently involved in
  279     // merges
  280     private HashSet<SegmentInfo> mergingSegments = new HashSet<SegmentInfo>();
  281   
  282     private MergePolicy mergePolicy = new LogByteSizeMergePolicy(this);
  283     private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
  284     private LinkedList<MergePolicy.OneMerge> pendingMerges = new LinkedList<MergePolicy.OneMerge>();
  285     private Set<MergePolicy.OneMerge> runningMerges = new HashSet<MergePolicy.OneMerge>();
  286     private List<MergePolicy.OneMerge> mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
  287     private long mergeGen;
  288     private boolean stopMerges;
  289   
  290     private int flushCount;
  291     private int flushDeletesCount;
  292   
  293     // Used to only allow one addIndexes to proceed at once
  294     // TODO: use ReadWriteLock once we are on 5.0
  295     private int readCount;                          // count of how many threads are holding read lock
  296     private Thread writeThread;                     // non-null if any thread holds write lock
  297     final ReaderPool readerPool = new ReaderPool();
  298     private int upgradeCount;
  299     
  300     // This is a "write once" variable (like the organic dye
  301     // on a DVD-R that may or may not be heated by a laser and
  302     // then cooled to permanently record the event): it's
  303     // false, until getReader() is called for the first time,
  304     // at which point it's switched to true and never changes
  305     // back to false.  Once this is true, we hold open and
  306     // reuse SegmentReader instances internally for applying
  307     // deletes, doing merges, and reopening near real-time
  308     // readers.
  309     private volatile boolean poolReaders;
  310     
  311     /**
  312      * Expert: returns a readonly reader, covering all
  313      * committed as well as un-committed changes to the index.
  314      * This provides "near real-time" searching, in that
  315      * changes made during an IndexWriter session can be
  316      * quickly made available for searching without closing
   317      * the writer or calling {@link #commit}.
  318      *
  319      * <p>Note that this is functionally equivalent to calling
   320      * {@link #commit} and then using {@link IndexReader#open} to
   321      * open a new reader.  But the turnaround time of this
  322      * method should be faster since it avoids the potentially
  323      * costly {@link #commit}.</p>
  324      *
  325      * <p>You must close the {@link IndexReader} returned by
  326      * this method once you are done using it.</p>
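            *
            * <p>A minimal sketch of near real-time usage (assuming
            * <code>writer</code> and <code>doc</code> are already in scope):</p>
            *
            * <pre>
            * writer.addDocument(doc);                  // not yet committed
            * IndexReader reader = writer.getReader();  // still sees the uncommitted document
            * try {
            *   // ... search against reader ...
            * } finally {
            *   reader.close();                         // always close the returned reader
            * }
            * </pre>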
  327      *
  328      * <p>It's <i>near</i> real-time because there is no hard
  329      * guarantee on how quickly you can get a new reader after
  330      * making changes with IndexWriter.  You'll have to
  331      * experiment in your situation to determine if it's
  332      * fast enough.  As this is a new and experimental
  333      * feature, please report back on your findings so we can
  334      * learn, improve and iterate.</p>
  335      *
  336      * <p>The resulting reader supports {@link
  337      * IndexReader#reopen}, but that call will simply forward
  338      * back to this method (though this may change in the
  339      * future).</p>
  340      *
  341      * <p>The very first time this method is called, this
  342      * writer instance will make every effort to pool the
  343      * readers that it opens for doing merges, applying
  344      * deletes, etc.  This means additional resources (RAM,
  345      * file descriptors, CPU time) will be consumed.</p>
  346      *
  347      * <p>For lower latency on reopening a reader, you should
  348      * call {@link #setMergedSegmentWarmer} to
  349      * pre-warm a newly merged segment before it's committed
  350      * to the index.  This is important for minimizing
  351      * index-to-search delay after a large merge.  </p>
  352      *
  353      * <p>If an addIndexes* call is running in another thread,
  354      * then this reader will only search those segments from
  355      * the foreign index that have been successfully copied
   356      * over, so far.</p>
  357      *
  358      * <p><b>NOTE</b>: Once the writer is closed, any
  359      * outstanding readers may continue to be used.  However,
  360      * if you attempt to reopen any of those readers, you'll
  361      * hit an {@link AlreadyClosedException}.</p>
  362      *
  363      * <p><b>NOTE:</b> This API is experimental and might
  364      * change in incompatible ways in the next release.</p>
  365      *
  366      * @return IndexReader that covers entire index plus all
  367      * changes made so far by this IndexWriter instance
  368      *
  369      * @throws IOException
  370      */
  371     public IndexReader getReader() throws IOException {
  372       return getReader(IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
  373     }
  374   
  375     /** Expert: like {@link #getReader}, except you can
  376      *  specify which termInfosIndexDivisor should be used for
  377      *  any newly opened readers.
  378      * @param termInfosIndexDivisor Subsamples which indexed
  379      *  terms are loaded into RAM. This has the same effect as {@link
  380      *  IndexWriter#setTermIndexInterval} except that setting
  381      *  must be done at indexing time while this setting can be
  382      *  set per reader.  When set to N, then one in every
  383      *  N*termIndexInterval terms in the index is loaded into
  384      *  memory.  By setting this to a value > 1 you can reduce
  385      *  memory usage, at the expense of higher latency when
  386      *  loading a TermInfo.  The default value is 1.  Set this
  387      *  to -1 to skip loading the terms index entirely. */
  388     public IndexReader getReader(int termInfosIndexDivisor) throws IOException {
  389       if (infoStream != null) {
  390         message("flush at getReader");
  391       }
  392   
  393       // Do this up front before flushing so that the readers
  394       // obtained during this flush are pooled, the first time
  395       // this method is called:
  396       poolReaders = true;
  397   
  398       flush(true, true, false);
  399       
  400       // Prevent segmentInfos from changing while opening the
  401       // reader; in theory we could do similar retry logic,
  402       // just like we do when loading segments_N
  403       synchronized(this) {
  404         applyDeletes();
  405         return new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor);
  406       }
  407     }
  408   
  409     /** Holds shared SegmentReader instances. IndexWriter uses
  410      *  SegmentReaders for 1) applying deletes, 2) doing
  411      *  merges, 3) handing out a real-time reader.  This pool
  412      *  reuses instances of the SegmentReaders in all these
  413      *  places if it is in "near real-time mode" (getReader()
  414      *  has been called on this instance). */
  415   
  416     class ReaderPool {
  417   
  418       private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>();
  419   
  420       /** Forcefully clear changes for the specified segments,
  421        *  and remove from the pool.   This is called on successful merge. */
  422       synchronized void clear(SegmentInfos infos) throws IOException {
  423         if (infos == null) {
  424           for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) {
  425             ent.getValue().hasChanges = false;
  426           }
  427         } else {
  428           for (final SegmentInfo info: infos) {
  429             if (readerMap.containsKey(info)) {
  430               readerMap.get(info).hasChanges = false;
  431             }
  432           }     
  433         }
  434       }
  435       
  436       // used only by asserts
  437       public synchronized boolean infoIsLive(SegmentInfo info) {
  438         int idx = segmentInfos.indexOf(info);
  439         assert idx != -1;
  440         assert segmentInfos.get(idx) == info;
  441         return true;
  442       }
  443   
  444       public synchronized SegmentInfo mapToLive(SegmentInfo info) {
  445         int idx = segmentInfos.indexOf(info);
  446         if (idx != -1) {
  447           info = segmentInfos.get(idx);
  448         }
  449         return info;
  450       }
  451       
  452       /**
  453        * Release the segment reader (i.e. decRef it and close if there
   454      * are no more references).
  455        * @param sr
  456        * @throws IOException
  457        */
  458       public synchronized void release(SegmentReader sr) throws IOException {
  459         release(sr, false);
  460       }
  461       
  462       /**
  463        * Release the segment reader (i.e. decRef it and close if there
   464      * are no more references).
  465        * @param sr
  466        * @throws IOException
  467        */
  468       public synchronized void release(SegmentReader sr, boolean drop) throws IOException {
  469   
  470         final boolean pooled = readerMap.containsKey(sr.getSegmentInfo());
  471   
  472         assert !pooled | readerMap.get(sr.getSegmentInfo()) == sr;
  473   
  474         // Drop caller's ref
  475         sr.decRef();
  476   
  477         if (pooled && (drop || (!poolReaders && sr.getRefCount() == 1))) {
  478   
  479           // We are the last ref to this reader; since we're
  480           // not pooling readers, we release it:
  481           readerMap.remove(sr.getSegmentInfo());
  482   
  483           assert !sr.hasChanges || Thread.holdsLock(IndexWriter.this);
  484   
  485           // Drop our ref -- this will commit any pending
  486           // changes to the dir
  487           boolean success = false;
  488           try {
  489             sr.close();
  490             success = true;
  491           } finally {
  492             if (!success && sr.hasChanges) {
  493               // Abandon the changes & retry closing:
  494               sr.hasChanges = false;
  495               try {
  496                 sr.close();
  497               } catch (Throwable ignore) {
  498                 // Keep throwing original exception
  499               }
  500             }
  501           }
  502         }
  503       }
  504       
   505     /** Removes all our references to readers, and commits
  506        *  any pending changes. */
  507       synchronized void close() throws IOException {
  508         Iterator<Map.Entry<SegmentInfo,SegmentReader>> iter = readerMap.entrySet().iterator();
  509         while (iter.hasNext()) {
  510           
  511           Map.Entry<SegmentInfo,SegmentReader> ent = iter.next();
  512   
  513           SegmentReader sr = ent.getValue();
  514           if (sr.hasChanges) {
  515             assert infoIsLive(sr.getSegmentInfo());
  516             sr.startCommit();
  517             boolean success = false;
  518             try {
  519               sr.doCommit(null);
  520               success = true;
  521             } finally {
  522               if (!success) {
  523                 sr.rollbackCommit();
  524               }
  525             }
  526           }
  527   
  528           iter.remove();
  529   
  530           // NOTE: it is allowed that this decRef does not
  531           // actually close the SR; this can happen when a
  532           // near real-time reader is kept open after the
  533           // IndexWriter instance is closed
  534           sr.decRef();
  535         }
  536       }
  537       
  538       /**
   539      * Commit all segment readers in the pool.
  540        * @throws IOException
  541        */
  542       synchronized void commit() throws IOException {
  543         for (Map.Entry<SegmentInfo,SegmentReader> ent : readerMap.entrySet()) {
  544   
  545           SegmentReader sr = ent.getValue();
  546           if (sr.hasChanges) {
  547             assert infoIsLive(sr.getSegmentInfo());
  548             sr.startCommit();
  549             boolean success = false;
  550             try {
  551               sr.doCommit(null);
  552               success = true;
  553             } finally {
  554               if (!success) {
  555                 sr.rollbackCommit();
  556               }
  557             }
  558           }
  559         }
  560       }
  561       
  562       /**
  563        * Returns a ref to a clone.  NOTE: this clone is not
  564        * enrolled in the pool, so you should simply close()
  565        * it when you're done (ie, do not call release()).
  566        */
  567       public synchronized SegmentReader getReadOnlyClone(SegmentInfo info, boolean doOpenStores, int termInfosIndexDivisor) throws IOException {
  568         SegmentReader sr = get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor);
  569         try {
  570           return (SegmentReader) sr.clone(true);
  571         } finally {
  572           sr.decRef();
  573         }
  574       }
  575      
  576       /**
  577        * Obtain a SegmentReader from the readerPool.  The reader
  578        * must be returned by calling {@link #release(SegmentReader)}
  579        * @see #release(SegmentReader)
  580        * @param info
  581        * @param doOpenStores
  582        * @throws IOException
  583        */
  584       public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores) throws IOException {
  585         return get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
  586       }
  587       /**
  588        * Obtain a SegmentReader from the readerPool.  The reader
  589        * must be returned by calling {@link #release(SegmentReader)}
  590        * 
  591        * @see #release(SegmentReader)
  592        * @param info
  593        * @param doOpenStores
  594        * @param readBufferSize
  595        * @param termsIndexDivisor
  596        * @throws IOException
  597        */
  598       public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores, int readBufferSize, int termsIndexDivisor) throws IOException {
  599   
  600         if (poolReaders) {
  601           readBufferSize = BufferedIndexInput.BUFFER_SIZE;
  602         }
  603   
  604         SegmentReader sr = readerMap.get(info);
  605         if (sr == null) {
  606           // TODO: we may want to avoid doing this while
  607           // synchronized
  608           // Returns a ref, which we xfer to readerMap:
  609           sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
  610           readerMap.put(info, sr);
  611         } else {
  612           if (doOpenStores) {
  613             sr.openDocStores();
  614           }
  615           if (termsIndexDivisor != -1 && !sr.termsIndexLoaded()) {
  616             // If this reader was originally opened because we
  617             // needed to merge it, we didn't load the terms
  618             // index.  But now, if the caller wants the terms
  619             // index (eg because it's doing deletes, or an NRT
  620             // reader is being opened) we ask the reader to
  621             // load its terms index.
  622             sr.loadTermsIndex(termsIndexDivisor);
  623           }
  624         }
  625   
  626         // Return a ref to our caller
  627         sr.incRef();
  628         return sr;
  629       }
  630   
  631       // Returns a ref
  632       public synchronized SegmentReader getIfExists(SegmentInfo info) throws IOException {
  633         SegmentReader sr = readerMap.get(info);
  634         if (sr != null) {
  635           sr.incRef();
  636         }
  637         return sr;
  638       }
  639     }
  640     
  641     /**
  642      * Obtain the number of deleted docs for a pooled reader.
  643      * If the reader isn't being pooled, the segmentInfo's 
  644      * delCount is returned.
  645      */
  646     public int numDeletedDocs(SegmentInfo info) throws IOException {
  647       SegmentReader reader = readerPool.getIfExists(info);
  648       try {
  649         if (reader != null) {
  650           return reader.numDeletedDocs();
  651         } else {
  652           return info.getDelCount();
  653         }
  654       } finally {
  655         if (reader != null) {
  656           readerPool.release(reader);
  657         }
  658       }
  659     }
  660     
  661     synchronized void acquireWrite() {
  662       assert writeThread != Thread.currentThread();
  663       while(writeThread != null || readCount > 0)
  664         doWait();
  665   
  666       // We could have been closed while we were waiting:
  667       ensureOpen();
  668   
  669       writeThread = Thread.currentThread();
  670     }
  671   
  672     synchronized void releaseWrite() {
  673       assert Thread.currentThread() == writeThread;
  674       writeThread = null;
  675       notifyAll();
  676     }
  677   
  678     synchronized void acquireRead() {
  679       final Thread current = Thread.currentThread();
  680       while(writeThread != null && writeThread != current)
  681         doWait();
  682   
  683       readCount++;
  684     }
  685   
  686     // Allows one readLock to upgrade to a writeLock even if
  687     // there are other readLocks as long as all other
  688     // readLocks are also blocked in this method:
  689     synchronized void upgradeReadToWrite() {
  690       assert readCount > 0;
  691       upgradeCount++;
  692       while(readCount > upgradeCount || writeThread != null) {
  693         doWait();
  694       }
  695       
  696       writeThread = Thread.currentThread();
  697       readCount--;
  698       upgradeCount--;
  699     }
  700   
  701     synchronized void releaseRead() {
  702       readCount--;
  703       assert readCount >= 0;
  704       notifyAll();
  705     }
  706   
  707     synchronized final boolean isOpen(boolean includePendingClose) {
  708       return !(closed || (includePendingClose && closing));
  709     }
  710   
  711     /**
  712      * Used internally to throw an {@link
  713      * AlreadyClosedException} if this IndexWriter has been
  714      * closed.
  715      * @throws AlreadyClosedException if this IndexWriter is
  716      */
  717     protected synchronized final void ensureOpen(boolean includePendingClose) throws AlreadyClosedException {
  718       if (!isOpen(includePendingClose)) {
  719         throw new AlreadyClosedException("this IndexWriter is closed");
  720       }
  721     }
  722   
  723     protected synchronized final void ensureOpen() throws AlreadyClosedException {
  724       ensureOpen(true);
  725     }
  726   
  727     /**
  728      * Prints a message to the infoStream (if non-null),
  729      * prefixed with the identifying information for this
  730      * writer and the thread that's calling it.
  731      */
  732     public void message(String message) {
  733       if (infoStream != null)
  734         infoStream.println("IW " + messageID + " [" + Thread.currentThread().getName() + "]: " + message);
  735     }
  736   
  737     private synchronized void setMessageID(PrintStream infoStream) {
  738       if (infoStream != null && messageID == -1) {
  739         synchronized(MESSAGE_ID_LOCK) {
  740           messageID = MESSAGE_ID++;
  741         }
  742       }
  743       this.infoStream = infoStream;
  744     }
  745   
  746     /**
  747      * Casts current mergePolicy to LogMergePolicy, and throws
  748      * an exception if the mergePolicy is not a LogMergePolicy.
  749      */
  750     private LogMergePolicy getLogMergePolicy() {
  751       if (mergePolicy instanceof LogMergePolicy)
  752         return (LogMergePolicy) mergePolicy;
  753       else
  754         throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
  755     }
  756   
  757     /** <p>Get the current setting of whether newly flushed
  758      *  segments will use the compound file format.  Note that
  759      *  this just returns the value previously set with
  760      *  setUseCompoundFile(boolean), or the default value
  761      *  (true).  You cannot use this to query the status of
  762      *  previously flushed segments.</p>
  763      *
  764      *  <p>Note that this method is a convenience method: it
  765      *  just calls mergePolicy.getUseCompoundFile as long as
  766      *  mergePolicy is an instance of {@link LogMergePolicy}.
  767      *  Otherwise an IllegalArgumentException is thrown.</p>
  768      *
  769      *  @see #setUseCompoundFile(boolean)
  770      */
  771     public boolean getUseCompoundFile() {
  772       return getLogMergePolicy().getUseCompoundFile();
  773     }
  774   
  775     /** <p>Setting to turn on usage of a compound file. When on,
  776      *  multiple files for each segment are merged into a
  777      *  single file when a new segment is flushed.</p>
  778      *
  779      *  <p>Note that this method is a convenience method: it
  780      *  just calls mergePolicy.setUseCompoundFile as long as
  781      *  mergePolicy is an instance of {@link LogMergePolicy}.
  782      *  Otherwise an IllegalArgumentException is thrown.</p>
  783      */
  784     public void setUseCompoundFile(boolean value) {
  785       getLogMergePolicy().setUseCompoundFile(value);
  786       getLogMergePolicy().setUseCompoundDocStore(value);
  787     }
  788   
  789     /** Expert: Set the Similarity implementation used by this IndexWriter.
  790      *
  791      * @see Similarity#setDefault(Similarity)
  792      */
  793     public void setSimilarity(Similarity similarity) {
  794       ensureOpen();
  795       this.similarity = similarity;
  796       docWriter.setSimilarity(similarity);
  797     }
  798   
  799     /** Expert: Return the Similarity implementation used by this IndexWriter.
  800      *
  801      * <p>This defaults to the current value of {@link Similarity#getDefault()}.
  802      */
  803     public Similarity getSimilarity() {
  804       ensureOpen();
  805       return this.similarity;
  806     }
  807   
  808     /** Expert: Set the interval between indexed terms.  Large values cause less
  809      * memory to be used by IndexReader, but slow random-access to terms.  Small
  810      * values cause more memory to be used by an IndexReader, and speed
  811      * random-access to terms.
  812      *
  813      * This parameter determines the amount of computation required per query
  814      * term, regardless of the number of documents that contain that term.  In
  815      * particular, it is the maximum number of other terms that must be
  816      * scanned before a term is located and its frequency and position information
  817      * may be processed.  In a large index with user-entered query terms, query
  818      * processing time is likely to be dominated not by term lookup but rather
  819      * by the processing of frequency and positional data.  In a small index
  820      * or when many uncommon query terms are generated (e.g., by wildcard
  821      * queries) term lookup may become a dominant cost.
  822      *
  823      * In particular, <code>numUniqueTerms/interval</code> terms are read into
  824      * memory by an IndexReader, and, on average, <code>interval/2</code> terms
  825      * must be scanned for each random term access.
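            *
            * For example, with 1,000,000 unique terms and the default interval
            * of 128, roughly 7,800 terms are read into memory and, on average,
            * about 64 terms are scanned per random term lookup.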
  826      *
  827      * @see #DEFAULT_TERM_INDEX_INTERVAL
  828      */
  829     public void setTermIndexInterval(int interval) {
  830       ensureOpen();
  831       this.termIndexInterval = interval;
  832     }
  833   
  834     /** Expert: Return the interval between indexed terms.
  835      *
  836      * @see #setTermIndexInterval(int)
  837      */
  838     public int getTermIndexInterval() {
  839       // We pass false because this method is called by SegmentMerger while we are in the process of closing
  840       ensureOpen(false);
  841       return termIndexInterval;
  842     }
  843   
  844     /**
  845      * Constructs an IndexWriter for the index in <code>d</code>.
  846      * Text will be analyzed with <code>a</code>.  If <code>create</code>
  847      * is true, then a new, empty index will be created in
  848      * <code>d</code>, replacing the index already there, if any.
  849      *
  850      * @param d the index directory
  851      * @param a the analyzer to use
  852      * @param create <code>true</code> to create the index or overwrite
  853      *  the existing one; <code>false</code> to append to the existing
  854      *  index
  855      * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
  856      *   via the MaxFieldLength constructor.
  857      * @throws CorruptIndexException if the index is corrupt
  858      * @throws LockObtainFailedException if another writer
  859      *  has this index open (<code>write.lock</code> could not
  860      *  be obtained)
  861      * @throws IOException if the directory cannot be read/written to, or
  862      *  if it does not exist and <code>create</code> is
  863      *  <code>false</code> or if there is any other low-level
  864      *  IO error
  865      */
  866     public IndexWriter(Directory d, Analyzer a, boolean create, MaxFieldLength mfl)
  867          throws CorruptIndexException, LockObtainFailedException, IOException {
  868       init(d, a, create, null, mfl.getLimit(), null, null);
  869     }
  870   
  871     /**
  872      * Constructs an IndexWriter for the index in
  873      * <code>d</code>, first creating it if it does not
  874      * already exist.  Text will be analyzed with
  875      * <code>a</code>.
  876      *
  877      * @param d the index directory
  878      * @param a the analyzer to use
  879      * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
  880      *   via the MaxFieldLength constructor.
  881      * @throws CorruptIndexException if the index is corrupt
  882      * @throws LockObtainFailedException if another writer
  883      *  has this index open (<code>write.lock</code> could not
  884      *  be obtained)
  885      * @throws IOException if the directory cannot be
  886      *  read/written to or if there is any other low-level
  887      *  IO error
  888      */
  889     public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl)
  890       throws CorruptIndexException, LockObtainFailedException, IOException {
  891       init(d, a, null, mfl.getLimit(), null, null);
  892     }
  893   
  894     /**
  895      * Expert: constructs an IndexWriter with a custom {@link
  896      * IndexDeletionPolicy}, for the index in <code>d</code>,
  897      * first creating it if it does not already exist.  Text
  898      * will be analyzed with <code>a</code>.
  899      *
  900      * @param d the index directory
  901      * @param a the analyzer to use
  902      * @param deletionPolicy see <a href="#deletionPolicy">above</a>
  903      * @param mfl whether or not to limit field lengths
  904      * @throws CorruptIndexException if the index is corrupt
  905      * @throws LockObtainFailedException if another writer
  906      *  has this index open (<code>write.lock</code> could not
  907      *  be obtained)
  908      * @throws IOException if the directory cannot be
  909      *  read/written to or if there is any other low-level
  910      *  IO error
  911      */
  912     public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
  913       throws CorruptIndexException, LockObtainFailedException, IOException {
  914       init(d, a, deletionPolicy, mfl.getLimit(), null, null);
  915     }
  916   
  917     /**
  918      * Expert: constructs an IndexWriter with a custom {@link
  919      * IndexDeletionPolicy}, for the index in <code>d</code>.
  920      * Text will be analyzed with <code>a</code>.  If
  921      * <code>create</code> is true, then a new, empty index
  922      * will be created in <code>d</code>, replacing the index
  923      * already there, if any.
  924      *
  925      * @param d the index directory
  926      * @param a the analyzer to use
  927      * @param create <code>true</code> to create the index or overwrite
  928      *  the existing one; <code>false</code> to append to the existing
  929      *  index
  930      * @param deletionPolicy see <a href="#deletionPolicy">above</a>
  931      * @param mfl {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}, whether or not to limit field lengths.  Value is in number of terms/tokens
  932      * @throws CorruptIndexException if the index is corrupt
  933      * @throws LockObtainFailedException if another writer
  934      *  has this index open (<code>write.lock</code> could not
  935      *  be obtained)
  936      * @throws IOException if the directory cannot be read/written to, or
  937      *  if it does not exist and <code>create</code> is
  938      *  <code>false</code> or if there is any other low-level
  939      *  IO error
  940      */
  941     public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
  942          throws CorruptIndexException, LockObtainFailedException, IOException {
  943       init(d, a, create, deletionPolicy, mfl.getLimit(), null, null);
  944     }
  945     
  946     /**
  947      * Expert: constructs an IndexWriter with a custom {@link
  948      * IndexDeletionPolicy} and {@link IndexingChain}, 
  949      * for the index in <code>d</code>.
  950      * Text will be analyzed with <code>a</code>.  If
  951      * <code>create</code> is true, then a new, empty index
  952      * will be created in <code>d</code>, replacing the index
  953      * already there, if any.
  954      *
  955      * @param d the index directory
  956      * @param a the analyzer to use
  957      * @param create <code>true</code> to create the index or overwrite
  958      *  the existing one; <code>false</code> to append to the existing
  959      *  index
  960      * @param deletionPolicy see <a href="#deletionPolicy">above</a>
  961      * @param mfl whether or not to limit field lengths, value is in number of terms/tokens.  See {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}.
  962      * @param indexingChain the {@link DocConsumer} chain to be used to 
  963      *  process documents
  964      * @param commit which commit to open
  965      * @throws CorruptIndexException if the index is corrupt
  966      * @throws LockObtainFailedException if another writer
  967      *  has this index open (<code>write.lock</code> could not
  968      *  be obtained)
  969      * @throws IOException if the directory cannot be read/written to, or
  970      *  if it does not exist and <code>create</code> is
  971      *  <code>false</code> or if there is any other low-level
  972      *  IO error
  973      */
  974     IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain, IndexCommit commit)
  975          throws CorruptIndexException, LockObtainFailedException, IOException {
  976       init(d, a, create, deletionPolicy, mfl.getLimit(), indexingChain, commit);
  977     }
  978     
  979     /**
  980      * Expert: constructs an IndexWriter on specific commit
  981      * point, with a custom {@link IndexDeletionPolicy}, for
  982      * the index in <code>d</code>.  Text will be analyzed
  983      * with <code>a</code>.
  984      *
   985      * <p> This is only meaningful if you've used an {@link
   986      * IndexDeletionPolicy} in the past that keeps more than
  987      * just the last commit.
  988      * 
  989      * <p>This operation is similar to {@link #rollback()},
  990      * except that method can only rollback what's been done
  991      * with the current instance of IndexWriter since its last
  992      * commit, whereas this method can rollback to an
  993      * arbitrary commit point from the past, assuming the
  994      * {@link IndexDeletionPolicy} has preserved past
  995      * commits.
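            *
            * <p>A minimal sketch (assuming <code>dir</code>, <code>analyzer</code>,
            * a <code>policy</code> that preserved past commits, and a
            * <code>chosenCommit</code> picked from the list below):</p>
            *
            * <pre>
            * for (IndexCommit commit : IndexReader.listCommits(dir)) {
            *   // inspect commit.getSegmentsFileName() and pick the commit to roll back to
            * }
            * IndexWriter writer = new IndexWriter(dir, analyzer, policy,
            *                                      IndexWriter.MaxFieldLength.LIMITED, chosenCommit);
            * </pre>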
  996      *
  997      * @param d the index directory
  998      * @param a the analyzer to use
  999      * @param deletionPolicy see <a href="#deletionPolicy">above</a>
 1000      * @param mfl whether or not to limit field lengths, value is in number of terms/tokens.  See {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}.
 1001      * @param commit which commit to open
 1002      * @throws CorruptIndexException if the index is corrupt
 1003      * @throws LockObtainFailedException if another writer
 1004      *  has this index open (<code>write.lock</code> could not
 1005      *  be obtained)
 1006      * @throws IOException if the directory cannot be read/written to, or
 1007      *  if it does not exist and <code>create</code> is
 1008      *  <code>false</code> or if there is any other low-level
 1009      *  IO error
 1010      */
 1011     public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit)
 1012          throws CorruptIndexException, LockObtainFailedException, IOException {
 1013       init(d, a, false, deletionPolicy, mfl.getLimit(), null, commit);
 1014     }
 1015   
 1016     private void init(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, 
 1017                       int maxFieldLength, IndexingChain indexingChain, IndexCommit commit)
 1018       throws CorruptIndexException, LockObtainFailedException, IOException {
 1019       if (IndexReader.indexExists(d)) {
 1020         init(d, a, false, deletionPolicy, maxFieldLength, indexingChain, commit);
 1021       } else {
 1022         init(d, a, true, deletionPolicy, maxFieldLength, indexingChain, commit);
 1023       }
 1024     }
 1025   
 1026     private void init(Directory d, Analyzer a, final boolean create,  
 1027                       IndexDeletionPolicy deletionPolicy, int maxFieldLength,
 1028                       IndexingChain indexingChain, IndexCommit commit)
 1029       throws CorruptIndexException, LockObtainFailedException, IOException {
 1030   
 1031       directory = d;
 1032       analyzer = a;
 1033       setMessageID(defaultInfoStream);
 1034       this.maxFieldLength = maxFieldLength;
 1035   
 1036       if (indexingChain == null)
 1037         indexingChain = DocumentsWriter.DefaultIndexingChain;
 1038   
 1039       if (create) {
 1040         // Clear the write lock in case it's leftover:
 1041         directory.clearLock(WRITE_LOCK_NAME);
 1042       }
 1043   
 1044       Lock writeLock = directory.makeLock(WRITE_LOCK_NAME);
 1045       if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
 1046         throw new LockObtainFailedException("Index locked for write: " + writeLock);
 1047       this.writeLock = writeLock;                   // save it
 1048   
 1049       try {
 1050         if (create) {
 1051           // Try to read first.  This is to allow create
 1052           // against an index that's currently open for
 1053           // searching.  In this case we write the next
 1054           // segments_N file with no segments:
 1055           boolean doCommit;
 1056           try {
 1057             segmentInfos.read(directory);
 1058             segmentInfos.clear();
 1059             doCommit = false;
 1060           } catch (IOException e) {
 1061             // Likely this means it's a fresh directory
 1062             doCommit = true;
 1063           }
 1064   
 1065           if (doCommit) {
 1066             // Only commit if there is no segments file in
 1067             // this dir already.
 1068             segmentInfos.commit(directory);
 1069             synced.addAll(segmentInfos.files(directory, true));
 1070           } else {
 1071             // Record that we have a change (zero out all
 1072             // segments) pending:
 1073             changeCount++;
 1074           }
 1075         } else {
 1076           segmentInfos.read(directory);
 1077   
 1078           if (commit != null) {
 1079             // Swap out all segments, but, keep metadata in
 1080             // SegmentInfos, like version & generation, to
 1081             // preserve write-once.  This is important if
 1082             // readers are open against the future commit
 1083             // points.
 1084             if (commit.getDirectory() != directory)
 1085               throw new IllegalArgumentException("IndexCommit's directory doesn't match my directory");
 1086             SegmentInfos oldInfos = new SegmentInfos();
 1087             oldInfos.read(directory, commit.getSegmentsFileName());
 1088             segmentInfos.replace(oldInfos);
 1089             changeCount++;
 1090             if (infoStream != null)
 1091               message("init: loaded commit \"" + commit.getSegmentsFileName() + "\"");
 1092           }
 1093   
 1094           // We assume that this segments_N was previously
 1095           // properly sync'd:
 1096           synced.addAll(segmentInfos.files(directory, true));
 1097         }
 1098   
 1099         setRollbackSegmentInfos(segmentInfos);
 1100   
 1101         docWriter = new DocumentsWriter(directory, this, indexingChain);
 1102         docWriter.setInfoStream(infoStream);
 1103         docWriter.setMaxFieldLength(maxFieldLength);
 1104   
 1105         // Default deleter (for backwards compatibility) is
 1106         // KeepOnlyLastCommitDeleter:
 1107         deleter = new IndexFileDeleter(directory,
 1108                                        deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
 1109                                        segmentInfos, infoStream, docWriter);
 1110   
 1111         if (deleter.startingCommitDeleted)
 1112           // Deletion policy deleted the "head" commit point.
 1113           // We have to mark ourself as changed so that if we
 1114           // are closed w/o any further changes we write a new
 1115           // segments_N file.
 1116           changeCount++;
 1117   
 1118         pushMaxBufferedDocs();
 1119   
 1120         if (infoStream != null) {
 1121           message("init: create=" + create);
 1122           messageState();
 1123         }
 1124   
 1125       } catch (IOException e) {
 1126         this.writeLock.release();
 1127         this.writeLock = null;
 1128         throw e;
 1129       }
 1130     }
 1131   
 1132     private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
 1133       rollbackSegmentInfos = (SegmentInfos) infos.clone();
 1134       assert !rollbackSegmentInfos.hasExternalSegments(directory);
 1135       rollbackSegments = new HashMap<SegmentInfo,Integer>();
 1136       final int size = rollbackSegmentInfos.size();
 1137       for(int i=0;i<size;i++)
 1138         rollbackSegments.put(rollbackSegmentInfos.info(i), Integer.valueOf(i));
 1139     }
 1140   
 1141     /**
 1142      * Expert: set the merge policy used by this writer.
 1143      */
 1144     public void setMergePolicy(MergePolicy mp) {
 1145       ensureOpen();
 1146       if (mp == null)
 1147         throw new NullPointerException("MergePolicy must be non-null");
 1148   
 1149       if (mergePolicy != mp)
 1150         mergePolicy.close();
 1151       mergePolicy = mp;
 1152       pushMaxBufferedDocs();
 1153       if (infoStream != null)
 1154         message("setMergePolicy " + mp);
 1155     }
 1156   
 1157     /**
 1158      * Expert: returns the current MergePolicy in use by this writer.
 1159      * @see #setMergePolicy
 1160      */
 1161     public MergePolicy getMergePolicy() {
 1162       ensureOpen();
 1163       return mergePolicy;
 1164     }
 1165   
 1166     /**
 1167      * Expert: set the merge scheduler used by this writer.
 1168      */
 1169     synchronized public void setMergeScheduler(MergeScheduler mergeScheduler) throws CorruptIndexException, IOException {
 1170       ensureOpen();
 1171       if (mergeScheduler == null)
 1172         throw new NullPointerException("MergeScheduler must be non-null");
 1173   
 1174       if (this.mergeScheduler != mergeScheduler) {
 1175         finishMerges(true);
 1176         this.mergeScheduler.close();
 1177       }
 1178       this.mergeScheduler = mergeScheduler;
 1179       if (infoStream != null)
 1180         message("setMergeScheduler " + mergeScheduler);
 1181     }
 1182   
 1183     /**
  1184      * Expert: returns the current MergeScheduler in use by this
  1185      * writer.
  1186      * @see #setMergeScheduler
 1187      */
 1188     public MergeScheduler getMergeScheduler() {
 1189       ensureOpen();
 1190       return mergeScheduler;
 1191     }
 1192   
 1193     /** <p>Determines the largest segment (measured by
 1194      * document count) that may be merged with other segments.
 1195      * Small values (e.g., less than 10,000) are best for
 1196      * interactive indexing, as this limits the length of
 1197      * pauses while indexing to a few seconds.  Larger values
 1198      * are best for batched indexing and speedier
 1199      * searches.</p>
 1200      *
 1201      * <p>The default value is {@link Integer#MAX_VALUE}.</p>
 1202      *
 1203      * <p>Note that this method is a convenience method: it
 1204      * just calls mergePolicy.setMaxMergeDocs as long as
 1205      * mergePolicy is an instance of {@link LogMergePolicy}.
 1206      * Otherwise an IllegalArgumentException is thrown.</p>
 1207      *
 1208      * <p>The default merge policy ({@link
 1209      * LogByteSizeMergePolicy}) also allows you to set this
 1210      * limit by net size (in MB) of the segment, using {@link
 1211      * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
 1212      */
 1213     public void setMaxMergeDocs(int maxMergeDocs) {
 1214       getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
 1215     }
 1216   
 1217     /**
 1218      * <p>Returns the largest segment (measured by document
 1219      * count) that may be merged with other segments.</p>
 1220      *
 1221      * <p>Note that this method is a convenience method: it
 1222      * just calls mergePolicy.getMaxMergeDocs as long as
 1223      * mergePolicy is an instance of {@link LogMergePolicy}.
 1224      * Otherwise an IllegalArgumentException is thrown.</p>
 1225      *
 1226      * @see #setMaxMergeDocs
 1227      */
 1228     public int getMaxMergeDocs() {
 1229       return getLogMergePolicy().getMaxMergeDocs();
 1230     }
 1231   
 1232     /**
 1233      * The maximum number of terms that will be indexed for a single field in a
 1234      * document.  This limits the amount of memory required for indexing, so that
 1235      * collections with very large files will not crash the indexing process by
 1236      * running out of memory.  This setting refers to the number of running terms,
 1237      * not to the number of different terms.<p/>
 1238      * <strong>Note:</strong> this silently truncates large documents, excluding from the
 1239      * index all terms that occur further in the document.  If you know your source
 1240      * documents are large, be sure to set this value high enough to accommodate
 1241      * the expected size.  If you set it to Integer.MAX_VALUE, then the only limit
 1242      * is your memory, but you should anticipate an OutOfMemoryError.<p/>
 1243      * By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms
 1244      * will be indexed for a field.
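           *
           * <p>For example, a minimal sketch, assuming an open
           * <code>writer</code> that must index very large documents:</p>
           *
           * <pre>
           * // raise the per-field term limit from the default of 10,000
           * writer.setMaxFieldLength(100000);
           * // or remove the limit entirely (watch your heap)
           * writer.setMaxFieldLength(Integer.MAX_VALUE);
           * </pre>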
 1245      */
 1246     public void setMaxFieldLength(int maxFieldLength) {
 1247       ensureOpen();
 1248       this.maxFieldLength = maxFieldLength;
 1249       docWriter.setMaxFieldLength(maxFieldLength);
 1250       if (infoStream != null)
 1251         message("setMaxFieldLength " + maxFieldLength);
 1252     }
 1253   
 1254     /**
 1255      * Returns the maximum number of terms that will be
 1256      * indexed for a single field in a document.
 1257      * @see #setMaxFieldLength
 1258      */
 1259     public int getMaxFieldLength() {
 1260       ensureOpen();
 1261       return maxFieldLength;
 1262     }
 1263   
 1264     /** Determines the minimal number of documents required
 1265      * before the buffered in-memory documents are flushed as
 1266      * a new Segment.  Large values generally give faster
 1267      * indexing.
 1268      *
 1269      * <p>When this is set, the writer will flush every
 1270      * maxBufferedDocs added documents.  Pass in {@link
 1271      * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
 1272      * to number of buffered documents.  Note that if flushing
 1273      * by RAM usage is also enabled, then the flush will be
 1274      * triggered by whichever comes first.</p>
 1275      *
 1276      * <p>Disabled by default (writer flushes by RAM usage).</p>
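           *
           * <p>For example, a minimal sketch, assuming an open
           * <code>writer</code>:</p>
           *
           * <pre>
           * // flush a new segment every 1,000 added documents
           * writer.setMaxBufferedDocs(1000);
           * // or flush purely by RAM usage again (RAM flushing must be enabled)
           * writer.setMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);
           * </pre>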
 1277      *
 1278      * @throws IllegalArgumentException if maxBufferedDocs is
 1279      * enabled but smaller than 2, or if it would disable maxBufferedDocs
 1280      * when ramBufferSize is already disabled
 1281      * @see #setRAMBufferSizeMB
 1282      */
 1283     public void setMaxBufferedDocs(int maxBufferedDocs) {
 1284       ensureOpen();
 1285       if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
 1286         throw new IllegalArgumentException(
 1287             "maxBufferedDocs must at least be 2 when enabled");
 1288       if (maxBufferedDocs == DISABLE_AUTO_FLUSH
 1289           && getRAMBufferSizeMB() == DISABLE_AUTO_FLUSH)
 1290         throw new IllegalArgumentException(
 1291             "at least one of ramBufferSize and maxBufferedDocs must be enabled");
 1292       docWriter.setMaxBufferedDocs(maxBufferedDocs);
 1293       pushMaxBufferedDocs();
 1294       if (infoStream != null)
 1295         message("setMaxBufferedDocs " + maxBufferedDocs);
 1296     }
 1297   
 1298     /**
 1299      * If we are flushing by doc count (not by RAM usage), and
 1300      * using LogDocMergePolicy then push maxBufferedDocs down
 1301      * as its minMergeDocs, to keep backwards compatibility.
 1302      */
 1303     private void pushMaxBufferedDocs() {
 1304       if (docWriter.getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) {
 1305         final MergePolicy mp = mergePolicy;
 1306         if (mp instanceof LogDocMergePolicy) {
 1307           LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
 1308           final int maxBufferedDocs = docWriter.getMaxBufferedDocs();
 1309           if (lmp.getMinMergeDocs() != maxBufferedDocs) {
 1310             if (infoStream != null)
 1311               message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
 1312             lmp.setMinMergeDocs(maxBufferedDocs);
 1313           }
 1314         }
 1315       }
 1316     }
 1317   
 1318     /**
 1319      * Returns the number of buffered added documents that will
 1320      * trigger a flush if enabled.
 1321      * @see #setMaxBufferedDocs
 1322      */
 1323     public int getMaxBufferedDocs() {
 1324       ensureOpen();
 1325       return docWriter.getMaxBufferedDocs();
 1326     }
 1327   
 1328     /** Determines the amount of RAM that may be used for
 1329      * buffering added documents and deletions before they are
 1330      * flushed to the Directory.  Generally for faster
 1331      * indexing performance it's best to flush by RAM usage
 1332      * instead of document count and use as large a RAM buffer
 1333      * as you can.
 1334      *
 1335      * <p>When this is set, the writer will flush whenever
 1336      * buffered documents and deletions use this much RAM.
 1337      * Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
 1338      * triggering a flush due to RAM usage.  Note that if
 1339      * flushing by document count is also enabled, then the
 1340      * flush will be triggered by whichever comes first.</p>
 1341      *
 1342      * <p> <b>NOTE</b>: the accounting of RAM usage for pending
 1343      * deletions is only approximate.  Specifically, if you
 1344      * delete by Query, Lucene currently has no way to measure
 1345      * the RAM usage of individual Queries, so the accounting
 1346      * will under-estimate and you should compensate by either
 1347      * calling commit() periodically yourself, or by using
 1348      * {@link #setMaxBufferedDeleteTerms} to flush by count
 1349      * instead of RAM usage (each buffered delete Query counts
 1350      * as one).
 1351      *
 1352      * <p> <b>NOTE</b>: because IndexWriter uses
 1353      * <code>int</code>s when managing its internal storage,
 1354      * the absolute maximum value for this setting is somewhat
 1355      * less than 2048 MB.  The precise limit depends on
 1356      * various factors, such as how large your documents are,
 1357      * how many fields have norms, etc., so it's best to set
 1358      * this value comfortably under 2048.</p>
 1359      *
 1360      * <p> The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
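           *
           * <p>For example, a minimal sketch, assuming an open
           * <code>writer</code> on a machine with ample heap:</p>
           *
           * <pre>
           * // buffer up to 256 MB of added documents before flushing a segment
           * writer.setRAMBufferSizeMB(256.0);
           * </pre>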
 1361      * 
 1362      * @throws IllegalArgumentException if ramBufferSize is
 1363      * enabled but non-positive, or if it would disable ramBufferSize
 1364      * when maxBufferedDocs is already disabled
 1365      */
 1366     public void setRAMBufferSizeMB(double mb) {
 1367       if (mb > 2048.0) {
 1368         throw new IllegalArgumentException("ramBufferSize " + mb + " is too large; should be comfortably less than 2048");
 1369       }
 1370       if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0)
 1371         throw new IllegalArgumentException(
 1372             "ramBufferSize should be > 0.0 MB when enabled");
 1373       if (mb == DISABLE_AUTO_FLUSH && getMaxBufferedDocs() == DISABLE_AUTO_FLUSH)
 1374         throw new IllegalArgumentException(
 1375             "at least one of ramBufferSize and maxBufferedDocs must be enabled");
 1376       docWriter.setRAMBufferSizeMB(mb);
 1377       if (infoStream != null)
 1378         message("setRAMBufferSizeMB " + mb);
 1379     }
 1380   
 1381     /**
 1382      * Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
 1383      */
 1384     public double getRAMBufferSizeMB() {
 1385       return docWriter.getRAMBufferSizeMB();
 1386     }
 1387   
 1388     /**
 1389      * <p>Determines the minimal number of delete terms required before the buffered
 1390      * in-memory delete terms are applied and flushed. If there are documents
 1391      * buffered in memory at the time, they are merged and a new segment is
 1392      * created.</p>
 1393      *
 1394      * <p>Disabled by default (writer flushes by RAM usage).</p>
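           *
           * <p>For example, a minimal sketch, assuming an open
           * <code>writer</code>:</p>
           *
           * <pre>
           * // apply buffered deletes once 1,000 delete terms have accumulated
           * writer.setMaxBufferedDeleteTerms(1000);
           * </pre>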
 1395      * 
 1396      * @throws IllegalArgumentException if maxBufferedDeleteTerms
 1397      * is enabled but smaller than 1
 1398      * @see #setRAMBufferSizeMB
 1399      */
 1400     public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
 1401       ensureOpen();
 1402       if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH
 1403           && maxBufferedDeleteTerms < 1)
 1404         throw new IllegalArgumentException(
 1405             "maxBufferedDeleteTerms must at least be 1 when enabled");
 1406       docWriter.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
 1407       if (infoStream != null)
 1408         message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
 1409     }
 1410   
 1411     /**
 1412      * Returns the number of buffered deleted terms that will
 1413      * trigger a flush if enabled.
 1414      * @see #setMaxBufferedDeleteTerms
 1415      */
 1416     public int getMaxBufferedDeleteTerms() {
 1417       ensureOpen();
 1418       return docWriter.getMaxBufferedDeleteTerms();
 1419     }
 1420   
 1421     /** Determines how often segment indices are merged by addDocument().  With
 1422      * smaller values, less RAM is used while indexing, and searches on
 1423      * unoptimized indices are faster, but indexing speed is slower.  With larger
 1424      * values, more RAM is used during indexing, and while searches on unoptimized
 1425      * indices are slower, indexing is faster.  Thus larger values (> 10) are best
 1426      * for batch index creation, and smaller values (< 10) for indices that are
 1427      * interactively maintained.
 1428      *
 1429      * <p>Note that this method is a convenience method: it
 1430      * just calls mergePolicy.setMergeFactor as long as
 1431      * mergePolicy is an instance of {@link LogMergePolicy}.
 1432      * Otherwise an IllegalArgumentException is thrown.</p>
 1433      *
 1434      * <p>This must never be less than 2.  The default value is 10.
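           *
           * <p>For example, a minimal sketch, assuming an open
           * <code>writer</code> doing a large batch build:</p>
           *
           * <pre>
           * // merge less often: faster batch indexing, but more segments on disk
           * writer.setMergeFactor(30);
           * </pre>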
 1435      */
 1436     public void setMergeFactor(int mergeFactor) {
 1437       getLogMergePolicy().setMergeFactor(mergeFactor);
 1438     }
 1439   
 1440     /**
 1441      * <p>Returns the number of segments that are merged at
 1442      * once and also controls the total number of segments
 1443      * allowed to accumulate in the index.</p>
 1444      *
 1445      * <p>Note that this method is a convenience method: it
 1446      * just calls mergePolicy.getMergeFactor as long as
 1447      * mergePolicy is an instance of {@link LogMergePolicy}.
 1448      * Otherwise an IllegalArgumentException is thrown.</p>
 1449      *
 1450      * @see #setMergeFactor
 1451      */
 1452     public int getMergeFactor() {
 1453       return getLogMergePolicy().getMergeFactor();
 1454     }
 1455   
 1456     /** If non-null, this will be the default infoStream used
 1457      * by a newly instantiated IndexWriter.
 1458      * @see #setInfoStream
 1459      */
 1460     public static void setDefaultInfoStream(PrintStream infoStream) {
 1461       IndexWriter.defaultInfoStream = infoStream;
 1462     }
 1463   
 1464     /**
 1465      * Returns the current default infoStream for newly
 1466      * instantiated IndexWriters.
 1467      * @see #setDefaultInfoStream
 1468      */
 1469     public static PrintStream getDefaultInfoStream() {
 1470       return IndexWriter.defaultInfoStream;
 1471     }
 1472   
 1473     /** If non-null, information about merges, deletes and a
 1474      * message when maxFieldLength is reached will be printed
 1475      * to this.
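           *
           * <p>For example, a minimal sketch, assuming an open
           * <code>writer</code>:</p>
           *
           * <pre>
           * // echo merge, flush and delete diagnostics to stdout
           * writer.setInfoStream(System.out);
           * // switch diagnostics off again
           * writer.setInfoStream(null);
           * </pre>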
 1476      */
 1477     public void setInfoStream(PrintStream infoStream) {
 1478       ensureOpen();
 1479       setMessageID(infoStream);
 1480       docWriter.setInfoStream(infoStream);
 1481       deleter.setInfoStream(infoStream);
 1482       if (infoStream != null)
 1483         messageState();
 1484     }
 1485   
 1486     private void messageState() {
 1487       message("setInfoStream: dir=" + directory +
 1488               " mergePolicy=" + mergePolicy +
 1489               " mergeScheduler=" + mergeScheduler +
 1490               " ramBufferSizeMB=" + docWriter.getRAMBufferSizeMB() +
 1491               " maxBufferedDocs=" + docWriter.getMaxBufferedDocs() +
 1492               " maxBufferedDeleteTerms=" + docWriter.getMaxBufferedDeleteTerms() +
 1493               " maxFieldLength=" + maxFieldLength +
 1494               " index=" + segString());
 1495     }
 1496   
 1497     /**
 1498      * Returns the current infoStream in use by this writer.
 1499      * @see #setInfoStream
 1500      */
 1501     public PrintStream getInfoStream() {
 1502       ensureOpen();
 1503       return infoStream;
 1504     }
 1505   
 1506     /** Returns true if verbose output is enabled (i.e., infoStream != null). */
 1507     public boolean verbose() {
 1508       return infoStream != null;
 1509     }
 1510     
 1511     /**
 1512      * Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter.
 1513      * @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
 1514      */
 1515     public void setWriteLockTimeout(long writeLockTimeout) {
 1516       ensureOpen();
 1517       this.writeLockTimeout = writeLockTimeout;
 1518     }
 1519   
 1520     /**
 1521      * Returns allowed timeout when acquiring the write lock.
 1522      * @see #setWriteLockTimeout
 1523      */
 1524     public long getWriteLockTimeout() {
 1525       ensureOpen();
 1526       return writeLockTimeout;
 1527     }
 1528   
 1529     /**
 1530      * Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
 1531      * milliseconds).
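           *
           * <p>For example, a minimal sketch:</p>
           *
           * <pre>
           * // wait up to 5 seconds for the write lock before failing
           * IndexWriter.setDefaultWriteLockTimeout(5000);
           * </pre>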
 1532      */
 1533     public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
 1534       IndexWriter.WRITE_LOCK_TIMEOUT = writeLockTimeout;
 1535     }
 1536   
 1537     /**
 1538      * Returns default write lock timeout for newly
 1539      * instantiated IndexWriters.
 1540      * @see #setDefaultWriteLockTimeout
 1541      */
 1542     public static long getDefaultWriteLockTimeout() {
 1543       return IndexWriter.WRITE_LOCK_TIMEOUT;
 1544     }
 1545   
 1546     /**
 1547      * Commits all changes to an index and closes all
 1548      * associated files.  Note that this may be a costly
 1549      * operation, so try to re-use a single writer instead of
 1550      * closing and opening a new one.  See {@link #commit()} for
 1551      * caveats about write caching done by some IO devices.
 1552      *
 1553      * <p> If an Exception is hit during close, eg due to disk
 1554      * full or some other reason, then both the on-disk index
 1555      * and the internal state of the IndexWriter instance will
 1556      * be consistent.  However, the close will not be complete
 1557      * even though part of it (flushing buffered documents)
 1558      * may have succeeded, so the write lock will still be
 1559      * held.</p>
 1560      * 
 1561      * <p> If you can correct the underlying cause (eg free up
 1562      * some disk space) then you can call close() again.
 1563      * Failing that, if you want to force the write lock to be
 1564      * released (dangerous, because you may then lose buffered
 1565      * docs in the IndexWriter instance) then you can do
 1566      * something like this:</p>
 1567      *
 1568      * <pre>
 1569      * try {
 1570      *   writer.close();
 1571      * } finally {
 1572      *   if (IndexWriter.isLocked(directory)) {
 1573      *     IndexWriter.unlock(directory);
 1574      *   }
 1575      * }
 1576      * </pre>
 1577      *
 1578      * after which, you must be certain not to use the writer
 1579      * instance anymore.</p>
 1580      *
 1581      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 1582      * you should immediately close the writer, again.  See <a
 1583      * href="#OOME">above</a> for details.</p>
 1584      *
 1585      * @throws CorruptIndexException if the index is corrupt
 1586      * @throws IOException if there is a low-level IO error
 1587      */
 1588     public void close() throws CorruptIndexException, IOException {
 1589       close(true);
 1590     }
 1591   
 1592     /**
 1593      * Closes the index with or without waiting for currently
 1594      * running merges to finish.  This is only meaningful when
 1595      * using a MergeScheduler that runs merges in background
 1596      * threads.
 1597      *
 1598      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 1599      * you should immediately close the writer, again.  See <a
 1600      * href="#OOME">above</a> for details.</p>
 1601      *
 1602      * <p><b>NOTE</b>: it is dangerous to always call
 1603      * close(false), especially when IndexWriter is not open
 1604      * for very long, because this can result in "merge
 1605      * starvation" whereby long merges will never have a
 1606      * chance to finish.  This will cause too many segments in
 1607      * your index over time.</p>
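           *
           * <p>For example, a minimal sketch, assuming an open
           * <code>writer</code> that must shut down quickly:</p>
           *
           * <pre>
           * // abort running merges instead of waiting for them to finish
           * writer.close(false);
           * </pre>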
 1608      *
 1609      * @param waitForMerges if true, this call will block
 1610      * until all merges complete; else, it will ask all
 1611      * running merges to abort, wait until those merges have
 1612      * finished (which should be at most a few seconds), and
 1613      * then return.
 1614      */
 1615     public void close(boolean waitForMerges) throws CorruptIndexException, IOException {
 1616   
 1617       // Ensure that only one thread actually gets to do the closing:
 1618       if (shouldClose()) {
 1619         // If any methods have hit OutOfMemoryError, then abort
 1620         // on close, in case the internal state of IndexWriter
 1621         // or DocumentsWriter is corrupt
 1622         if (hitOOM)
 1623           rollbackInternal();
 1624         else
 1625           closeInternal(waitForMerges);
 1626       }
 1627     }
 1628   
 1629     // Returns true if this thread should attempt to close, or
 1630     // false if IndexWriter is now closed; else, waits until
 1631     // another thread finishes closing
 1632     synchronized private boolean shouldClose() {
 1633       while(true) {
 1634         if (!closed) {
 1635           if (!closing) {
 1636             closing = true;
 1637             return true;
 1638           } else {
 1639             // Another thread is presently trying to close;
 1640             // wait until it finishes one way (closes
 1641             // successfully) or another (fails to close)
 1642             doWait();
 1643           }
 1644         } else
 1645           return false;
 1646       }
 1647     }
 1648   
 1649     private void closeInternal(boolean waitForMerges) throws CorruptIndexException, IOException {
 1650   
 1651       docWriter.pauseAllThreads();
 1652   
 1653       try {
 1654         if (infoStream != null)
 1655           message("now flush at close");
 1656   
 1657         docWriter.close();
 1658   
 1659         // Only allow a new merge to be triggered if we are
 1660         // going to wait for merges:
 1661         if (!hitOOM) {
 1662           flush(waitForMerges, true, true);
 1663         }
 1664   
 1665         if (waitForMerges)
 1666           // Give merge scheduler last chance to run, in case
 1667           // any pending merges are waiting:
 1668           mergeScheduler.merge(this);
 1669   
 1670         mergePolicy.close();
 1671   
 1672         finishMerges(waitForMerges);
 1673         stopMerges = true;
 1674   
 1675         mergeScheduler.close();
 1676   
 1677         if (infoStream != null)
 1678           message("now call final commit()");
 1679         
 1680         if (!hitOOM) {
 1681           commit(0);
 1682         }
 1683   
 1684         if (infoStream != null)
 1685           message("at close: " + segString());
 1686   
 1687         synchronized(this) {
 1688           readerPool.close();
 1689           docWriter = null;
 1690           deleter.close();
 1691         }
 1692         
 1693         if (writeLock != null) {
 1694           writeLock.release();                          // release write lock
 1695           writeLock = null;
 1696         }
 1697         synchronized(this) {
 1698           closed = true;
 1699         }
 1700       } catch (OutOfMemoryError oom) {
 1701         handleOOM(oom, "closeInternal");
 1702       } finally {
 1703         synchronized(this) {
 1704           closing = false;
 1705           notifyAll();
 1706           if (!closed) {
 1707             if (docWriter != null)
 1708               docWriter.resumeAllThreads();
 1709             if (infoStream != null)
 1710               message("hit exception while closing");
 1711           }
 1712         }
 1713       }
 1714     }
 1715   
 1716     /** Tells the docWriter to close its currently open shared
 1717      *  doc stores (stored fields & vectors files).
 1718      *  Return value specifies whether new doc store files are compound or not.
 1719      */
 1720     private synchronized boolean flushDocStores() throws IOException {
 1721   
 1722       boolean useCompoundDocStore = false;
 1723   
 1724       String docStoreSegment;
 1725   
 1726       boolean success = false;
 1727       try {
 1728         docStoreSegment = docWriter.closeDocStore();
 1729         success = true;
 1730       } finally {
 1731         if (!success && infoStream != null) {
 1732           message("hit exception closing doc store segment");
 1733         }
 1734       }
 1735   
 1736       useCompoundDocStore = mergePolicy.useCompoundDocStore(segmentInfos);
 1737         
 1738       if (useCompoundDocStore && docStoreSegment != null && docWriter.closedFiles().size() != 0) {
 1739         // Now build compound doc store file
 1740   
 1741         if (infoStream != null) {
 1742           message("create compound file " + docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
 1743         }
 1744   
 1745         success = false;
 1746   
 1747         final int numSegments = segmentInfos.size();
 1748         final String compoundFileName = docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION;
 1749   
 1750         try {
 1751           CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName);
 1752           for (final String file :  docWriter.closedFiles() ) {
 1753             cfsWriter.addFile(file);
 1754           }
 1755         
 1756           // Perform the merge
 1757           cfsWriter.close();
 1758           success = true;
 1759   
 1760         } finally {
 1761           if (!success) {
 1762             if (infoStream != null)
 1763               message("hit exception building compound file doc store for segment " + docStoreSegment);
 1764             deleter.deleteFile(compoundFileName);
 1765             docWriter.abort();
 1766           }
 1767         }
 1768   
 1769         for(int i=0;i<numSegments;i++) {
 1770           SegmentInfo si = segmentInfos.info(i);
 1771           if (si.getDocStoreOffset() != -1 &&
 1772               si.getDocStoreSegment().equals(docStoreSegment))
 1773             si.setDocStoreIsCompoundFile(true);
 1774         }
 1775   
 1776         checkpoint();
 1777   
 1778         // In case the files we just merged into a CFS were
 1779         // not previously checkpointed:
 1780         deleter.deleteNewFiles(docWriter.closedFiles());
 1781       }
 1782   
 1783       return useCompoundDocStore;
 1784     }
 1785   
 1786     /** Returns the Directory used by this index. */
 1787     public Directory getDirectory() {     
 1788       // Pass false because the flush during closing calls getDirectory
 1789       ensureOpen(false);
 1790       return directory;
 1791     }
 1792   
 1793     /** Returns the analyzer used by this index. */
 1794     public Analyzer getAnalyzer() {
 1795       ensureOpen();
 1796       return analyzer;
 1797     }
 1798   
 1799     /** Returns total number of docs in this index, including
 1800      *  docs not yet flushed (still in the RAM buffer),
 1801      *  not counting deletions.
 1802      *  @see #numDocs */
 1803     public synchronized int maxDoc() {
 1804       int count;
 1805       if (docWriter != null)
 1806         count = docWriter.getNumDocsInRAM();
 1807       else
 1808         count = 0;
 1809   
 1810       for (int i = 0; i < segmentInfos.size(); i++)
 1811         count += segmentInfos.info(i).docCount;
 1812       return count;
 1813     }
 1814   
 1815     /** Returns total number of docs in this index, including
 1816      *  docs not yet flushed (still in the RAM buffer), and
 1817      *  taking applied deletions into account.  <b>NOTE:</b> buffered deletions
 1818      *  are not counted.  If you really need these to be
 1819      *  counted you should call {@link #commit()} first.
 1820      *  @see #maxDoc */
 1821     public synchronized int numDocs() throws IOException {
 1822       int count;
 1823       if (docWriter != null)
 1824         count = docWriter.getNumDocsInRAM();
 1825       else
 1826         count = 0;
 1827   
 1828       for (int i = 0; i < segmentInfos.size(); i++) {
 1829         final SegmentInfo info = segmentInfos.info(i);
 1830         count += info.docCount - info.getDelCount();
 1831       }
 1832       return count;
 1833     }
 1834   
 1835     public synchronized boolean hasDeletions() throws IOException {
 1836       ensureOpen();
 1837       if (docWriter.hasDeletes())
 1838         return true;
 1839       for (int i = 0; i < segmentInfos.size(); i++)
 1840         if (segmentInfos.info(i).hasDeletions())
 1841           return true;
 1842       return false;
 1843     }
 1844   
 1845     /**
 1846      * The maximum number of terms that will be indexed for a single field in a
 1847      * document.  This limits the amount of memory required for indexing, so that
 1848      * collections with very large files will not crash the indexing process by
 1849      * running out of memory.<p/>
 1850      * Note that this effectively truncates large documents, excluding from the
 1851      * index terms that occur further in the document.  If you know your source
 1852      * documents are large, be sure to set this value high enough to accommodate
 1853      * the expected size.  If you set it to Integer.MAX_VALUE, then the only limit
 1854      * is your memory, but you should anticipate an OutOfMemoryError.<p/>
 1855      * By default, no more than 10,000 terms will be indexed for a field.
 1856      *
 1857      * @see MaxFieldLength
 1858      */
 1859     private int maxFieldLength;
 1860   
 1861     /**
 1862      * Adds a document to this index.  If the document contains more than
 1863      * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
 1864      * discarded.
 1865      *
 1866      * <p> Note that if an Exception is hit (for example disk full)
 1867      * then the index will be consistent, but this document
 1868      * may not have been added.  Furthermore, it's possible
 1869      * the index will have one segment in non-compound format
 1870      * even when using compound files (when a merge has
 1871      * partially succeeded).</p>
 1872      *
 1873      * <p> This method periodically flushes pending documents
 1874      * to the Directory (see <a href="#flush">above</a>), and
 1875      * also periodically triggers segment merges in the index
 1876      * according to the {@link MergePolicy} in use.</p>
 1877      *
 1878      * <p>Merges temporarily consume space in the
 1879      * directory. The amount of space required is up to 1X the
 1880      * size of all segments being merged, when no
 1881      * readers/searchers are open against the index, and up to
 1882      * 2X the size of all segments being merged when
 1883      * readers/searchers are open against the index (see
 1884      * {@link #optimize()} for details). The sequence of
 1885      * primitive merge operations performed is governed by the
 1886      * merge policy.
 1887      *
 1888      * <p>Note that each term in the document can be no longer
 1889      * than 16383 characters, otherwise an
 1890      * IllegalArgumentException will be thrown.</p>
 1891      *
 1892      * <p>Note that it's possible to create an invalid Unicode
 1893      * string in java if a UTF16 surrogate pair is malformed.
 1894      * In this case, the invalid characters are silently
 1895      * replaced with the Unicode replacement character
 1896      * U+FFFD.</p>
 1897      *
 1898      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 1899      * you should immediately close the writer.  See <a
 1900      * href="#OOME">above</a> for details.</p>
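           *
           * <p>For example, a minimal sketch, assuming an open
           * <code>writer</code>; the field names are illustrative only:</p>
           *
           * <pre>
           * Document doc = new Document();
           * doc.add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
           * doc.add(new Field("body", "some text to index", Field.Store.NO, Field.Index.ANALYZED));
           * writer.addDocument(doc);
           * </pre>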
 1901      *
 1902      * @throws CorruptIndexException if the index is corrupt
 1903      * @throws IOException if there is a low-level IO error
 1904      */
 1905     public void addDocument(Document doc) throws CorruptIndexException, IOException {
 1906       addDocument(doc, analyzer);
 1907     }
 1908   
 1909     /**
 1910      * Adds a document to this index, using the provided analyzer instead of the
 1911      * value of {@link #getAnalyzer()}.  If the document contains more than
 1912      * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
 1913      * discarded.
 1914      *
 1915      * <p>See {@link #addDocument(Document)} for details on
 1916      * index and IndexWriter state after an Exception, and
 1917      * flushing/merging temporary free space requirements.</p>
 1918      *
 1919      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 1920      * you should immediately close the writer.  See <a
 1921      * href="#OOME">above</a> for details.</p>
 1922      *
 1923      * @throws CorruptIndexException if the index is corrupt
 1924      * @throws IOException if there is a low-level IO error
 1925      */
 1926     public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
 1927       ensureOpen();
 1928       boolean doFlush = false;
 1929       boolean success = false;
 1930       try {
 1931         try {
 1932           doFlush = docWriter.addDocument(doc, analyzer);
 1933           success = true;
 1934         } finally {
 1935           if (!success) {
 1936   
 1937             if (infoStream != null)
 1938               message("hit exception adding document");
 1939   
 1940             synchronized (this) {
 1941               // If docWriter has some aborted files that were
 1942               // never incref'd, then we clean them up here
 1943               if (docWriter != null) {
 1944                 final Collection<String> files = docWriter.abortedFiles();
 1945                 if (files != null)
 1946                   deleter.deleteNewFiles(files);
 1947               }
 1948             }
 1949           }
 1950         }
 1951         if (doFlush)
 1952           flush(true, false, false);
 1953       } catch (OutOfMemoryError oom) {
 1954         handleOOM(oom, "addDocument");
 1955       }
 1956     }
 1957   
 1958     /**
 1959      * Deletes the document(s) containing <code>term</code>.
 1960      *
 1961      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 1962      * you should immediately close the writer.  See <a
 1963      * href="#OOME">above</a> for details.</p>
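           *
           * <p>For example, a minimal sketch, assuming an open
           * <code>writer</code> and an indexed, not-analyzed "id" field:</p>
           *
           * <pre>
           * writer.deleteDocuments(new Term("id", "42"));
           * </pre>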
 1964      *
 1965      * @param term the term to identify the documents to be deleted
 1966      * @throws CorruptIndexException if the index is corrupt
 1967      * @throws IOException if there is a low-level IO error
 1968      */
 1969     public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
 1970       ensureOpen();
 1971       try {
 1972         boolean doFlush = docWriter.bufferDeleteTerm(term);
 1973         if (doFlush)
 1974           flush(true, false, false);
 1975       } catch (OutOfMemoryError oom) {
 1976         handleOOM(oom, "deleteDocuments(Term)");
 1977       }
 1978     }
 1979   
 1980     /**
 1981      * Deletes the document(s) containing any of the
 1982      * terms. All deletes are flushed at the same time.
 1983      *
 1984      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 1985      * you should immediately close the writer.  See <a
 1986      * href="#OOME">above</a> for details.</p>
 1987      *
 1988      * @param terms array of terms to identify the documents
 1989      * to be deleted
 1990      * @throws CorruptIndexException if the index is corrupt
 1991      * @throws IOException if there is a low-level IO error
 1992      */
 1993     public void deleteDocuments(Term... terms) throws CorruptIndexException, IOException {
 1994       ensureOpen();
 1995       try {
 1996         boolean doFlush = docWriter.bufferDeleteTerms(terms);
 1997         if (doFlush)
 1998           flush(true, false, false);
 1999       } catch (OutOfMemoryError oom) {
 2000         handleOOM(oom, "deleteDocuments(Term..)");
 2001       }
 2002     }
 2003   
 2004     /**
 2005      * Deletes the document(s) matching the provided query.
 2006      *
 2007      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 2008      * you should immediately close the writer.  See <a
 2009      * href="#OOME">above</a> for details.</p>
 2010      *
 2011      * @param query the query to identify the documents to be deleted
 2012      * @throws CorruptIndexException if the index is corrupt
 2013      * @throws IOException if there is a low-level IO error
 2014      */
 2015     public void deleteDocuments(Query query) throws CorruptIndexException, IOException {
 2016       ensureOpen();
 2017       boolean doFlush = docWriter.bufferDeleteQuery(query);
 2018       if (doFlush)
 2019         flush(true, false, false);
 2020     }
 2021   
 2022     /**
 2023      * Deletes the document(s) matching any of the provided queries.
 2024      * All deletes are flushed at the same time.
 2025      *
 2026      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 2027      * you should immediately close the writer.  See <a
 2028      * href="#OOME">above</a> for details.</p>
 2029      *
 2030      * @param queries array of queries to identify the documents
 2031      * to be deleted
 2032      * @throws CorruptIndexException if the index is corrupt
 2033      * @throws IOException if there is a low-level IO error
 2034      */
 2035     public void deleteDocuments(Query... queries) throws CorruptIndexException, IOException {
 2036       ensureOpen();
 2037       boolean doFlush = docWriter.bufferDeleteQueries(queries);
 2038       if (doFlush)
 2039         flush(true, false, false);
 2040     }
 2041   
 2042     /**
 2043      * Updates a document by first deleting the document(s)
 2044      * containing <code>term</code> and then adding the new
 2045      * document.  The delete and then add are atomic as seen
 2046      * by a reader on the same index (flush may happen only after
 2047      * the add).
 2048      *
 2049      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 2050      * you should immediately close the writer.  See <a
 2051      * href="#OOME">above</a> for details.</p>
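           *
           * <p>For example, a minimal sketch, assuming an open
           * <code>writer</code> and a unique, not-analyzed "id" field:</p>
           *
           * <pre>
           * Document doc = new Document();
           * doc.add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
           * doc.add(new Field("body", "revised text", Field.Store.NO, Field.Index.ANALYZED));
           * // atomically replaces any document(s) whose "id" term is "42"
           * writer.updateDocument(new Term("id", "42"), doc);
           * </pre>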
 2052      *
 2053      * @param term the term to identify the document(s) to be
 2054      * deleted
 2055      * @param doc the document to be added
 2056      * @throws CorruptIndexException if the index is corrupt
 2057      * @throws IOException if there is a low-level IO error
 2058      */
 2059     public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException {
 2060       ensureOpen();
 2061       updateDocument(term, doc, getAnalyzer());
 2062     }
 2063   
 2064     /**
 2065      * Updates a document by first deleting the document(s)
 2066      * containing <code>term</code> and then adding the new
 2067      * document.  The delete and then add are atomic as seen
 2068      * by a reader on the same index (flush may happen only after
 2069      * the add).
 2070      *
 2071      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 2072      * you should immediately close the writer.  See <a
 2073      * href="#OOME">above</a> for details.</p>
 2074      *
 2075      * @param term the term to identify the document(s) to be
 2076      * deleted
 2077      * @param doc the document to be added
 2078      * @param analyzer the analyzer to use when analyzing the document
 2079      * @throws CorruptIndexException if the index is corrupt
 2080      * @throws IOException if there is a low-level IO error
 2081      */
 2082     public void updateDocument(Term term, Document doc, Analyzer analyzer)
 2083         throws CorruptIndexException, IOException {
 2084       ensureOpen();
 2085       try {
 2086         boolean doFlush = false;
 2087         boolean success = false;
 2088         try {
 2089           doFlush = docWriter.updateDocument(term, doc, analyzer);
 2090           success = true;
 2091         } finally {
 2092           if (!success) {
 2093   
 2094             if (infoStream != null)
 2095               message("hit exception updating document");
 2096   
 2097             synchronized (this) {
 2098               // If docWriter has some aborted files that were
 2099               // never incref'd, then we clean them up here
 2100               final Collection<String> files = docWriter.abortedFiles();
 2101               if (files != null)
 2102                 deleter.deleteNewFiles(files);
 2103             }
 2104           }
 2105         }
 2106         if (doFlush)
 2107           flush(true, false, false);
 2108       } catch (OutOfMemoryError oom) {
 2109         handleOOM(oom, "updateDocument");
 2110       }
 2111     }
 2112   
 2113     // for test purposes
 2114     final synchronized int getSegmentCount(){
 2115       return segmentInfos.size();
 2116     }
 2117   
 2118     // for test purposes
 2119     final synchronized int getNumBufferedDocuments(){
 2120       return docWriter.getNumDocsInRAM();
 2121     }
 2122   
 2123     // for test purposes
 2124     final synchronized int getDocCount(int i) {
 2125       if (i >= 0 && i < segmentInfos.size()) {
 2126         return segmentInfos.info(i).docCount;
 2127       } else {
 2128         return -1;
 2129       }
 2130     }
 2131   
 2132     // for test purposes
 2133     final synchronized int getFlushCount() {
 2134       return flushCount;
 2135     }
 2136   
 2137     // for test purposes
 2138     final synchronized int getFlushDeletesCount() {
 2139       return flushDeletesCount;
 2140     }
 2141   
 2142     final String newSegmentName() {
 2143       // Cannot synchronize on IndexWriter because that causes
 2144       // deadlock
 2145       synchronized(segmentInfos) {
 2146         // Important to increment changeCount so that the
 2147         // segmentInfos is written on close.  Otherwise we
 2148         // could close, re-open and re-return the same segment
 2149         // name that was previously returned which can cause
 2150         // problems at least with ConcurrentMergeScheduler.
 2151         changeCount++;
 2152         return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
 2153       }
 2154     }
 2155   
 2156     /** If non-null, information about merges will be printed to this.
 2157      */
 2158     private PrintStream infoStream = null;
 2159     private static PrintStream defaultInfoStream = null;
 2160   
 2161     /**
 2162      * Requests an "optimize" operation on an index, priming the index
 2163      * for the fastest available search. Traditionally this has meant
 2164      * merging all segments into a single segment as is done in the
 2165      * default merge policy, but individual merge policies may implement
 2166      * optimize in different ways.
 2167      *
 2168      * <p>It is recommended that this method be called upon completion of indexing.  In
 2169      * environments with frequent updates, optimize is best done during low volume times, if at all. 
 2170      * 
 2171      * </p>
 2172      * <p>See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion. </p>
 2173      *
 2174      * <p>Note that optimize requires 2X the index size free
 2175      * space in your Directory.  For example, if your index
 2176      * size is 10 MB then you need 20 MB free for optimize to
 2177      * complete.</p>
 2178      *
 2179      * <p>If some but not all readers re-open while an
 2180      * optimize is underway, this will cause > 2X temporary
 2181      * space to be consumed as those new readers will then
 2182      * hold open the partially optimized segments at that
 2183      * time.  It is best not to re-open readers while optimize
 2184      * is running.</p>
 2185      *
 2186      * <p>The actual temporary usage could be much less than
 2187      * these figures (it depends on many factors).</p>
 2188      *
 2189      * <p>In general, once the optimize completes, the total size of the
 2190      * index will be less than the size of the starting index.
 2191      * It could be quite a bit smaller (if there were many
 2192      * pending deletes) or just slightly smaller.</p>
 2193      *
 2194      * <p>If an Exception is hit during optimize(), for example
 2195      * due to disk full, the index will not be corrupt and no
 2196      * documents will have been lost.  However, it may have
 2197      * been partially optimized (some segments were merged but
 2198      * not all), and it's possible that one of the segments in
 2199      * the index will be in non-compound format even when
 2200      * using compound file format.  This will occur when the
 2201      * Exception is hit during conversion of the segment into
 2202      * compound format.</p>
 2203      *
 2204      * <p>This call will optimize those segments present in
 2205      * the index when the call started.  If other threads are
 2206      * still adding documents and flushing segments, those
 2207      * newly created segments will not be optimized unless you
 2208      * call optimize again.</p>
 2209      *
 2210      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 2211      * you should immediately close the writer.  See <a
 2212      * href="#OOME">above</a> for details.</p>
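           *
           * <p>For example, a minimal sketch, assuming an open
           * <code>writer</code> that has just finished a batch build:</p>
           *
           * <pre>
           * writer.optimize();   // merge down to a single segment
           * writer.close();
           * </pre>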
 2213      *
 2214      * @throws CorruptIndexException if the index is corrupt
 2215      * @throws IOException if there is a low-level IO error
 2216      * @see LogMergePolicy#findMergesForOptimize
 2217     */
 2218     public void optimize() throws CorruptIndexException, IOException {
 2219       optimize(true);
 2220     }
 2221   
 2222     /**
 2223      * Optimize the index down to <= maxNumSegments.  If
 2224      * maxNumSegments==1 then this is the same as {@link
 2225      * #optimize()}.
 2226      *
 2227      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 2228      * you should immediately close the writer.  See <a
 2229      * href="#OOME">above</a> for details.</p>
 2230      *
 2231      * @param maxNumSegments maximum number of segments left
 2232      * in the index after optimization finishes
 2233      */
 2234     public void optimize(int maxNumSegments) throws CorruptIndexException, IOException {
 2235       optimize(maxNumSegments, true);
 2236     }
 2237   
 2238     /** Just like {@link #optimize()}, except you can specify
 2239      *  whether the call should block until the optimize
 2240      *  completes.  This is only meaningful with a
 2241      *  {@link MergeScheduler} that is able to run merges in
 2242      *  background threads.
 2243      *
 2244      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 2245      * you should immediately close the writer.  See <a
 2246      * href="#OOME">above</a> for details.</p>
 2247      */
 2248     public void optimize(boolean doWait) throws CorruptIndexException, IOException {
 2249       optimize(1, doWait);
 2250     }
 2251   
 2252     /** Just like {@link #optimize(int)}, except you can
 2253      *  specify whether the call should block until the
 2254      *  optimize completes.  This is only meaningful with a
 2255      *  {@link MergeScheduler} that is able to run merges in
 2256      *  background threads.
 2257      *
 2258      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 2259      * you should immediately close the writer.  See <a
 2260      * href="#OOME">above</a> for details.</p>
 2261      */
 2262     public void optimize(int maxNumSegments, boolean doWait) throws CorruptIndexException, IOException {
 2263       ensureOpen();
 2264   
 2265       if (maxNumSegments < 1)
 2266         throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);
 2267   
 2268       if (infoStream != null)
 2269         message("optimize: index now " + segString());
 2270   
 2271       flush(true, false, true);
 2272   
 2273       synchronized(this) {
 2274         resetMergeExceptions();
 2275         segmentsToOptimize = new HashSet<SegmentInfo>();
 2276         final int numSegments = segmentInfos.size();
 2277         for(int i=0;i<numSegments;i++)
 2278           segmentsToOptimize.add(segmentInfos.info(i));
 2279         
 2280         // Now mark all pending & running merges as optimize
 2281         // merge:
 2282         for(final MergePolicy.OneMerge merge  : pendingMerges) {
 2283           merge.optimize = true;
 2284           merge.maxNumSegmentsOptimize = maxNumSegments;
 2285         }
 2286   
 2287         for ( final MergePolicy.OneMerge merge: runningMerges ) {
 2288           merge.optimize = true;
 2289           merge.maxNumSegmentsOptimize = maxNumSegments;
 2290         }
 2291       }
 2292   
 2293       maybeMerge(maxNumSegments, true);
 2294   
 2295       if (doWait) {
 2296         synchronized(this) {
 2297           while(true) {
 2298   
 2299             if (hitOOM) {
 2300               throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete optimize");
 2301             }
 2302   
 2303             if (mergeExceptions.size() > 0) {
 2304               // Forward any exceptions in background merge
 2305               // threads to the current thread:
 2306               final int size = mergeExceptions.size();
 2307               for(int i=0;i<size;i++) {
 2308                 final MergePolicy.OneMerge merge = mergeExceptions.get(i);
 2309                 if (merge.optimize) {
 2310                   IOException err = new IOException("background merge hit exception: " + merge.segString(directory));
 2311                   final Throwable t = merge.getException();
 2312                   if (t != null)
 2313                     err.initCause(t);
 2314                   throw err;
 2315                 }
 2316               }
 2317             }
 2318   
 2319             if (optimizeMergesPending())
 2320               doWait();
 2321             else
 2322               break;
 2323           }
 2324         }
 2325   
 2326         // If close is called while we are still
 2327         // running, throw an exception so the calling
 2328         // thread will know the optimize did not
 2329         // complete
 2330         ensureOpen();
 2331       }
 2332   
 2333       // NOTE: in the ConcurrentMergeScheduler case, when
 2334       // doWait is false, we can return immediately while
 2335       // background threads accomplish the optimization
 2336     }
 2337   
 2338     /** Returns true if any merges in pendingMerges or
 2339      *  runningMerges are optimization merges. */
 2340     private synchronized boolean optimizeMergesPending() {
 2341       for (final MergePolicy.OneMerge merge : pendingMerges) {
 2342         if (merge.optimize)
 2343           return true;
 2344       }
 2345       
 2346       for (final MergePolicy.OneMerge merge : runningMerges) {
 2347         if (merge.optimize)
 2348           return true;
 2349       }
 2350       
 2351       return false;
 2352     }
 2353   
 2354     /** Just like {@link #expungeDeletes()}, except you can
 2355      *  specify whether the call should block until the
 2356      *  operation completes.  This is only meaningful with a
 2357      *  {@link MergeScheduler} that is able to run merges in
 2358      *  background threads.
 2359      *
 2360      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 2361      * you should immediately close the writer.  See <a
 2362      * href="#OOME">above</a> for details.</p>
 2363      */
 2364     public void expungeDeletes(boolean doWait)
 2365       throws CorruptIndexException, IOException {
 2366       ensureOpen();
 2367   
 2368       if (infoStream != null)
 2369         message("expungeDeletes: index now " + segString());
 2370   
 2371       MergePolicy.MergeSpecification spec;
 2372   
 2373       synchronized(this) {
 2374         spec = mergePolicy.findMergesToExpungeDeletes(segmentInfos);
 2375         if (spec != null) {
 2376           final int numMerges = spec.merges.size();
 2377           for(int i=0;i<numMerges;i++)
 2378             registerMerge(spec.merges.get(i));
 2379         }
 2380       }
 2381   
 2382       mergeScheduler.merge(this);
 2383   
 2384       if (spec != null && doWait) {
 2385         final int numMerges = spec.merges.size();
 2386         synchronized(this) {
 2387           boolean running = true;
 2388           while(running) {
 2389   
 2390             if (hitOOM) {
 2391               throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete expungeDeletes");
 2392             }
 2393   
 2394             // Check each merge that MergePolicy asked us to
 2395             // do, to see if any of them are still running and
 2396             // if any of them have hit an exception.
 2397             running = false;
 2398             for(int i=0;i<numMerges;i++) {
 2399               final MergePolicy.OneMerge merge = spec.merges.get(i);
 2400               if (pendingMerges.contains(merge) || runningMerges.contains(merge))
 2401                 running = true;
 2402               Throwable t = merge.getException();
 2403               if (t != null) {
 2404                 IOException ioe = new IOException("background merge hit exception: " + merge.segString(directory));
 2405                 ioe.initCause(t);
 2406                 throw ioe;
 2407               }
 2408             }
 2409   
 2410             // If any of our merges are still running, wait:
 2411             if (running)
 2412               doWait();
 2413           }
 2414         }
 2415       }
 2416   
 2417       // NOTE: in the ConcurrentMergeScheduler case, when
 2418       // doWait is false, we can return immediately while
 2419       // background threads accomplish the optimization
 2420     }
 2421   
 2422   
 2423     /** Expunges all deletes from the index.  When an index
 2424      *  has many document deletions (or updates to existing
 2425      *  documents), it's best to either call optimize or
 2426      *  expungeDeletes to remove all unused data in the index
 2427      *  associated with the deleted documents.  To see how
 2428      *  many deletions you have pending in your index, call
 2429      *  {@link IndexReader#numDeletedDocs}.
 2430      *  This saves disk space and memory usage while
 2431      *  searching.  expungeDeletes should be somewhat faster
 2432      *  than optimize since it does not insist on reducing the
 2433      *  index to a single segment (though, this depends on the
 2434      *  {@link MergePolicy}; see {@link
 2435      *  MergePolicy#findMergesToExpungeDeletes}.). Note that
 2436      *  this call does not first commit any buffered
 2437      *  documents, so you must do so yourself if necessary.
 2438      *  See also {@link #expungeDeletes(boolean)}
 2439      *
 2440      *  <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 2441      *  you should immediately close the writer.  See <a
 2442      *  href="#OOME">above</a> for details.</p>
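           *
           *  <p>For example, a minimal sketch, assuming an open
           *  <code>writer</code> after a large batch of deletes:</p>
           *
           *  <pre>
           *  writer.commit();          // make buffered deletes durable first
           *  writer.expungeDeletes();  // reclaim space held by deleted docs
           *  </pre>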
 2443      */
 2444     public void expungeDeletes() throws CorruptIndexException, IOException {
 2445       expungeDeletes(true);
 2446     }
 2447   
 2448     /**
 2449      * Expert: asks the mergePolicy whether any merges are
 2450      * necessary now and, if so, runs the requested merges and
 2451      * then iterates (testing again whether merges are needed) until no
 2452      * more merges are returned by the mergePolicy.
 2453      *
 2454      * Explicit calls to maybeMerge() are usually not
 2455      * necessary. The most common case is when merge policy
 2456      * parameters have changed.
 2457      *
 2458      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 2459      * you should immediately close the writer.  See <a
 2460      * href="#OOME">above</a> for details.</p>
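           *
           * <p>For example, a minimal sketch, assuming an open
           * <code>writer</code> whose merge settings were just changed:</p>
           *
           * <pre>
           * writer.setMergeFactor(5);
           * writer.maybeMerge();   // apply the new settings to existing segments
           * </pre>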
 2461      */
 2462     public final void maybeMerge() throws CorruptIndexException, IOException {
 2463       maybeMerge(false);
 2464     }
 2465   
 2466     private final void maybeMerge(boolean optimize) throws CorruptIndexException, IOException {
 2467       maybeMerge(1, optimize);
 2468     }
 2469   
 2470     private final void maybeMerge(int maxNumSegmentsOptimize, boolean optimize) throws CorruptIndexException, IOException {
 2471       updatePendingMerges(maxNumSegmentsOptimize, optimize);
 2472       mergeScheduler.merge(this);
 2473     }
 2474   
 2475     private synchronized void updatePendingMerges(int maxNumSegmentsOptimize, boolean optimize)
 2476       throws CorruptIndexException, IOException {
 2477       assert !optimize || maxNumSegmentsOptimize > 0;
 2478   
 2479       if (stopMerges)
 2480         return;
 2481   
 2482       // Do not start new merges if we've hit OOME
 2483       if (hitOOM) {
 2484         return;
 2485       }
 2486   
 2487       final MergePolicy.MergeSpecification spec;
 2488       if (optimize) {
 2489         spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize);
 2490   
 2491         if (spec != null) {
 2492           final int numMerges = spec.merges.size();
 2493           for(int i=0;i<numMerges;i++) {
 2494             final MergePolicy.OneMerge merge = ( spec.merges.get(i));
 2495             merge.optimize = true;
 2496             merge.maxNumSegmentsOptimize = maxNumSegmentsOptimize;
 2497           }
 2498         }
 2499   
 2500       } else
 2501         spec = mergePolicy.findMerges(segmentInfos);
 2502   
 2503       if (spec != null) {
 2504         final int numMerges = spec.merges.size();
 2505         for(int i=0;i<numMerges;i++)
 2506           registerMerge(spec.merges.get(i));
 2507       }
 2508     }
 2509   
 2510     /** Expert: the {@link MergeScheduler} calls this method
 2511      *  to retrieve the next merge requested by the
 2512      *  MergePolicy */
 2513     synchronized MergePolicy.OneMerge getNextMerge() {
 2514       if (pendingMerges.size() == 0)
 2515         return null;
 2516       else {
 2517         // Advance the merge from pending to running
 2518         MergePolicy.OneMerge merge = pendingMerges.removeFirst();
 2519         runningMerges.add(merge);
 2520         return merge;
 2521       }
 2522     }
 2523   
 2524     /** Like getNextMerge() except only returns a merge if it's
 2525      *  external. */
 2526     private synchronized MergePolicy.OneMerge getNextExternalMerge() {
 2527       if (pendingMerges.size() == 0)
 2528         return null;
 2529       else {
 2530         Iterator<MergePolicy.OneMerge> it = pendingMerges.iterator();
 2531         while(it.hasNext()) {
 2532           MergePolicy.OneMerge merge = it.next();
 2533           if (merge.isExternal) {
 2534             // Advance the merge from pending to running
 2535             it.remove();
 2536             runningMerges.add(merge);
 2537             return merge;
 2538           }
 2539         }
 2540   
 2541         // None of the existing merges involve external segments
 2542         return null;
 2543       }
 2544     }
 2545   
 2546     /*
 2547      * Begin a transaction.  During a transaction, any segment
 2548      * merges that happen (or ram segments flushed) will not
 2549      * write a new segments file and will not remove any files
 2550      * that were present at the start of the transaction.  You
 2551      * must make a matched (try/finally) call to
 2552      * commitTransaction() or rollbackTransaction() to finish
 2553      * the transaction.
 2554      *
 2555      * Note that buffered documents and delete terms are not handled
 2556      * within the transactions, so they must be flushed before the
 2557      * transaction is started.
 2558      */
 2559     private synchronized void startTransaction(boolean haveReadLock) throws IOException {
 2560   
 2561       boolean success = false;
 2562       try {
 2563         if (infoStream != null)
 2564           message("now start transaction");
 2565   
 2566         assert docWriter.getNumBufferedDeleteTerms() == 0 :
 2567         "calling startTransaction with buffered delete terms not supported: numBufferedDeleteTerms=" + docWriter.getNumBufferedDeleteTerms();
 2568         assert docWriter.getNumDocsInRAM() == 0 :
 2569         "calling startTransaction with buffered documents not supported: numDocsInRAM=" + docWriter.getNumDocsInRAM();
 2570   
 2571         ensureOpen();
 2572   
 2573         // If a transaction is trying to roll back (because
 2574         // addIndexes hit an exception) then wait here until
 2575         // that's done:
 2576         synchronized(this) {
 2577           while(stopMerges)
 2578             doWait();
 2579         }
 2580         success = true;
 2581       } finally {
 2582       // Release the read lock if our caller held it, on
 2583         // hitting an exception
 2584         if (!success && haveReadLock)
 2585           releaseRead();
 2586       }
 2587   
 2588       if (haveReadLock) {
 2589         upgradeReadToWrite();
 2590       } else {
 2591         acquireWrite();
 2592       }
 2593   
 2594       success = false;
 2595       try {
 2596         localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
 2597   
 2598         assert !hasExternalSegments();
 2599   
 2600         localFlushedDocCount = docWriter.getFlushedDocCount();
 2601   
 2602         // We must "protect" our files at this point from
 2603         // deletion in case we need to rollback:
 2604         deleter.incRef(segmentInfos, false);
 2605   
 2606         success = true;
 2607       } finally {
 2608         if (!success)
 2609           finishAddIndexes();
 2610       }
 2611     }
 2612   
 2613     /*
 2614      * Rolls back the transaction and restores state to where
 2615      * we were at the start.
 2616      */
 2617     private synchronized void rollbackTransaction() throws IOException {
 2618   
 2619       if (infoStream != null)
 2620         message("now rollback transaction");
 2621   
 2622       if (docWriter != null) {
 2623         docWriter.setFlushedDocCount(localFlushedDocCount);
 2624       }
 2625   
 2626       // Must finish merges before rolling back segmentInfos
 2627       // so merges don't hit exceptions on trying to commit
 2628       // themselves, don't get files deleted out from under
 2629       // them, etc:
 2630       finishMerges(false);
 2631   
 2632       // Keep the same segmentInfos instance but replace all
 2633       // of its SegmentInfo instances.  This is so the next
 2634       // attempt to commit using this instance of IndexWriter
 2635       // will always write to a new generation ("write once").
 2636       segmentInfos.clear();
 2637       segmentInfos.addAll(localRollbackSegmentInfos);
 2638       localRollbackSegmentInfos = null;
 2639   
 2640       // This must come after we rollback segmentInfos, so
 2641       // that if a commit() kicks off it does not see the
 2642       // segmentInfos with external segments
 2643       finishAddIndexes();
 2644   
 2645       // Ask deleter to locate unreferenced files we had
 2646       // created & remove them:
 2647       deleter.checkpoint(segmentInfos, false);
 2648   
 2649       // Remove the incRef we did in startTransaction:
 2650       deleter.decRef(segmentInfos);
 2651   
 2652       // Also ask deleter to remove any newly created files
 2653       // that were never incref'd; this "garbage" is created
 2654       // when a merge kicks off but aborts part way through
 2655       // before it had a chance to incRef the files it had
 2656       // partially created
 2657       deleter.refresh();
 2658       
 2659       notifyAll();
 2660   
 2661       assert !hasExternalSegments();
 2662     }
 2663   
 2664     /*
 2665      * Commits the transaction.  This will write the new
 2666      * segments file and remove any pending deletions we have
 2667      * accumulated during the transaction.
 2668      */
 2669     private synchronized void commitTransaction() throws IOException {
 2670   
 2671       if (infoStream != null)
 2672         message("now commit transaction");
 2673   
 2674       // Give deleter a chance to remove files now:
 2675       checkpoint();
 2676   
 2677       // Remove the incRef we did in startTransaction.
 2678       deleter.decRef(localRollbackSegmentInfos);
 2679   
 2680       localRollbackSegmentInfos = null;
 2681   
 2682       assert !hasExternalSegments();
 2683   
 2684       finishAddIndexes();
 2685     }
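
  /*
   * The matched try/finally pattern described above, as the addIndexes*
   * methods below use it (internal code only; startTransaction,
   * commitTransaction and rollbackTransaction are private):
   *
   *   startTransaction(false);
   *   boolean success = false;
   *   try {
   *     // ... merge or copy segments into segmentInfos ...
   *     success = true;
   *   } finally {
   *     if (success)
   *       commitTransaction();
   *     else
   *       rollbackTransaction();
   *   }
   */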
 2686   
 2687     /**
 2688      * Close the <code>IndexWriter</code> without committing
 2689      * any changes that have occurred since the last commit
 2690      * (or since it was opened, if commit hasn't been called).
 2691      * This removes any temporary files that had been created,
 2692      * after which the state of the index will be the same as
 2693      * it was when commit() was last called or when this
 2694      * writer was first opened.  This also clears a previous
 2695      * call to {@link #prepareCommit}.
 2696      * @throws IOException if there is a low-level IO error
 2697      */
 2698     public void rollback() throws IOException {
 2699       ensureOpen();
 2700   
 2701       // Ensure that only one thread actually gets to do the closing:
 2702       if (shouldClose())
 2703         rollbackInternal();
 2704     }
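
  /*
   * A minimal caller-side sketch of rollback(): discard everything indexed
   * since the last commit if a batch fails part-way through.  "writer" is an
   * open IndexWriter and addAllDocuments() stands in for application code.
   *
   *   boolean ok = false;
   *   try {
   *     addAllDocuments(writer);
   *     writer.commit();
   *     ok = true;
   *   } finally {
   *     if (ok)
   *       writer.close();
   *     else
   *       writer.rollback();   // drops uncommitted changes and closes the writer
   *   }
   */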
 2705   
 2706     private void rollbackInternal() throws IOException {
 2707   
 2708       boolean success = false;
 2709   
 2710       docWriter.pauseAllThreads();
 2711   
 2712       try {
 2713         finishMerges(false);
 2714   
 2715         // Must pre-close these two, in case they increment
 2716         // changeCount so that we can then set it to false
 2717         // before calling closeInternal
 2718         mergePolicy.close();
 2719         mergeScheduler.close();
 2720   
 2721         synchronized(this) {
 2722   
 2723           if (pendingCommit != null) {
 2724             pendingCommit.rollbackCommit(directory);
 2725             deleter.decRef(pendingCommit);
 2726             pendingCommit = null;
 2727             notifyAll();
 2728           }
 2729   
 2730           // Keep the same segmentInfos instance but replace all
 2731           // of its SegmentInfo instances.  This is so the next
 2732           // attempt to commit using this instance of IndexWriter
 2733           // will always write to a new generation ("write
 2734           // once").
 2735           segmentInfos.clear();
 2736           segmentInfos.addAll(rollbackSegmentInfos);
 2737   
 2738           assert !hasExternalSegments();
 2739           
 2740           docWriter.abort();
 2741   
 2742           assert testPoint("rollback before checkpoint");
 2743   
 2744           // Ask deleter to locate unreferenced files & remove
 2745           // them:
 2746           deleter.checkpoint(segmentInfos, false);
 2747           deleter.refresh();
 2748         }
 2749   
 2750         // Don't bother saving any changes in our segmentInfos
 2751         readerPool.clear(null);
 2752   
 2753         lastCommitChangeCount = changeCount;
 2754   
 2755         success = true;
 2756       } catch (OutOfMemoryError oom) {
 2757         handleOOM(oom, "rollbackInternal");
 2758       } finally {
 2759         synchronized(this) {
 2760           if (!success) {
 2761             docWriter.resumeAllThreads();
 2762             closing = false;
 2763             notifyAll();
 2764             if (infoStream != null)
 2765               message("hit exception during rollback");
 2766           }
 2767         }
 2768       }
 2769   
 2770       closeInternal(false);
 2771     }
 2772   
 2773     /**
 2774      * Delete all documents in the index.
 2775      *
 2776      * <p>This method will drop all buffered documents and will 
 2777      *    remove all segments from the index. This change will not be
 2778      *    visible until a {@link #commit()} has been called. This method
 2779      *    can be rolled back using {@link #rollback()}.</p>
 2780      *
 2781      * <p>NOTE: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ).</p>
 2782      *
 2783      * <p>NOTE: this method will forcefully abort all merges
 2784      *    in progress.  If other threads are running {@link
 2785      *    #optimize()} or any of the addIndexes methods, they
 2786      *    will receive {@link MergePolicy.MergeAbortedException}s.
 2787      */
 2788     public synchronized void deleteAll() throws IOException {
 2789       docWriter.pauseAllThreads();
 2790       try {
 2791   
 2792         // Abort any running merges
 2793         finishMerges(false);
 2794   
 2795         // Remove any buffered docs
 2796         docWriter.abort();
 2797         docWriter.setFlushedDocCount(0);
 2798   
 2799         // Remove all segments
 2800         segmentInfos.clear();
 2801   
 2802         // Ask deleter to locate unreferenced files & remove them:
 2803         deleter.checkpoint(segmentInfos, false);
 2804         deleter.refresh();
 2805   
 2806         // Don't bother saving any changes in our segmentInfos
 2807         readerPool.clear(null);      
 2808   
 2809         // Mark that the index has changed
 2810         ++changeCount;
 2811       } catch (OutOfMemoryError oom) {
 2812         handleOOM(oom, "deleteAll");
 2813       } finally {
 2814         docWriter.resumeAllThreads();
 2815         if (infoStream != null) {
 2816           message("hit exception during deleteAll");
 2817         }
 2818       }
 2819     }
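
  /*
   * A minimal caller-side sketch of deleteAll(): wipe the index but keep the
   * same writer (and write lock) for re-indexing; nothing becomes visible to
   * readers until commit().  "writer" and "docs" are assumed to exist.
   *
   *   writer.deleteAll();          // drop buffered docs and all segments
   *   for (Document doc : docs) {
   *     writer.addDocument(doc);   // rebuild from scratch
   *   }
   *   writer.commit();             // or writer.rollback() to restore the old index
   */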
 2820   
 2821     private synchronized void finishMerges(boolean waitForMerges) throws IOException {
 2822       if (!waitForMerges) {
 2823   
 2824         stopMerges = true;
 2825   
 2826         // Abort all pending & running merges:
 2827         for (final MergePolicy.OneMerge merge : pendingMerges) {
 2828           if (infoStream != null)
 2829             message("now abort pending merge " + merge.segString(directory));
 2830           merge.abort();
 2831           mergeFinish(merge);
 2832         }
 2833         pendingMerges.clear();
 2834         
 2835         for (final MergePolicy.OneMerge merge : runningMerges) {
 2836           if (infoStream != null)
 2837             message("now abort running merge " + merge.segString(directory));
 2838           merge.abort();
 2839         }
 2840   
 2841         // Ensure any running addIndexes finishes.  It's fine
 2842         // if a new one attempts to start because its merges
 2843         // will quickly see the stopMerges == true and abort.
 2844         acquireRead();
 2845         releaseRead();
 2846   
 2847         // These merges periodically check whether they have
 2848         // been aborted, and stop if so.  We wait here until
 2849         // they have all stopped; this should not take long
 2850         // because the merge threads check for aborts
 2851         // frequently.
 2852         while(runningMerges.size() > 0) {
 2853           if (infoStream != null)
 2854             message("now wait for " + runningMerges.size() + " running merge to abort");
 2855           doWait();
 2856         }
 2857   
 2858         stopMerges = false;
 2859         notifyAll();
 2860   
 2861         assert 0 == mergingSegments.size();
 2862   
 2863         if (infoStream != null)
 2864           message("all running merges have aborted");
 2865   
 2866       } else {
 2867         // waitForMerges() will ensure any running addIndexes finishes.  
 2868         // It's fine if a new one attempts to start because from our
 2869         // It's fine if a new one attempts to start, because our
 2870         // caller above has already marked this writer as
 2871         // closing, so the new call will see that and throw an
 2872         // AlreadyClosedException.
 2873       }
 2874     }
 2875   
 2876     /**
 2877      * Wait for any currently outstanding merges to finish.
 2878      *
 2879      * <p>It is guaranteed that any merges started prior to calling this method 
 2880      *    will have completed once this method completes.</p>
 2881      */
 2882     public synchronized void waitForMerges() {
 2883       // Ensure any running addIndexes finishes.
 2884       acquireRead();
 2885       releaseRead();
 2886   
 2887       while(pendingMerges.size() > 0 || runningMerges.size() > 0) {
 2888         doWait();
 2889       }
 2890   
 2891       // sanity check
 2892       assert 0 == mergingSegments.size();
 2893     }
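
  /*
   * A minimal caller-side sketch of waitForMerges(), e.g. before taking a
   * backup snapshot of the Directory ("writer" is an open IndexWriter):
   *
   *   writer.maybeMerge();      // request any merges the MergePolicy wants
   *   writer.waitForMerges();   // block until pending and running merges finish
   *   writer.commit();          // the committed index now reflects those merges
   */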
 2894   
 2895     /*
 2896      * Called whenever the SegmentInfos has been updated and
 2897      * the index files referenced exist (correctly) in the
 2898      * index directory.
 2899      */
 2900     private synchronized void checkpoint() throws IOException {
 2901       changeCount++;
 2902       deleter.checkpoint(segmentInfos, false);
 2903     }
 2904   
 2905     private void finishAddIndexes() {
 2906       releaseWrite();
 2907     }
 2908   
 2909     private void blockAddIndexes(boolean includePendingClose) {
 2910   
 2911       acquireRead();
 2912   
 2913       boolean success = false;
 2914       try {
 2915   
 2916         // Make sure we are still open since we could have
 2917         // waited quite a while for last addIndexes to finish
 2918         ensureOpen(includePendingClose);
 2919         success = true;
 2920       } finally {
 2921         if (!success)
 2922           releaseRead();
 2923       }
 2924     }
 2925   
 2926     private void resumeAddIndexes() {
 2927       releaseRead();
 2928     }
 2929   
 2930     private synchronized void resetMergeExceptions() {
 2931       mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
 2932       mergeGen++;
 2933     }
 2934   
 2935     private void noDupDirs(Directory... dirs) {
 2936       HashSet<Directory> dups = new HashSet<Directory>();
 2937       for(int i=0;i<dirs.length;i++) {
 2938         if (dups.contains(dirs[i]))
 2939           throw new IllegalArgumentException("Directory " + dirs[i] + " appears more than once");
 2940         if (dirs[i] == directory)
 2941           throw new IllegalArgumentException("Cannot add directory to itself");
 2942         dups.add(dirs[i]);
 2943       }
 2944     }
 2945   
 2946     /**
 2947      * Merges all segments from an array of indexes into this
 2948      * index.
 2949      *
 2950      * <p>This may be used to parallelize batch indexing.  A large document
 2951      * collection can be broken into sub-collections.  Each sub-collection can be
 2952      * indexed in parallel, on a different thread, process or machine.  The
 2953      * complete index can then be created by merging sub-collection indexes
 2954      * with this method.
 2955      *
 2956      * <p><b>NOTE:</b> the index in each Directory must not be
 2957      * changed (opened by a writer) while this method is
 2958      * running.  This method does not acquire a write lock in
 2959      * each input Directory, so it is up to the caller to
 2960      * enforce this.
 2961      *
 2962      * <p><b>NOTE:</b> while this is running, any attempts to
 2963      * add or delete documents (with another thread) will be
 2964      * paused until this method completes.
 2965      *
 2966      * <p>This method is transactional in how Exceptions are
 2967      * handled: it does not commit a new segments_N file until
 2968      * all indexes are added.  This means if an Exception
 2969      * occurs (for example disk full), then either no indexes
 2970      * will have been added or they all will have been.</p>
 2971      *
 2972      * <p>Note that this requires temporary free space in the
 2973      * Directory up to 2X the sum of all input indexes
 2974      * (including the starting index).  If readers/searchers
 2975      * are open against the starting index, then temporary
 2976      * free space required will be higher by the size of the
 2977      * starting index (see {@link #optimize()} for details).
 2978      * </p>
 2979      *
 2980      * <p>Once this completes, the final size of the index
 2981      * will be less than the sum of all input index sizes
 2982      * (including the starting index).  It could be quite a
 2983      * bit smaller (if there were many pending deletes) or
 2984      * just slightly smaller.</p>
 2985      * 
 2986      * <p>
 2987      * This requires this index not be among those to be added.
 2988      *
 2989      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 2990      * you should immediately close the writer.  See <a
 2991      * href="#OOME">above</a> for details.</p>
 2992      *
 2993      * @throws CorruptIndexException if the index is corrupt
 2994      * @throws IOException if there is a low-level IO error
 2995      */
 2996     public void addIndexesNoOptimize(Directory... dirs)
 2997         throws CorruptIndexException, IOException {
 2998   
 2999       ensureOpen();
 3000   
 3001       noDupDirs(dirs);
 3002   
 3003       // Do not allow add docs or deletes while we are running:
 3004       docWriter.pauseAllThreads();
 3005   
 3006       try {
 3007         if (infoStream != null)
 3008           message("flush at addIndexesNoOptimize");
 3009         flush(true, false, true);
 3010   
 3011         boolean success = false;
 3012   
 3013         startTransaction(false);
 3014   
 3015         try {
 3016   
 3017           int docCount = 0;
 3018           synchronized(this) {
 3019             ensureOpen();
 3020   
 3021             for (int i = 0; i < dirs.length; i++) {
 3022               if (directory == dirs[i]) {
 3023                 // cannot add this index: segments may be deleted in merge before added
 3024                 throw new IllegalArgumentException("Cannot add this index to itself");
 3025               }
 3026   
 3027               SegmentInfos sis = new SegmentInfos(); // read infos from dir
 3028               sis.read(dirs[i]);
 3029               for (int j = 0; j < sis.size(); j++) {
 3030                 SegmentInfo info = sis.info(j);
 3031                 assert !segmentInfos.contains(info): "dup info dir=" + info.dir + " name=" + info.name;
 3032                 docCount += info.docCount;
 3033                 segmentInfos.add(info); // add each info
 3034               }
 3035             }
 3036           }
 3037   
 3038           // Notify DocumentsWriter that the flushed count just increased
 3039           docWriter.updateFlushedDocCount(docCount);
 3040   
 3041           maybeMerge();
 3042   
 3043           ensureOpen();
 3044   
 3045           // If after merging there remain segments in the index
 3046           // that are in a different directory, just copy these
 3047           // over into our index.  This is necessary (before
 3048           // finishing the transaction) to avoid leaving the
 3049           // index in an unusable (inconsistent) state.
 3050           resolveExternalSegments();
 3051   
 3052           ensureOpen();
 3053   
 3054           success = true;
 3055   
 3056         } finally {
 3057           if (success) {
 3058             commitTransaction();
 3059           } else {
 3060             rollbackTransaction();
 3061           }
 3062         }
 3063       } catch (OutOfMemoryError oom) {
 3064         handleOOM(oom, "addIndexesNoOptimize");
 3065       } finally {
 3066         if (docWriter != null) {
 3067           docWriter.resumeAllThreads();
 3068         }
 3069       }
 3070     }
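
  /*
   * A caller-side sketch of the parallel batch-indexing pattern described in
   * the javadoc above.  "mainDir", "shardDir1" and "shardDir2" are
   * hypothetical Directories; the shard indexes were built elsewhere, their
   * writers are closed, and they must not change while this runs.
   *
   *   IndexWriter writer = new IndexWriter(mainDir, analyzer,
   *       IndexWriter.MaxFieldLength.UNLIMITED);
   *   writer.addIndexesNoOptimize(shardDir1, shardDir2);
   *   writer.commit();   // the merged-in segments become visible atomically
   *   writer.close();
   */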
 3071   
 3072     private boolean hasExternalSegments() {
 3073       return segmentInfos.hasExternalSegments(directory);
 3074     }
 3075   
 3076     /* If any of our segments are using a directory != ours
 3077      * then we have to either copy them over one by one, merge
 3078      * them (if merge policy has chosen to) or wait until
 3079      * currently running merges (in the background) complete.
 3080      * We don't return until the SegmentInfos has no more
 3081      * external segments.  Currently this is only used by
 3082      * addIndexesNoOptimize(). */
 3083     private void resolveExternalSegments() throws CorruptIndexException, IOException {
 3084   
 3085       boolean any = false;
 3086   
 3087       boolean done = false;
 3088   
 3089       while(!done) {
 3090         SegmentInfo info = null;
 3091         MergePolicy.OneMerge merge = null;
 3092         synchronized(this) {
 3093   
 3094           if (stopMerges)
 3095             throw new MergePolicy.MergeAbortedException("rollback() was called or addIndexes* hit an unhandled exception");
 3096   
 3097           final int numSegments = segmentInfos.size();
 3098   
 3099           done = true;
 3100           for(int i=0;i<numSegments;i++) {
 3101             info = segmentInfos.info(i);
 3102             if (info.dir != directory) {
 3103               done = false;
 3104               final MergePolicy.OneMerge newMerge = new MergePolicy.OneMerge(segmentInfos.range(i, 1+i), mergePolicy instanceof LogMergePolicy && getUseCompoundFile());
 3105   
 3106               // Returns true if no running merge conflicts
 3107               // with this one (and, records this merge as
 3108               // pending), ie, this segment is not currently
 3109               // being merged:
 3110               if (registerMerge(newMerge)) {
 3111                 merge = newMerge;
 3112   
 3113                 // If this segment is not currently being
 3114                 // merged, then advance it to running & run
 3115                 // the merge ourself (below):
 3116                 pendingMerges.remove(merge);
 3117                 runningMerges.add(merge);
 3118                 break;
 3119               }
 3120             }
 3121           }
 3122   
 3123           if (!done && merge == null)
 3124             // We are not yet done (external segments still
 3125             // exist in segmentInfos), yet, all such segments
 3126             // are currently "covered" by a pending or running
 3127             // merge.  We now try to grab any pending merge
 3128             // that involves external segments:
 3129             merge = getNextExternalMerge();
 3130   
 3131           if (!done && merge == null)
 3132             // We are not yet done, and, all external segments
 3133             // fall under merges that the merge scheduler is
 3134             // currently running.  So, we now wait and check
 3135             // back to see if the merge has completed.
 3136             doWait();
 3137         }
 3138   
 3139         if (merge != null) {
 3140           any = true;
 3141           merge(merge);
 3142         }
 3143       }
 3144   
 3145       if (any)
 3146         // Sometimes, on copying an external segment over,
 3147         // more merges may become necessary:
 3148         mergeScheduler.merge(this);
 3149     }
 3150   
 3151     /** Merges the provided indexes into this index.
 3152      * <p>After this completes, the index is optimized. </p>
 3153      * <p>The provided IndexReaders are not closed.</p>
 3154      *
 3155      * <p><b>NOTE:</b> while this is running, any attempts to
 3156      * add or delete documents (with another thread) will be
 3157      * paused until this method completes.
 3158      *
 3159      * <p>See {@link #addIndexesNoOptimize} for
 3160      * details on transactional semantics, temporary free
 3161      * space required in the Directory, and non-CFS segments
 3162      * on an Exception.</p>
 3163      *
 3164      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 3165      * you should immediately close the writer.  See <a
 3166      * href="#OOME">above</a> for details.</p>
 3167      *
 3168      * @throws CorruptIndexException if the index is corrupt
 3169      * @throws IOException if there is a low-level IO error
 3170      */
 3171     public void addIndexes(IndexReader... readers)
 3172       throws CorruptIndexException, IOException {
 3173   
 3174       ensureOpen();
 3175   
 3176       // Do not allow add docs or deletes while we are running:
 3177       docWriter.pauseAllThreads();
 3178   
 3179       // We must pre-acquire a read lock here (and upgrade to
 3180       // write lock in startTransaction below) so that no
 3181       // other addIndexes is allowed to start up after we have
 3182       // flushed & optimized but before we then start our
 3183       // transaction.  This is because the merging below
 3184       // requires that only one segment is present in the
 3185       // index:
 3186       acquireRead();
 3187   
 3188       try {
 3189   
 3190         SegmentInfo info = null;
 3191         String mergedName = null;
 3192         SegmentMerger merger = null;
 3193   
 3194         boolean success = false;
 3195   
 3196         try {
 3197           flush(true, false, true);
 3198           optimize();					  // start with zero or 1 seg
 3199           success = true;
 3200         } finally {
 3201           // Take care to release the read lock if we hit an
 3202           // exception before starting the transaction
 3203           if (!success)
 3204             releaseRead();
 3205         }
 3206   
 3207         // true means we already have a read lock; if this
 3208         // call hits an exception it will release the write
 3209         // lock:
 3210         startTransaction(true);
 3211   
 3212         try {
 3213           mergedName = newSegmentName();
 3214           merger = new SegmentMerger(this, mergedName, null);
 3215   
 3216           SegmentReader sReader = null;
 3217           synchronized(this) {
 3218             if (segmentInfos.size() == 1) { // add existing index, if any
 3219               sReader = readerPool.get(segmentInfos.info(0), true, BufferedIndexInput.BUFFER_SIZE, -1);
 3220             }
 3221           }
 3222           
 3223           success = false;
 3224   
 3225           try {
 3226             if (sReader != null)
 3227               merger.add(sReader);
 3228   
 3229             for (int i = 0; i < readers.length; i++)      // add new indexes
 3230               merger.add(readers[i]);
 3231   
 3232             int docCount = merger.merge();                // merge 'em
 3233   
 3234             synchronized(this) {
 3235               segmentInfos.clear();                      // pop old infos & add new
 3236               info = new SegmentInfo(mergedName, docCount, directory, false, true,
 3237                                      -1, null, false, merger.hasProx());
 3238               setDiagnostics(info, "addIndexes(IndexReader...)");
 3239               segmentInfos.add(info);
 3240             }
 3241   
 3242             // Notify DocumentsWriter that the flushed count just increased
 3243             docWriter.updateFlushedDocCount(docCount);
 3244   
 3245             success = true;
 3246   
 3247           } finally {
 3248             if (sReader != null) {
 3249               readerPool.release(sReader);
 3250             }
 3251           }
 3252         } finally {
 3253           if (!success) {
 3254             if (infoStream != null)
 3255               message("hit exception in addIndexes during merge");
 3256             rollbackTransaction();
 3257           } else {
 3258             commitTransaction();
 3259           }
 3260         }
 3261       
 3262         if (mergePolicy instanceof LogMergePolicy && getUseCompoundFile()) {
 3263   
 3264           List<String> files = null;
 3265   
 3266           synchronized(this) {
 3267             // Must incRef our files so that if another thread
 3268             // is running merge/optimize, it doesn't delete our
 3269           // segment's files before we have a chance to
 3270             // finish making the compound file.
 3271             if (segmentInfos.contains(info)) {
 3272               files = info.files();
 3273               deleter.incRef(files);
 3274             }
 3275           }
 3276   
 3277           if (files != null) {
 3278   
 3279             success = false;
 3280   
 3281             startTransaction(false);
 3282   
 3283             try {
 3284               merger.createCompoundFile(mergedName + ".cfs");
 3285               synchronized(this) {
 3286                 info.setUseCompoundFile(true);
 3287               }
 3288             
 3289               success = true;
 3290             
 3291             } finally {
 3292   
 3293               deleter.decRef(files);
 3294   
 3295               if (!success) {
 3296                 if (infoStream != null)
 3297                   message("hit exception building compound file in addIndexes during merge");
 3298   
 3299                 rollbackTransaction();
 3300               } else {
 3301                 commitTransaction();
 3302               }
 3303             }
 3304           }
 3305         }
 3306       } catch (OutOfMemoryError oom) {
 3307         handleOOM(oom, "addIndexes(IndexReader...)");
 3308       } finally {
 3309         if (docWriter != null) {
 3310           docWriter.resumeAllThreads();
 3311         }
 3312       }
 3313     }
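
  /*
   * A caller-side sketch of addIndexes(IndexReader...): merge another index
   * in through a reader; the reader is not closed by this call and the
   * resulting index is optimized.  "otherDir" is a hypothetical Directory
   * holding a finished index and "writer" is an open IndexWriter.
   *
   *   IndexReader reader = IndexReader.open(otherDir, true);   // read-only
   *   try {
   *     writer.addIndexes(reader);
   *     writer.commit();
   *   } finally {
   *     reader.close();
   *   }
   */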
 3314   
 3315     // This is called after pending added and deleted
 3316     // documents have been flushed to the Directory but before
 3317     // the change is committed (new segments_N file written).
 3318     void doAfterFlush()
 3319       throws IOException {
 3320     }
 3321   
 3322     /** Expert: prepare for commit.
 3323      *
 3324      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 3325      * you should immediately close the writer.  See <a
 3326      * href="#OOME">above</a> for details.</p>
 3327      *
 3328      * @see #prepareCommit(Map) */
 3329     public final void prepareCommit() throws CorruptIndexException, IOException {
 3330       ensureOpen();
 3331       prepareCommit(null);
 3332     }
 3333   
 3334     /** <p>Expert: prepare for commit, specifying
 3335      *  commitUserData Map (String -> String).  This does the
 3336      *  first phase of 2-phase commit. This method does all
 3337      *  steps necessary to commit changes since this writer
 3338      *  was opened: flushes pending added and deleted docs,
 3339      *  syncs the index files, writes most of next segments_N
 3340      *  syncs the index files, writes most of the next segments_N
 3341      *  #commit()} to finish the commit, or {@link
 3342      *  #rollback()} to revert the commit and undo all changes
 3343      *  done since the writer was opened.</p>
 3344      * 
 3345      *  You can also just call {@link #commit(Map)} directly
 3346      *  without prepareCommit first, in which case that method
 3347      *  will internally call prepareCommit.
 3348      *
 3349      *  <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 3350      *  you should immediately close the writer.  See <a
 3351      *  href="#OOME">above</a> for details.</p>
 3352      *
 3353      *  @param commitUserData Opaque Map (String->String)
 3354      *  that's recorded into the segments file in the index,
 3355      *  and retrievable by {@link
 3356      *  IndexReader#getCommitUserData}.  Note that when
 3357      *  IndexWriter commits itself during {@link #close}, the
 3358      *  commitUserData is unchanged (just carried over from
 3359      *  the prior commit).  If this is null then the previous
 3360      *  commitUserData is kept.  Also, the commitUserData will
 3361      *  only "stick" if there are actually changes in the
 3362      *  index to commit.
 3363      */
 3364     public final void prepareCommit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
 3365   
 3366       if (hitOOM) {
 3367         throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
 3368       }
 3369   
 3370       if (pendingCommit != null)
 3371         throw new IllegalStateException("prepareCommit was already called with no corresponding call to commit");
 3372   
 3373       if (infoStream != null)
 3374         message("prepareCommit: flush");
 3375   
 3376       flush(true, true, true);
 3377   
 3378       startCommit(0, commitUserData);
 3379     }
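
  /*
   * A caller-side sketch of the two-phase commit that prepareCommit()
   * enables, coordinating the index with some external resource ("db" is a
   * hypothetical second participant): prepare both sides, then finish both,
   * rolling everything back if either prepare fails.
   *
   *   try {
   *     writer.prepareCommit();   // phase 1: flush, sync, write most of segments_N
   *     db.prepare();
   *   } catch (Exception e) {
   *     writer.rollback();        // undo the prepared-but-unfinished commit
   *     db.rollback();
   *     throw new RuntimeException(e);
   *   }
   *   writer.commit();            // phase 2: make the prepared commit visible
   *   db.commit();
   */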
 3380   
 3381     // Used only by commit, below; lock order is commitLock -> IW
 3382     private final Object commitLock = new Object();
 3383   
 3384     private void commit(long sizeInBytes) throws IOException {
 3385       synchronized(commitLock) {
 3386         startCommit(sizeInBytes, null);
 3387         finishCommit();
 3388       }
 3389     }
 3390   
 3391     /**
 3392      * <p>Commits all pending changes (added & deleted
 3393      * documents, optimizations, segment merges, added
 3394      * indexes, etc.) to the index, and syncs all referenced
 3395      * index files, such that a reader will see the changes
 3396      * and the index updates will survive an OS or machine
 3397      * crash or power loss.  Note that this does not wait for
 3398      * any running background merges to finish.  This may be a
 3399      * costly operation, so you should test the cost in your
 3400      * application and do it only when really necessary.</p>
 3401      *
 3402      * <p> Note that this operation calls Directory.sync on
 3403      * the index files.  That call should not return until the
 3404      * file contents & metadata are on stable storage.  For
 3405      * FSDirectory, this calls the OS's fsync.  But, beware:
 3406      * some hardware devices may in fact cache writes even
 3407      * during fsync, and return before the bits are actually
 3408      * on stable storage, to give the appearance of faster
 3409      * performance.  If you have such a device, and it does
 3410      * not have a battery backup (for example) then on power
 3411      * loss it may still lose data.  Lucene cannot guarantee
 3412      * consistency on such devices.  </p>
 3413      *
 3414      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 3415      * you should immediately close the writer.  See <a
 3416      * href="#OOME">above</a> for details.</p>
 3417      *
 3418      * @see #prepareCommit
 3419      * @see #commit(Map)
 3420      */
 3421     public final void commit() throws CorruptIndexException, IOException {
 3422       commit(null);
 3423     }
 3424   
 3425     /** Commits all changes to the index, specifying a
 3426      *  commitUserData Map (String -> String).  This just
 3427      *  calls {@link #prepareCommit(Map)} (if you didn't
 3428      *  already call it) and then {@link #finishCommit}.
 3429      *
 3430      * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
 3431      * you should immediately close the writer.  See <a
 3432      * href="#OOME">above</a> for details.</p>
 3433      */
 3434     public final void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
 3435   
 3436       ensureOpen();
 3437   
 3438       if (infoStream != null) {
 3439         message("commit: start");
 3440       }
 3441   
 3442       synchronized(commitLock) {
 3443         if (infoStream != null) {
 3444           message("commit: enter lock");
 3445         }
 3446   
 3447         if (pendingCommit == null) {
 3448           if (infoStream != null) {
 3449             message("commit: now prepare");
 3450           }
 3451           prepareCommit(commitUserData);
 3452         } else if (infoStream != null) {
 3453           message("commit: already prepared");
 3454         }
 3455   
 3456         finishCommit();
 3457       }
 3458     }
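
  /*
   * A caller-side sketch of commit(Map): record an application-level marker
   * with the commit and read it back later, here via the static
   * IndexReader.getCommitUserData(Directory).  "writer" and "dir" are
   * assumed to exist; "lastSeqId" is an arbitrary application key.
   *
   *   Map<String,String> userData = new HashMap<String,String>();
   *   userData.put("lastSeqId", "12345");
   *   writer.commit(userData);
   *
   *   // later, e.g. on restart:
   *   Map<String,String> committed = IndexReader.getCommitUserData(dir);
   *   String lastSeqId = committed.get("lastSeqId");
   */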
 3459   
 3460     private synchronized final void finishCommit() throws CorruptIndexException, IOException {
 3461   
 3462       if (pendingCommit != null) {
 3463         try {
 3464           if (infoStream != null)
 3465             message("commit: pendingCommit != null");
 3466           pendingCommit.finishCommit(directory);
 3467           if (infoStream != null)
 3468             message("commit: wrote segments file \"" + pendingCommit.getCurrentSegmentFileName() + "\"");
 3469           lastCommitChangeCount = pendingCommitChangeCount;
 3470           segmentInfos.updateGeneration(pendingCommit);
 3471           segmentInfos.setUserData(pendingCommit.getUserData());
 3472           setRollbackSegmentInfos(pendingCommit);
 3473           deleter.checkpoint(pendingCommit, true);
 3474         } finally {
 3475           deleter.decRef(pendingCommit);
 3476           pendingCommit = null;
 3477           notifyAll();
 3478         }
 3479   
 3480       } else if (infoStream != null)
 3481           message("commit: pendingCommit == null; skip");
 3482   
 3483       if (infoStream != null)
 3484         message("commit: done");
 3485     }
 3486   
 3487     /**
 3488      * Flush all in-memory buffered updates (adds and deletes)
 3489      * to the Directory.
 3490      * @param triggerMerge if true, we may merge segments (if
 3491      *  deletes or docs were flushed) if necessary
 3492      * @param flushDocStores if false we are allowed to keep
 3493      *  doc stores open to share with the next segment
 3494      * @param flushDeletes whether pending deletes should also
 3495      *  be flushed
 3496      */
 3497     protected final void flush(boolean triggerMerge, boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException {
 3498       // We can be called during close, when closing==true, so we must pass false to ensureOpen:
 3499       ensureOpen(false);
 3500       if (doFlush(flushDocStores, flushDeletes) && triggerMerge)
 3501         maybeMerge();
 3502     }
 3503   
 3504     // TODO: this method should not have to be entirely
 3505     // synchronized, ie, merges should be allowed to commit
 3506     // even while a flush is happening
 3507     private synchronized final boolean doFlush(boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException {
 3508       try {
 3509         return doFlushInternal(flushDocStores, flushDeletes);
 3510       } finally {
 3511         docWriter.clearFlushPending();
 3512       }
 3513     }
 3514   
 3515     // TODO: this method should not have to be entirely
 3516     // synchronized, ie, merges should be allowed to commit
 3517     // even while a flush is happening
 3518     private synchronized final boolean doFlushInternal(boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException {
 3519   
 3520       if (hitOOM) {
 3521         throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush");
 3522       }
 3523   
 3524       ensureOpen(false);
 3525   
 3526       assert testPoint("startDoFlush");
 3527   
 3528       flushCount++;
 3529   
 3530       // If we are flushing because too many deletes
 3531       // accumulated, then we should apply the deletes to free
 3532       // RAM:
 3533       flushDeletes |= docWriter.doApplyDeletes();
 3534   
 3535       // Make sure no threads are actively adding a document.
 3536       // Returns true if docWriter is currently aborting, in
 3537       // which case we skip flushing this segment
 3538       if (docWriter.pauseAllThreads()) {
 3539         docWriter.resumeAllThreads();
 3540         return false;
 3541       }
 3542   
 3543       try {
 3544   
 3545         SegmentInfo newSegment = null;
 3546   
 3547         final int numDocs = docWriter.getNumDocsInRAM();
 3548   
 3549         // Always flush docs if there are any
 3550         boolean flushDocs = numDocs > 0;
 3551   
 3552         String docStoreSegment = docWriter.getDocStoreSegment();
 3553   
 3554         assert docStoreSegment != null || numDocs == 0: "dss=" + docStoreSegment + " numDocs=" + numDocs;
 3555   
 3556         if (docStoreSegment == null)
 3557           flushDocStores = false;
 3558   
 3559         int docStoreOffset = docWriter.getDocStoreOffset();
 3560   
 3561         boolean docStoreIsCompoundFile = false;
 3562   
 3563         if (infoStream != null) {
 3564           message("  flush: segment=" + docWriter.getSegment() +
 3565                   " docStoreSegment=" + docWriter.getDocStoreSegment() +
 3566                   " docStoreOffset=" + docStoreOffset +
 3567                   " flushDocs=" + flushDocs +
 3568                   " flushDeletes=" + flushDeletes +
 3569                   " flushDocStores=" + flushDocStores +
 3570                   " numDocs=" + numDocs +
 3571                   " numBufDelTerms=" + docWriter.getNumBufferedDeleteTerms());
 3572           message("  index before flush " + segString());
 3573         }
 3574   
 3575         // Check if the doc stores must be separately flushed
 3576         // because other segments, besides the one we are about
 3577         // to flush, reference it
 3578         if (flushDocStores && (!flushDocs || !docWriter.getSegment().equals(docWriter.getDocStoreSegment()))) {
 3579           // We must separately flush the doc store
 3580           if (infoStream != null)
 3581             message("  flush shared docStore segment " + docStoreSegment);
 3582         
 3583           docStoreIsCompoundFile = flushDocStores();
 3584           flushDocStores = false;
 3585         }
 3586   
 3587         String segment = docWriter.getSegment();
 3588   
 3589         // If we are flushing docs, segment must not be null:
 3590         assert segment != null || !flushDocs;
 3591   
 3592         if (flushDocs) {
 3593   
 3594           boolean success = false;
 3595           final int flushedDocCount;
 3596   
 3597           try {
 3598             flushedDocCount = docWriter.flush(flushDocStores);
 3599             success = true;
 3600           } finally {
 3601             if (!success) {
 3602               if (infoStream != null)
 3603                 message("hit exception flushing segment " + segment);
 3604               deleter.refresh(segment);
 3605             }
 3606           }
 3607           
 3608           if (0 == docStoreOffset && flushDocStores) {
 3609             // This means we are flushing private doc stores
 3610             // with this segment, so it will not be shared
 3611             // with other segments
 3612             assert docStoreSegment != null;
 3613             assert docStoreSegment.equals(segment);
 3614             docStoreOffset = -1;
 3615             docStoreIsCompoundFile = false;
 3616             docStoreSegment = null;
 3617           }
 3618   
 3619           // Create new SegmentInfo, but do not add to our
 3620           // segmentInfos until deletes are flushed
 3621           // successfully.
 3622           newSegment = new SegmentInfo(segment,
 3623                                        flushedDocCount,
 3624                                        directory, false, true,
 3625                                        docStoreOffset, docStoreSegment,
 3626                                        docStoreIsCompoundFile,    
 3627                                        docWriter.hasProx());
 3628           setDiagnostics(newSegment, "flush");
 3629         }
 3630   
 3631         docWriter.pushDeletes();
 3632   
 3633         if (flushDocs) {
 3634           segmentInfos.add(newSegment);
 3635           checkpoint();
 3636         }
 3637   
 3638         if (flushDocs && mergePolicy.useCompoundFile(segmentInfos, newSegment)) {
 3639           // Now build compound file
 3640           boolean success = false;
 3641           try {
 3642             docWriter.createCompoundFile(segment);
 3643             success = true;
 3644           } finally {
 3645             if (!success) {
 3646               if (infoStream != null)
 3647                 message("hit exception creating compound file for newly flushed segment " + segment);
 3648               deleter.deleteFile(segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
 3649             }
 3650           }
 3651   
 3652           newSegment.setUseCompoundFile(true);
 3653           checkpoint();
 3654         }
 3655   
 3656         if (flushDeletes) {
 3657           applyDeletes();
 3658         }
 3659         
 3660         if (flushDocs)
 3661           checkpoint();
 3662   
 3663         doAfterFlush();
 3664   
 3665         return flushDocs;
 3666   
 3667       } catch (OutOfMemoryError oom) {
 3668         handleOOM(oom, "doFlush");
 3669         // never hit
 3670         return false;
 3671       } finally {
 3672         docWriter.resumeAllThreads();
 3673       }
 3674     }
 3675   
 3676     /** Expert:  Return the total size of all index files currently cached in memory.
 3677      * Useful for managing RAM usage together with {@link #setRAMBufferSizeMB}.
 3678      */
 3679     public final long ramSizeInBytes() {
 3680       ensureOpen();
 3681       return docWriter.getRAMUsed();
 3682     }
 3683   
 3684     /** Expert:  Return the number of documents currently
 3685      *  buffered in RAM. */
 3686     public final synchronized int numRamDocs() {
 3687       ensureOpen();
 3688       return docWriter.getNumDocsInRAM();
 3689     }
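
  /*
   * A caller-side sketch using these two methods for diagnostics while bulk
   * indexing; the automatic flushing itself is normally controlled with
   * setRAMBufferSizeMB.  "writer", "docs" and the PrintStream "log" are
   * assumed to exist.
   *
   *   writer.setRAMBufferSizeMB(64.0);   // flush automatically near 64 MB
   *   for (Document doc : docs) {
   *     writer.addDocument(doc);
   *     log.println("buffered docs=" + writer.numRamDocs()
   *         + " ram=" + writer.ramSizeInBytes() + " bytes");
   *   }
   */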
 3690   
 3691     private int ensureContiguousMerge(MergePolicy.OneMerge merge) {
 3692   
 3693       int first = segmentInfos.indexOf(merge.segments.info(0));
 3694       if (first == -1)
 3695         throw new MergePolicy.MergeException("could not find segment " + merge.segments.info(0).name + " in current index " + segString(), directory);
 3696   
 3697       final int numSegments = segmentInfos.size();
 3698       
 3699       final int numSegmentsToMerge = merge.segments.size();
 3700       for(int i=0;i<numSegmentsToMerge;i++) {
 3701         final SegmentInfo info = merge.segments.info(i);
 3702   
 3703         if (first + i >= numSegments || !segmentInfos.info(first+i).equals(info)) {
 3704           if (segmentInfos.indexOf(info) == -1)
 3705             throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
 3706           else
 3707             throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.segString(directory) + " vs " + segString() + "), which IndexWriter (currently) cannot handle",
 3708                                                  directory);
 3709         }
 3710       }
 3711   
 3712       return first;
 3713     }
 3714   
 3715     /** Carefully merges deletes for the segments we just
 3716      *  merged.  This is tricky because, although merging will
 3717      *  clear all deletes (compacts the documents), new
 3718      *  deletes may have been flushed to the segments since
 3719      *  the merge was started.  This method "carries over"
 3720      *  such new deletes onto the newly merged segment, and
 3721      *  saves the resulting deletes file (incrementing the
 3722      *  delete generation for merge.info).  If no deletes were
 3723      *  flushed, no new deletes file is saved. */
 3724     synchronized private void commitMergedDeletes(MergePolicy.OneMerge merge, SegmentReader mergeReader) throws IOException {
 3725   
 3726       assert testPoint("startCommitMergeDeletes");
 3727   
 3728       final SegmentInfos sourceSegments = merge.segments;
 3729   
 3730       if (infoStream != null)
 3731         message("commitMergeDeletes " + merge.segString(directory));
 3732   
 3733       // Carefully merge deletes that occurred after we
 3734       // started merging:
 3735       int docUpto = 0;
 3736       int delCount = 0;
 3737   
 3738       for(int i=0; i < sourceSegments.size(); i++) {
 3739         SegmentInfo info = sourceSegments.info(i);
 3740         int docCount = info.docCount;
 3741         SegmentReader previousReader = merge.readersClone[i];
 3742         SegmentReader currentReader = merge.readers[i];
 3743         if (previousReader.hasDeletions()) {
 3744   
 3745           // There were deletes on this segment when the merge
 3746           // started.  The merge has collapsed away those
 3747           // deletes, but, if new deletes were flushed since
 3748           // the merge started, we must now carefully keep any
 3749           // newly flushed deletes, mapping them to the new
 3750           // docIDs.
 3751   
 3752           if (currentReader.numDeletedDocs() > previousReader.numDeletedDocs()) {
 3753             // This means this segment has had new deletes
 3754             // committed since we started the merge, so we
 3755             // must merge them:
 3756             for(int j=0;j<docCount;j++) {
 3757               if (previousReader.isDeleted(j))
 3758                 assert currentReader.isDeleted(j);
 3759               else {
 3760                 if (currentReader.isDeleted(j)) {
 3761                   mergeReader.doDelete(docUpto);
 3762                   delCount++;
 3763                 }
 3764                 docUpto++;
 3765               }
 3766             }
 3767           } else {
 3768             docUpto += docCount - previousReader.numDeletedDocs();
 3769           }
 3770         } else if (currentReader.hasDeletions()) {
 3771           // This segment had no deletes before but now it
 3772           // does:
 3773           for(int j=0; j<docCount; j++) {
 3774             if (currentReader.isDeleted(j)) {
 3775               mergeReader.doDelete(docUpto);
 3776               delCount++;
 3777             }
 3778             docUpto++;
 3779           }
 3780         } else
 3781           // No deletes before or after
 3782           docUpto += info.docCount;
 3783       }
 3784   
 3785       assert mergeReader.numDeletedDocs() == delCount;
 3786   
 3787       mergeReader.hasChanges = delCount > 0;
 3788     }
 3789   
 3790     /* FIXME if we want to support non-contiguous segment merges */
 3791     synchronized private boolean commitMerge(MergePolicy.OneMerge merge, SegmentMerger merger, int mergedDocCount, SegmentReader mergedReader) throws IOException {
 3792   
 3793       assert testPoint("startCommitMerge");
 3794   
 3795       if (hitOOM) {
 3796         throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete merge");
 3797       }
 3798   
 3799       if (infoStream != null)
 3800         message("commitMerge: " + merge.segString(directory) + " index=" + segString());
 3801   
 3802       assert merge.registerDone;
 3803   
 3804       // If merge was explicitly aborted, or, if rollback() or
 3805       // rollbackTransaction() had been called since our merge
 3806       // started (which results in an unqualified
 3807       // deleter.refresh() call that will remove any index
 3808       // file that current segments does not reference), we
 3809       // abort this merge
 3810       if (merge.isAborted()) {
 3811         if (infoStream != null)
 3812           message("commitMerge: skipping merge " + merge.segString(directory) + ": it was aborted");
 3813   
 3814         deleter.refresh(merge.info.name);
 3815         return false;
 3816       }
 3817   
 3818       final int start = ensureContiguousMerge(merge);
 3819   
 3820       commitMergedDeletes(merge, mergedReader);
 3821       docWriter.remapDeletes(segmentInfos, merger.getDocMaps(), merger.getDelCounts(), merge, mergedDocCount);
 3822         
 3823       // Simple optimization: if the doc store we are using
 3824       // has been closed and is now in compound format (but
 3825       // wasn't when we started), then we will switch to the
 3826       // compound format as well:
 3827       final String mergeDocStoreSegment = merge.info.getDocStoreSegment(); 
 3828       if (mergeDocStoreSegment != null && !merge.info.getDocStoreIsCompoundFile()) {
 3829         final int size = segmentInfos.size();
 3830         for(int i=0;i<size;i++) {
 3831           final SegmentInfo info = segmentInfos.info(i);
 3832           final String docStoreSegment = info.getDocStoreSegment();
 3833           if (docStoreSegment != null &&
 3834               docStoreSegment.equals(mergeDocStoreSegment) && 
 3835               info.getDocStoreIsCompoundFile()) {
 3836             merge.info.setDocStoreIsCompoundFile(true);
 3837             break;
 3838           }
 3839         }
 3840       }
 3841   
 3842       merge.info.setHasProx(merger.hasProx());
 3843   
 3844       segmentInfos.subList(start, start + merge.segments.size()).clear();
 3845       assert !segmentInfos.contains(merge.info);
 3846       segmentInfos.add(start, merge.info);
 3847   
 3848       // Must note the change to segmentInfos so any commits
 3849       // in-flight don't lose it:
 3850       checkpoint();
 3851   
 3852       // If the merged segments had pending changes, clear
 3853       // them so that they don't bother writing them to
 3854       // disk, updating SegmentInfo, etc.:
 3855       readerPool.clear(merge.segments);
 3856   
 3857       if (merge.optimize)
 3858         segmentsToOptimize.add(merge.info);
 3859       return true;
 3860     }
 3861     
 3862     private synchronized void decrefMergeSegments(MergePolicy.OneMerge merge) throws IOException {
 3863       assert merge.increfDone;
 3864       merge.increfDone = false;
 3865     }
 3866   
 3867     final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException {
 3868   
 3869       if (infoStream != null) {
 3870         message("handleMergeException: merge=" + merge.segString(directory) + " exc=" + t);
 3871       }
 3872   
 3873       // Set the exception on the merge, so if
 3874       // optimize() is waiting on us it sees the root
 3875       // cause exception:
 3876       merge.setException(t);
 3877       addMergeException(merge);
 3878   
 3879       if (t instanceof MergePolicy.MergeAbortedException) {
 3880         // We can ignore this exception (it happens when
 3881         // close(false) or rollback is called), unless the
 3882         // merge involves segments from external directories,
 3883         // in which case we must throw it so, for example, the
 3884         // rollbackTransaction code in addIndexes* is
 3885         // executed.
 3886         if (merge.isExternal)
 3887           throw (MergePolicy.MergeAbortedException) t;
 3888       } else if (t instanceof IOException)
 3889         throw (IOException) t;
 3890       else if (t instanceof RuntimeException)
 3891         throw (RuntimeException) t;
 3892       else if (t instanceof Error)
 3893         throw (Error) t;
 3894       else
 3895         // Should not get here
 3896         throw new RuntimeException(t);
 3897     }
 3898   
 3899     /**
 3900      * Merges the indicated segments, replacing them in the stack with a
 3901      * single segment.
 3902      */
 3903   
 3904     final void merge(MergePolicy.OneMerge merge)
 3905       throws CorruptIndexException, IOException {
 3906   
 3907       boolean success = false;
 3908   
 3909       try {
 3910         try {
 3911           try {
 3912             mergeInit(merge);
 3913   
 3914             if (infoStream != null)
 3915               message("now merge\n  merge=" + merge.segString(directory) + "\n  merge=" + merge + "\n  index=" + segString());
 3916   
 3917             mergeMiddle(merge);
 3918             mergeSuccess(merge);
 3919             success = true;
 3920           } catch (Throwable t) {
 3921             handleMergeException(t, merge);
 3922           }
 3923         } finally {
 3924           synchronized(this) {
 3925             mergeFinish(merge);
 3926   
 3927             if (!success) {
 3928               if (infoStream != null)
 3929                 message("hit exception during merge");
 3930               if (merge.info != null && !segmentInfos.contains(merge.info))
 3931                 deleter.refresh(merge.info.name);
 3932             }
 3933   
 3934             // This merge (and, generally, any change to the
 3935             // segments) may now enable new merges, so we call
 3936             // merge policy & update pending merges.
 3937             if (success && !merge.isAborted() && !closed && !closing)
 3938               updatePendingMerges(merge.maxNumSegmentsOptimize, merge.optimize);
 3939           }
 3940         }
 3941       } catch (OutOfMemoryError oom) {
 3942         handleOOM(oom, "merge");
 3943       }
 3944     }
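
  // How the pieces above fit together, as a rough sketch: a merge
  // scheduler (e.g. ConcurrentMergeScheduler) pulls registered merges
  // off the pending queue and runs each one through merge(), which in
  // turn drives mergeInit -> mergeMiddle -> mergeFinish:
  //
  //   MergePolicy.OneMerge next;
  //   while ((next = writer.getNextMerge()) != null) {
  //     writer.merge(next);
  //   }
  //
  // (getNextMerge() is assumed here to be the package-private accessor
  // the scheduler uses; the exact driver loop varies by scheduler.)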
 3945   
 3946     /** Hook that's called when the specified merge is complete. */
 3947     void mergeSuccess(MergePolicy.OneMerge merge) {
 3948     }
 3949     
 3950     /** Checks whether this merge involves any segments
 3951      *  already participating in a merge.  If not, this merge
 3952      *  is "registered", meaning we record that its segments
 3953      *  are now participating in a merge, and true is
 3954      *  returned.  Else (the merge conflicts) false is
 3955      *  returned. */
 3956     final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException {
 3957   
 3958       if (merge.registerDone)
 3959         return true;
 3960   
 3961       if (stopMerges) {
 3962         merge.abort();
 3963         throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.segString(directory));
 3964       }
 3965   
 3966       final int count = merge.segments.size();
 3967       boolean isExternal = false;
 3968       for(int i=0;i<count;i++) {
 3969         final SegmentInfo info = merge.segments.info(i);
 3970         if (mergingSegments.contains(info))
 3971           return false;
 3972         if (segmentInfos.indexOf(info) == -1)
 3973           return false;
 3974         if (info.dir != directory)
 3975           isExternal = true;
 3976       }
 3977   
 3978       ensureContiguousMerge(merge);
 3979   
 3980       pendingMerges.add(merge);
 3981   
 3982       if (infoStream != null)
 3983         message("add merge to pendingMerges: " + merge.segString(directory) + " [total " + pendingMerges.size() + " pending]");
 3984   
 3985       merge.mergeGen = mergeGen;
 3986       merge.isExternal = isExternal;
 3987   
 3988       // OK it does not conflict; now record that this merge
 3989       // is running (while synchronized) to avoid a race
 3990       // condition where two conflicting merges from different
 3991       // threads both start
 3992       for(int i=0;i<count;i++)
 3993         mergingSegments.add(merge.segments.info(i));
 3994   
 3995       // Merge is now registered
 3996       merge.registerDone = true;
 3997       return true;
 3998     }
 3999   
 4000     /** Does initial setup for a merge, which is fast but holds
 4001      *  the synchronized lock on the IndexWriter instance.  */
 4002     final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException {
 4003       boolean success = false;
 4004       try {
 4005         _mergeInit(merge);
 4006         success = true;
 4007       } finally {
 4008         if (!success) {
 4009           mergeFinish(merge);
 4010         }
 4011       }
 4012     }
 4013   
 4014     final synchronized private void _mergeInit(MergePolicy.OneMerge merge) throws IOException {
 4015   
 4016       assert testPoint("startMergeInit");
 4017   
 4018       assert merge.registerDone;
 4019       assert !merge.optimize || merge.maxNumSegmentsOptimize > 0;
 4020   
 4021       if (hitOOM) {
 4022         throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge");
 4023       }
 4024   
 4025       if (merge.info != null)
 4026         // mergeInit already done
 4027         return;
 4028   
 4029       if (merge.isAborted())
 4030         return;
 4031   
 4032       applyDeletes();
 4033   
 4034       final SegmentInfos sourceSegments = merge.segments;
 4035       final int end = sourceSegments.size();
 4036   
 4037       // Check whether this merge will allow us to skip
 4038       // merging the doc stores (stored fields & vectors).
 4039       // This is a very substantial optimization: it avoids
 4040       // rewriting the stored fields and term vectors files.
 4041   
 4042       Directory lastDir = directory;
 4043       String lastDocStoreSegment = null;
 4044       int next = -1;
 4045   
 4046       boolean mergeDocStores = false;
 4047       boolean doFlushDocStore = false;
 4048       final String currentDocStoreSegment = docWriter.getDocStoreSegment();
 4049   
 4050       // Test each segment to be merged: check if we need to
 4051       // flush/merge doc stores
 4052       for (int i = 0; i < end; i++) {
 4053         SegmentInfo si = sourceSegments.info(i);
 4054   
 4055         // If it has deletions we must merge the doc stores
 4056         if (si.hasDeletions())
 4057           mergeDocStores = true;
 4058   
 4059         // If it has its own (private) doc stores we must
 4060         // merge the doc stores
 4061         if (-1 == si.getDocStoreOffset())
 4062           mergeDocStores = true;
 4063   
 4064         // If it has a different doc store segment than
 4065         // previous segments, we must merge the doc stores
 4066         String docStoreSegment = si.getDocStoreSegment();
 4067         if (docStoreSegment == null)
 4068           mergeDocStores = true;
 4069         else if (lastDocStoreSegment == null)
 4070           lastDocStoreSegment = docStoreSegment;
 4071         else if (!lastDocStoreSegment.equals(docStoreSegment))
 4072           mergeDocStores = true;
 4073   
 4074       // Segments' docStoreOffsets must be in-order,
 4075         // contiguous.  For the default merge policy now
 4076         // this will always be the case but for an arbitrary
 4077         // merge policy this may not be the case
 4078         if (-1 == next)
 4079           next = si.getDocStoreOffset() + si.docCount;
 4080         else if (next != si.getDocStoreOffset())
 4081           mergeDocStores = true;
 4082         else
 4083           next = si.getDocStoreOffset() + si.docCount;
 4084         
 4085         // If the segment comes from a different directory
 4086         // we must merge
 4087         if (lastDir != si.dir)
 4088           mergeDocStores = true;
 4089   
 4090         // If the segment is referencing the current "live"
 4091         // doc store outputs then we must merge
 4092         if (si.getDocStoreOffset() != -1 && currentDocStoreSegment != null && si.getDocStoreSegment().equals(currentDocStoreSegment)) {
 4093           doFlushDocStore = true;
 4094         }
 4095       }
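
      // Net effect of the loop above: mergeDocStores stays false only if
      // every source segment shares a single, contiguous, deletion-free
      // doc store living in this directory; doFlushDocStore is set when
      // some segment still references the doc store DocumentsWriter is
      // currently writing, which (if the doc stores are going to be
      // merged) must be flushed below before the merge proceeds.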
 4096   
 4097       final int docStoreOffset;
 4098       final String docStoreSegment;
 4099       final boolean docStoreIsCompoundFile;
 4100   
 4101       if (mergeDocStores) {
 4102         docStoreOffset = -1;
 4103         docStoreSegment = null;
 4104         docStoreIsCompoundFile = false;
 4105       } else {
 4106         SegmentInfo si = sourceSegments.info(0);        
 4107         docStoreOffset = si.getDocStoreOffset();
 4108         docStoreSegment = si.getDocStoreSegment();
 4109         docStoreIsCompoundFile = si.getDocStoreIsCompoundFile();
 4110       }
 4111   
 4112       if (mergeDocStores && doFlushDocStore) {
 4113         // SegmentMerger intends to merge the doc stores
 4114         // (stored fields, vectors), and at least one of the
 4115         // segments to be merged refers to the currently
 4116         // live doc stores.
 4117   
 4118         // TODO: if we know we are about to merge away these
 4119         // newly flushed doc store files then we should not
 4120         // make compound file out of them...
 4121         if (infoStream != null)
 4122           message("now flush at merge");
 4123         doFlush(true, false);
 4124       }
 4125   
 4126       merge.increfDone = true;
 4127   
 4128       merge.mergeDocStores = mergeDocStores;
 4129   
 4130       // Bind a new segment name here so even with
 4131       // ConcurrentMergePolicy we keep deterministic segment
 4132       // names.
 4133       merge.info = new SegmentInfo(newSegmentName(), 0,
 4134                                    directory, false, true,
 4135                                    docStoreOffset,
 4136                                    docStoreSegment,
 4137                                    docStoreIsCompoundFile,
 4138                                    false);
 4139   
 4140   
 4141       Map<String,String> details = new HashMap<String,String>();
 4142       details.put("optimize", merge.optimize+"");
 4143       details.put("mergeFactor", end+"");
 4144       details.put("mergeDocStores", mergeDocStores+"");
 4145       setDiagnostics(merge.info, "merge", details);
 4146   
 4147       // Also enroll the merged segment into mergingSegments;
 4148       // this prevents it from getting selected for a merge
 4149       // after our merge is done but while we are building the
 4150       // CFS:
 4151       mergingSegments.add(merge.info);
 4152     }
 4153   
 4154     private void setDiagnostics(SegmentInfo info, String source) {
 4155       setDiagnostics(info, source, null);
 4156     }
 4157   
 4158     private void setDiagnostics(SegmentInfo info, String source, Map<String,String> details) {
 4159       Map<String,String> diagnostics = new HashMap<String,String>();
 4160       diagnostics.put("source", source);
 4161       diagnostics.put("lucene.version", Constants.LUCENE_VERSION);
 4162       diagnostics.put("os", Constants.OS_NAME+"");
 4163       diagnostics.put("os.arch", Constants.OS_ARCH+"");
 4164       diagnostics.put("os.version", Constants.OS_VERSION+"");
 4165       diagnostics.put("java.version", Constants.JAVA_VERSION+"");
 4166       diagnostics.put("java.vendor", Constants.JAVA_VENDOR+"");
 4167       if (details != null) {
 4168         diagnostics.putAll(details);
 4169       }
 4170       info.setDiagnostics(diagnostics);
 4171     }
 4172   
 4173     /** Does finishing work for a merge, which is fast but holds
 4174      *  the synchronized lock on the IndexWriter instance. */
 4175     final synchronized void mergeFinish(MergePolicy.OneMerge merge) throws IOException {
 4176       
 4177       // Optimize, addIndexes or finishMerges may be waiting
 4178       // on merges to finish.
 4179       notifyAll();
 4180   
 4181       if (merge.increfDone)
 4182         decrefMergeSegments(merge);
 4183   
 4184       // It's possible we are called twice, eg if there was an
 4185       // exception inside mergeInit
 4186       if (merge.registerDone) {
 4187         final SegmentInfos sourceSegments = merge.segments;
 4188         final int end = sourceSegments.size();
 4189         for(int i=0;i<end;i++)
 4190           mergingSegments.remove(sourceSegments.info(i));
 4191         mergingSegments.remove(merge.info);
 4192         merge.registerDone = false;
 4193       }
 4194   
 4195       runningMerges.remove(merge);
 4196     }
 4197   
 4198     /** Does the actual (time-consuming) work of the merge,
 4199      *  but without holding the synchronized lock on the
 4200      *  IndexWriter instance. */
 4201     final private int mergeMiddle(MergePolicy.OneMerge merge) 
 4202       throws CorruptIndexException, IOException {
 4203       
 4204       merge.checkAborted(directory);
 4205   
 4206       final String mergedName = merge.info.name;
 4207       
 4208       SegmentMerger merger = null;
 4209   
 4210       int mergedDocCount = 0;
 4211   
 4212       SegmentInfos sourceSegments = merge.segments;
 4213       final int numSegments = sourceSegments.size();
 4214   
 4215       if (infoStream != null)
 4216         message("merging " + merge.segString(directory));
 4217   
 4218       merger = new SegmentMerger(this, mergedName, merge);
 4219   
 4220       merge.readers = new SegmentReader[numSegments];
 4221       merge.readersClone = new SegmentReader[numSegments];
 4222   
 4223       boolean mergeDocStores = false;
 4224   
 4225       final Set<String> dss = new HashSet<String>();
 4226       
 4227       // This try/finally makes sure the merger's readers are
 4228       // closed:
 4229       boolean success = false;
 4230       try {
 4231         int totDocCount = 0;
 4232   
 4233         for (int i = 0; i < numSegments; i++) {
 4234   
 4235           final SegmentInfo info = sourceSegments.info(i);
 4236   
 4237           // Hold onto the "live" reader; we will use this to
 4238           // commit merged deletes
 4239           SegmentReader reader = merge.readers[i] = readerPool.get(info, merge.mergeDocStores,
 4240                                                                    MERGE_READ_BUFFER_SIZE,
 4241                                                                    -1);
 4242   
 4243           // We clone the segment readers because other
 4244           // deletes may come in while we're merging so we
 4245           // need readers that will not change
 4246           SegmentReader clone = merge.readersClone[i] = (SegmentReader) reader.clone(true);
 4247           merger.add(clone);
 4248   
 4249           if (clone.hasDeletions()) {
 4250             mergeDocStores = true;
 4251           }
 4252           
 4253           if (info.getDocStoreOffset() != -1) {
 4254             dss.add(info.getDocStoreSegment());
 4255           }
 4256   
 4257           totDocCount += clone.numDocs();
 4258         }
 4259   
 4260         if (infoStream != null) {
 4261           message("merge: total "+totDocCount+" docs");
 4262         }
 4263   
 4264         merge.checkAborted(directory);
 4265   
 4266         // If deletions have arrived and it has now become
 4267         // necessary to merge doc stores, go and open them:
 4268         if (mergeDocStores && !merge.mergeDocStores) {
 4269           merge.mergeDocStores = true;
 4270           synchronized(this) {
 4271             if (dss.contains(docWriter.getDocStoreSegment())) {
 4272               if (infoStream != null)
 4273                 message("now flush at mergeMiddle");
 4274               doFlush(true, false);
 4275             }
 4276           }
 4277   
 4278           for(int i=0;i<numSegments;i++) {
 4279             merge.readersClone[i].openDocStores();
 4280           }
 4281   
 4282           // Clear DSS
 4283           synchronized(this) {
 4284             merge.info.setDocStore(-1, null, false);
 4285           }
 4286         }
 4287   
 4288         // This is where all the work happens:
 4289         mergedDocCount = merge.info.docCount = merger.merge(merge.mergeDocStores);
 4290   
 4291         assert mergedDocCount == totDocCount;
 4292   
 4293         // TODO: in the non-realtime case, we may want to only
 4294         // keep deletes (it's costly to open entire reader
 4295         // when we just need deletes)
 4296   
 4297         final SegmentReader mergedReader = readerPool.get(merge.info, false, BufferedIndexInput.BUFFER_SIZE, -1);
 4298         try {
 4299           if (poolReaders && mergedSegmentWarmer != null) {
 4300             mergedSegmentWarmer.warm(mergedReader);
 4301           }
 4302           if (!commitMerge(merge, merger, mergedDocCount, mergedReader))
 4303             // commitMerge will return false if this merge was aborted
 4304             return 0;
 4305         } finally {
 4306           synchronized(this) {
 4307             readerPool.release(mergedReader);
 4308           }
 4309         }
 4310   
 4311         success = true;
 4312       } finally {
 4313         synchronized(this) {
 4314           if (!success) {
 4315             // Suppress any new exceptions so we throw the
 4316             // original cause
 4317             for (int i=0;i<numSegments;i++) {
 4318               if (merge.readers[i] != null) {
 4319                 try {
 4320                   readerPool.release(merge.readers[i], true);
 4321                 } catch (Throwable t) {
 4322                 }
 4323               }
 4324   
 4325               if (merge.readersClone[i] != null) {
 4326                 try {
 4327                   merge.readersClone[i].close();
 4328                 } catch (Throwable t) {
 4329                 }
 4330                 // This was a private clone and we had the only reference
 4331                 assert merge.readersClone[i].getRefCount() == 0;
 4332               }
 4333             }
 4334           } else {
 4335             for (int i=0;i<numSegments;i++) {
 4336               if (merge.readers[i] != null) {
 4337                 readerPool.release(merge.readers[i], true);
 4338               }
 4339   
 4340               if (merge.readersClone[i] != null) {
 4341                 merge.readersClone[i].close();
 4342                 // This was a private clone and we had the only reference
 4343                 assert merge.readersClone[i].getRefCount() == 0;
 4344               }
 4345             }
 4346           }
 4347         }
 4348       }
 4349   
 4350       // Must checkpoint before decrefing so any newly
 4351       // referenced files in the new merge.info are incref'd
 4352       // first:
 4353       synchronized(this) {
 4354         deleter.checkpoint(segmentInfos, false);
 4355       }
 4356       decrefMergeSegments(merge);
 4357   
 4358       if (merge.useCompoundFile) {
 4359   
 4360         success = false;
 4361         final String compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION;
 4362   
 4363         try {
 4364           merger.createCompoundFile(compoundFileName);
 4365           success = true;
 4366         } catch (IOException ioe) {
 4367           synchronized(this) {
 4368             if (merge.isAborted()) {
 4369               // This can happen if rollback or close(false)
 4370               // is called -- fall through to logic below to
 4371               // remove the partially created CFS:
 4372               success = true;
 4373             } else
 4374               handleMergeException(ioe, merge);
 4375           }
 4376         } catch (Throwable t) {
 4377           handleMergeException(t, merge);
 4378         } finally {
 4379           if (!success) {
 4380             if (infoStream != null)
 4381               message("hit exception creating compound file during merge");
 4382             synchronized(this) {
 4383               deleter.deleteFile(compoundFileName);
 4384             }
 4385           }
 4386         }
 4387   
 4388         if (merge.isAborted()) {
 4389           if (infoStream != null)
 4390             message("abort merge after building CFS");
 4391           deleter.deleteFile(compoundFileName);
 4392           return 0;
 4393         }
 4394   
 4395         synchronized(this) {
 4396           if (segmentInfos.indexOf(merge.info) == -1 || merge.isAborted()) {
 4397             // Our segment (committed in non-compound
 4398             // format) got merged away while we were
 4399             // building the compound format.
 4400             deleter.deleteFile(compoundFileName);
 4401           } else {
 4402             merge.info.setUseCompoundFile(true);
 4403             checkpoint();
 4404           }
 4405         }
 4406       }
 4407   
 4408       return mergedDocCount;
 4409     }
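
  // mergeMiddle() in brief: (1) check out a pooled SegmentReader plus a
  // frozen clone for every source segment, (2) run SegmentMerger over
  // the clones (first flushing/opening doc stores if late deletions made
  // that necessary), (3) optionally warm the merged reader and
  // commitMerge() it into segmentInfos, and (4) if the merge policy
  // requested a compound file, build the CFS and delete the partial file
  // on failure or abort.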
 4410   
 4411     synchronized void addMergeException(MergePolicy.OneMerge merge) {
 4412       assert merge.getException() != null;
 4413       if (!mergeExceptions.contains(merge) && mergeGen == merge.mergeGen)
 4414         mergeExceptions.add(merge);
 4415     }
 4416   
 4417     // Apply buffered deletes to all segments.
 4418     private final synchronized boolean applyDeletes() throws CorruptIndexException, IOException {
 4419       assert testPoint("startApplyDeletes");
 4420       flushDeletesCount++;
 4421       SegmentInfos rollback = (SegmentInfos) segmentInfos.clone();
 4422       boolean success = false;
 4423       boolean changed;
 4424       try {
 4425         changed = docWriter.applyDeletes(segmentInfos);
 4426         success = true;
 4427       } finally {
 4428         if (!success) {
 4429           if (infoStream != null)
 4430             message("hit exception flushing deletes");
 4431   
 4432           // Carefully remove any partially written .del
 4433           // files
 4434           final int size = rollback.size();
 4435           for(int i=0;i<size;i++) {
 4436             final String newDelFileName = segmentInfos.info(i).getDelFileName();
 4437             final String delFileName = rollback.info(i).getDelFileName();
 4438             if (newDelFileName != null && !newDelFileName.equals(delFileName))
 4439               deleter.deleteFile(newDelFileName);
 4440           }
 4441   
 4442           // Fully replace the segmentInfos since flushed
 4443           // deletes could have changed any of the
 4444           // SegmentInfo instances:
 4445           segmentInfos.clear();
 4446           segmentInfos.addAll(rollback);
 4447         }
 4448       }
 4449   
 4450       if (changed)
 4451         checkpoint();
 4452       return changed;
 4453     }
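
  // The deletes applied above are the ones callers buffer through the
  // public API, for example:
  //
  //   writer.deleteDocuments(new Term("id", "42"));
  //   writer.deleteDocuments(new TermQuery(new Term("type", "draft")));
  //
  // DocumentsWriter holds them in memory until a flush or merge needs
  // them resolved against every segment, which is what applyDeletes()
  // does.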
 4454   
 4455     // For test purposes.
 4456     final synchronized int getBufferedDeleteTermsSize() {
 4457       return docWriter.getBufferedDeleteTerms().size();
 4458     }
 4459   
 4460     // For test purposes.
 4461     final synchronized int getNumBufferedDeleteTerms() {
 4462       return docWriter.getNumBufferedDeleteTerms();
 4463     }
 4464   
 4465     // utility routines for tests
 4466     SegmentInfo newestSegment() {
 4467       return segmentInfos.info(segmentInfos.size()-1);
 4468     }
 4469   
 4470     public synchronized String segString() {
 4471       return segString(segmentInfos);
 4472     }
 4473   
 4474     private synchronized String segString(SegmentInfos infos) {
 4475       StringBuilder buffer = new StringBuilder();
 4476       final int count = infos.size();
 4477       for(int i = 0; i < count; i++) {
 4478         if (i > 0) {
 4479           buffer.append(' ');
 4480         }
 4481         final SegmentInfo info = infos.info(i);
 4482         buffer.append(info.segString(directory));
 4483         if (info.dir != directory)
 4484           buffer.append("**");
 4485       }
 4486       return buffer.toString();
 4487     }
 4488   
 4489     // Files that have been sync'd already
 4490     private HashSet<String> synced = new HashSet<String>();
 4491   
 4492     // Files that are now being sync'd
 4493     private HashSet<String> syncing = new HashSet<String>();
 4494   
 4495     private boolean startSync(String fileName, Collection<String> pending) {
 4496       synchronized(synced) {
 4497         if (!synced.contains(fileName)) {
 4498           if (!syncing.contains(fileName)) {
 4499             syncing.add(fileName);
 4500             return true;
 4501           } else {
 4502             pending.add(fileName);
 4503             return false;
 4504           }
 4505         } else
 4506           return false;
 4507       }
 4508     }
 4509   
 4510     private void finishSync(String fileName, boolean success) {
 4511       synchronized(synced) {
 4512         assert syncing.contains(fileName);
 4513         syncing.remove(fileName);
 4514         if (success)
 4515           synced.add(fileName);
 4516         synced.notifyAll();
 4517       }
 4518     }
 4519   
 4520     /** Blocks until all files in syncing are sync'd */
 4521     private boolean waitForAllSynced(Collection<String> syncing) throws IOException {
 4522       synchronized(synced) {
 4523         Iterator<String> it = syncing.iterator();
 4524         while(it.hasNext()) {
 4525           final String fileName = it.next();
 4526           while(!synced.contains(fileName)) {
 4527             if (!syncing.contains(fileName))
 4528               // There was an error because a file that was
 4529               // previously syncing failed to appear in synced
 4530               return false;
 4531             else
 4532               try {
 4533                 synced.wait();
 4534               } catch (InterruptedException ie) {
 4535                 throw new ThreadInterruptedException(ie);
 4536               }
 4537           }
 4538         }
 4539         return true;
 4540       }
 4541     }
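
  // Sketch of the file-sync protocol used by startCommit() below: a
  // thread claims a file with startSync() (or parks it on its local
  // "pending" list if another thread is already syncing it), calls
  // directory.sync(fileName), and then finishSync() moves the file from
  // "syncing" to "synced" and wakes any waiters.  waitForAllSynced()
  // blocks until every pending file either shows up in "synced" or
  // drops out of "syncing" (another thread hit an error), in which case
  // the caller loops and syncs that file itself.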
 4542   
 4543     private synchronized void doWait() {
 4544       // NOTE: the callers of this method should in theory
 4545       // be able to simply call wait(), but, as a defense
 4546       // against thread timing hazards where notifyAll()
 4547       // fails to be called, we wait for at most 1 second
 4548       // and then return so the caller can check whether its wait
 4549       // conditions are satisfied:
 4550       try {
 4551         wait(1000);
 4552       } catch (InterruptedException ie) {
 4553         throw new ThreadInterruptedException(ie);
 4554       }
 4555     }
 4556   
 4557     /** Walk through all files referenced by the current
 4558      *  segmentInfos and ask the Directory to sync each file,
 4559      *  if it wasn't already.  If that succeeds, then we
 4560      *  prepare a new segments_N file but do not fully commit
 4561      *  it. */
 4562     private void startCommit(long sizeInBytes, Map<String,String> commitUserData) throws IOException {
 4563   
 4564       assert testPoint("startStartCommit");
 4565   
 4566       // TODO: as of LUCENE-2095, we can simplify this method,
 4567       // since only 1 thread can be in here at once
 4568   
 4569       if (hitOOM) {
 4570         throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
 4571       }
 4572   
 4573       try {
 4574   
 4575         if (infoStream != null)
 4576           message("startCommit(): start sizeInBytes=" + sizeInBytes);
 4577   
 4578         SegmentInfos toSync = null;
 4579         final long myChangeCount;
 4580   
 4581         synchronized(this) {
 4582   
 4583           // Wait for any running addIndexes to complete
 4584           // first, then block any from running until we've
 4585           // copied the segmentInfos we intend to sync:
 4586           blockAddIndexes(false);
 4587   
 4588           // On commit the segmentInfos must never
 4589           // reference a segment in another directory:
 4590           assert !hasExternalSegments();
 4591   
 4592           try {
 4593   
 4594             assert lastCommitChangeCount <= changeCount;
 4595   
 4596             if (changeCount == lastCommitChangeCount) {
 4597               if (infoStream != null)
 4598                 message("  skip startCommit(): no changes pending");
 4599               return;
 4600             }
 4601   
 4602             // First, we clone & incref the segmentInfos we intend
 4603             // to sync, then, without locking, we sync() each file
 4604             // referenced by toSync, in the background.  Multiple
 4605             // threads can be doing this at once, if say a large
 4606             // merge and a small merge finish at the same time:
 4607   
 4608             if (infoStream != null)
 4609               message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount);
 4610             
 4611             readerPool.commit();
 4612             
 4613             toSync = (SegmentInfos) segmentInfos.clone();
 4614   
 4615             if (commitUserData != null)
 4616               toSync.setUserData(commitUserData);
 4617   
 4618             deleter.incRef(toSync, false);
 4619             myChangeCount = changeCount;
 4620   
 4621             Collection<String> files = toSync.files(directory, false);
 4622             for(final String fileName: files) {
 4623               assert directory.fileExists(fileName): "file " + fileName + " does not exist";
 4624             }
 4625   
 4626           } finally {
 4627             resumeAddIndexes();
 4628           }
 4629         }
 4630   
 4631         assert testPoint("midStartCommit");
 4632   
 4633         boolean setPending = false;
 4634   
 4635         try {
 4636   
 4637           // Loop until all files toSync references are sync'd:
 4638           while(true) {
 4639   
 4640             final Collection<String> pending = new ArrayList<String>();
 4641   
 4642             Iterator<String> it = toSync.files(directory, false).iterator();
 4643             while(it.hasNext()) {
 4644               final String fileName = it.next();
 4645               if (startSync(fileName, pending)) {
 4646                 boolean success = false;
 4647                 try {
 4648                   // Because we incRef'd this commit point, above,
 4649                   // the file had better exist:
 4650                   assert directory.fileExists(fileName): "file '" + fileName + "' does not exist dir=" + directory;
 4651                   if (infoStream != null)
 4652                     message("now sync " + fileName);
 4653                   directory.sync(fileName);
 4654                   success = true;
 4655                 } finally {
 4656                   finishSync(fileName, success);
 4657                 }
 4658               }
 4659             }
 4660   
 4661             // All files that I require are either synced or being
 4662             // synced by other threads.  If they are being synced,
 4663             // we must at this point block until they are done.
 4664             // If this returns false, that means an error in
 4665             // another thread resulted in failing to actually
 4666             // sync one of our files, so we repeat:
 4667             if (waitForAllSynced(pending))
 4668               break;
 4669           }
 4670   
 4671           assert testPoint("midStartCommit2");
 4672   
 4673           synchronized(this) {
 4674             // If someone saved a newer version of segments file
 4675             // since I first started syncing my version, I can
 4676             // safely skip saving myself since I've been
 4677             // superseded:
 4678   
 4679             while(true) {
 4680               if (myChangeCount <= lastCommitChangeCount) {
 4681                 if (infoStream != null) {
 4682                   message("sync superseded by newer infos");
 4683                 }
 4684                 break;
 4685               } else if (pendingCommit == null) {
 4686                 // My turn to commit
 4687   
 4688                 if (segmentInfos.getGeneration() > toSync.getGeneration())
 4689                   toSync.updateGeneration(segmentInfos);
 4690   
 4691                 boolean success = false;
 4692                 try {
 4693   
 4694                   // Exception here means nothing is prepared
 4695                   // (this method unwinds everything it did on
 4696                   // an exception)
 4697                   try {
 4698                     toSync.prepareCommit(directory);
 4699                   } finally {
 4700                     // Have our master segmentInfos record the
 4701                     // generations we just prepared.  We do this
 4702                     // on error or success so we don't
 4703                     // double-write a segments_N file.
 4704                     segmentInfos.updateGeneration(toSync);
 4705                   }
 4706   
 4707                   assert pendingCommit == null;
 4708                   setPending = true;
 4709                   pendingCommit = toSync;
 4710                   pendingCommitChangeCount = myChangeCount;
 4711                   success = true;
 4712                 } finally {
 4713                   if (!success && infoStream != null)
 4714                     message("hit exception committing segments file");
 4715                 }
 4716                 break;
 4717               } else {
 4718                 // Must wait for other commit to complete
 4719                 doWait();
 4720               }
 4721             }
 4722           }
 4723   
 4724           if (infoStream != null)
 4725             message("done all syncs");
 4726   
 4727           assert testPoint("midStartCommitSuccess");
 4728   
 4729         } finally {
 4730           synchronized(this) {
 4731             if (!setPending)
 4732               deleter.decRef(toSync);
 4733           }
 4734         }
 4735       } catch (OutOfMemoryError oom) {
 4736         handleOOM(oom, "startCommit");
 4737       }
 4738       assert testPoint("finishStartCommit");
 4739     }
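
  // From a caller's perspective, startCommit() is the "prepare" half of
  // the two-phase commit exposed by the public API; a rough usage
  // sketch:
  //
  //   writer.prepareCommit();  // fsync files, write a pending segments_N
  //   // ... coordinate with any other transactional resources ...
  //   writer.commit();         // make the pending segments_N visible
  //
  // If something goes wrong in between, rollback() discards the
  // prepared but uncommitted state.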
 4740   
 4741     /**
 4742      * Returns <code>true</code> iff the index in the named directory is
 4743      * currently locked.
 4744      * @param directory the directory to check for a lock
 4745      * @throws IOException if there is a low-level IO error
 4746      */
 4747     public static boolean isLocked(Directory directory) throws IOException {
 4748       return directory.makeLock(WRITE_LOCK_NAME).isLocked();
 4749     }
 4750   
 4751     /**
 4752      * Forcibly unlocks the index in the named directory.
 4753      * <P>
 4754      * Caution: this should only be used by failure recovery code,
 4755      * when it is known that no other process nor thread is in fact
 4756      * currently accessing this index.
 4757      */
 4758     public static void unlock(Directory directory) throws IOException {
 4759       directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
 4760     }
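
  // Typical recovery-time use of the two static helpers above, assuming
  // no other process or thread is still using the index:
  //
  //   if (IndexWriter.isLocked(directory)) {
  //     IndexWriter.unlock(directory);   // clear a stale write lock
  //   }
  //   IndexWriter writer = new IndexWriter(directory, analyzer,
  //                                        IndexWriter.MaxFieldLength.LIMITED);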
 4761   
 4762     /**
 4763      * Specifies maximum field length (in number of tokens/terms) in {@link IndexWriter} constructors.
 4764      * {@link #setMaxFieldLength(int)} overrides the value set by
 4765      * the constructor.
 4766      */
 4767     public static final class MaxFieldLength {
 4768   
 4769       private int limit;
 4770       private String name;
 4771   
 4772       /**
 4773        * Private type-safe-enum-pattern constructor.
 4774        * 
 4775        * @param name instance name
 4776        * @param limit maximum field length
 4777        */
 4778       private MaxFieldLength(String name, int limit) {
 4779         this.name = name;
 4780         this.limit = limit;
 4781       }
 4782   
 4783       /**
 4784        * Public constructor to allow users to specify the maximum field size limit.
 4785        * 
 4786        * @param limit The maximum field length
 4787        */
 4788       public MaxFieldLength(int limit) {
 4789         this("User-specified", limit);
 4790       }
 4791       
 4792       public int getLimit() {
 4793         return limit;
 4794       }
 4795       
 4796       @Override
 4797       public String toString()
 4798       {
 4799         return name + ":" + limit;
 4800       }
 4801   
 4802       /** Sets the maximum field length to {@link Integer#MAX_VALUE}. */
 4803       public static final MaxFieldLength UNLIMITED
 4804           = new MaxFieldLength("UNLIMITED", Integer.MAX_VALUE);
 4805   
 4806       /**
 4807        * Sets the maximum field length to
 4808        * {@link #DEFAULT_MAX_FIELD_LENGTH}.
 4809        */
 4810       public static final MaxFieldLength LIMITED
 4811           = new MaxFieldLength("LIMITED", DEFAULT_MAX_FIELD_LENGTH);
 4812     }
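
  // MaxFieldLength in practice: callers either pick one of the constants
  // above or cap fields at a custom number of terms, for example:
  //
  //   IndexWriter w1 = new IndexWriter(dir, analyzer, true,
  //                                    IndexWriter.MaxFieldLength.UNLIMITED);
  //   IndexWriter w2 = new IndexWriter(dir, analyzer, true,
  //                                    new IndexWriter.MaxFieldLength(5000));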
 4813   
 4814     /** If {@link #getReader} has been called (ie, this writer
 4815      *  is in near real-time mode), then after a merge
 4816      *  completes, this class can be invoked to warm the
 4817      *  reader on the newly merged segment, before the merge
 4818      *  commits.  This is not required for near real-time
 4819      *  search, but will reduce search latency on opening a
 4820      *  new near real-time reader after a merge completes.
 4821      *
 4822      * <p><b>NOTE:</b> This API is experimental and might
 4823      * change in incompatible ways in the next release.</p>
 4824      *
 4825      * <p><b>NOTE</b>: warm is called before any deletes have
 4826      * been carried over to the merged segment. */
 4827     public static abstract class IndexReaderWarmer {
 4828       public abstract void warm(IndexReader reader) throws IOException;
 4829     }
 4830   
 4831     private IndexReaderWarmer mergedSegmentWarmer;
 4832   
 4833     /** Set the merged segment warmer.  See {@link
 4834      *  IndexReaderWarmer}. */
 4835     public void setMergedSegmentWarmer(IndexReaderWarmer warmer) {
 4836       mergedSegmentWarmer = warmer;
 4837     }
 4838   
 4839     /** Returns the current merged segment warmer.  See {@link
 4840      *  IndexReaderWarmer}. */
 4841     public IndexReaderWarmer getMergedSegmentWarmer() {
 4842       return mergedSegmentWarmer;
 4843     }
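
  // One way to wire up a warmer for near real-time search (the warm-up
  // work itself is application-defined; touching a commonly queried term
  // is just one possibility):
  //
  //   writer.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
  //     @Override
  //     public void warm(IndexReader reader) throws IOException {
  //       reader.docFreq(new Term("category", "warmup"));
  //     }
  //   });
  //   IndexReader nrtReader = writer.getReader();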
 4844   
 4845     private void handleOOM(OutOfMemoryError oom, String location) {
 4846       if (infoStream != null) {
 4847         message("hit OutOfMemoryError inside " + location);
 4848       }
 4849       hitOOM = true;
 4850       throw oom;
 4851     }
 4852   
 4853     // Used only by assert for testing.  Current points:
 4854     //   startDoFlush
 4855     //   startCommitMerge
 4856     //   startStartCommit
 4857     //   midStartCommit
 4858     //   midStartCommit2
 4859     //   midStartCommitSuccess
 4860     //   finishStartCommit
 4861     //   startCommitMergeDeletes
 4862     //   startMergeInit
 4863     //   startApplyDeletes
 4864     //   DocumentsWriter.ThreadState.init start
 4865     boolean testPoint(String name) {
 4866       return true;
 4867     }
 4868   
 4869     synchronized boolean nrtIsCurrent(SegmentInfos infos) {
 4870       if (!infos.equals(segmentInfos)) {
 4871         // if any structural changes (new segments), we are
 4872         // stale
 4873         return false;
 4874       } else {
 4875         return !docWriter.anyChanges();
 4876       }
 4877     }
 4878   
 4879     synchronized boolean isClosed() {
 4880       return closed;
 4881     }
 4882   }
