Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » index » [javadoc | source]
    1   package org.apache.lucene.index;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import java.io.IOException;
   21   import java.util.ArrayList;
   22   import java.util.Arrays;
   23   import java.util.Collection;
   24   import java.util.HashMap;
   25   import java.util.HashSet;
   26   
   27   import java.util.List;
   28   import java.util.Map;
   29   import java.util.Set;
   30   
   31   import org.apache.lucene.document.Document;
   32   import org.apache.lucene.document.FieldSelector;
   33   import org.apache.lucene.search.DefaultSimilarity;
   34   import org.apache.lucene.store.BufferedIndexInput;
   35   import org.apache.lucene.store.Directory;
   36   import org.apache.lucene.store.IndexInput;
   37   import org.apache.lucene.store.IndexOutput;
   38   import org.apache.lucene.util.BitVector;
   39   import org.apache.lucene.util.CloseableThreadLocal;
   40   
   41   /** @version $Id */
   42   /**
   43    * <p><b>NOTE:</b> This API is new and still experimental
   44    * (subject to change suddenly in the next release)</p>
   45    */
   46   public class SegmentReader extends IndexReader implements Cloneable {
   47     protected boolean readOnly; // assigned in get(...) and reopenSegment(...) from the requested read-only flag
   48   
   49     private SegmentInfo si; // descriptor of the segment this reader was opened on
   50     private int readBufferSize; // buffer size passed on when core readers and norms are opened
   51   
          // Holder whose initial value is a clone of core.getFieldsReaderOrig() (see FieldsReaderLocal)
   52     CloseableThreadLocal<FieldsReader> fieldsReaderLocal = new FieldsReaderLocal();
          // Holder for TermVectorsReader instances; closed in doClose()
   53     CloseableThreadLocal<TermVectorsReader> termVectorsLocal = new CloseableThreadLocal<TermVectorsReader>();
   54   
   55     BitVector deletedDocs = null; // deleted-document bits; null means this reader has no deletions
   56     Ref deletedDocsRef = null; // counts the readers sharing deletedDocs; non-null whenever deletedDocs is
   57     private boolean deletedDocsDirty = false; // set by doDelete(), cleared by doCommit()/doUndeleteAll()
   58     private boolean normsDirty = false; // when true, doCommit() re-writes every dirty Norm
   59     private int pendingDeleteCount; // deletes made since the last commit; folded into si's del count by doCommit()
   60   
          // NOTE(review): the rollback* fields are not referenced in this part of the file;
          // presumably they snapshot the matching fields above for a rollback -- confirm.
   61     private boolean rollbackHasChanges = false;
   62     private boolean rollbackDeletedDocsDirty = false;
   63     private boolean rollbackNormsDirty = false;
   64     private int rollbackPendingDeleteCount;
   65   
   66     // optionally used for the .nrm file shared by multiple norms
   67     private IndexInput singleNormStream;
   68     private Ref singleNormRef; // ref count for singleNormStream; Norm.closeInput() closes the stream at 0
   69   
   70     CoreReaders core; // core readers; reopenSegment() hands the same instance to the new reader after incRef()
   71   
   72     // Holds core readers that are shared (unchanged) when
   73     // SegmentReader is cloned or reopened
   74     static final class CoreReaders {
   75   
   76       // Counts how many other readers share the core objects
   77       // (freqStream, proxStream, tis, etc.) of this reader;
   78       // when ref drops to 0, these core objects may be
   79       // closed.  A given instance of SegmentReader may be
   80       // closed, even though it shares core objects with other
   81       // SegmentReaders:
   82       private final Ref ref = new Ref();
   83   
   84       final String segment;
   85       final FieldInfos fieldInfos;
   86       final IndexInput freqStream;
   87       final IndexInput proxStream;
   88       final TermInfosReader tisNoIndex;
   89   
   90       final Directory dir;
   91       final Directory cfsDir;
   92       final int readBufferSize;
   93       final int termsIndexDivisor;
   94   
            // The fields below are assigned after construction as well (loadTermsIndex,
            // openDocStores) and are therefore not final.
   95       TermInfosReader tis;
   96       FieldsReader fieldsReaderOrig;
   97       TermVectorsReader termVectorsReaderOrig;
   98       CompoundFileReader cfsReader;
   99       CompoundFileReader storeCFSReader;
  100   
            // Opens the core inputs of the segment: the compound file reader (only when
            // si.getUseCompoundFile()), the field infos, the term infos reader, the freq
            // stream and, when fieldInfos.hasProx(), the prox stream.  If anything
            // throws, decRef() is called so that whatever was already opened is closed.
  101       CoreReaders(Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor) throws IOException {
  102         segment = si.name;
  103         this.readBufferSize = readBufferSize;
  104         this.dir = dir;
  105   
  106         boolean success = false;
  107   
  108         try {
  109           Directory dir0 = dir;
  110           if (si.getUseCompoundFile()) {
  111             cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
  112             dir0 = cfsReader;
  113           }
  114           cfsDir = dir0;
  115   
  116           fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
  117   
  118           this.termsIndexDivisor = termsIndexDivisor;
  119           TermInfosReader reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
                // With a divisor of -1 the reader is kept as tisNoIndex and tis stays
                // null, so termsIndexIsLoaded() reports false until loadTermsIndex() runs
                // (presumably -1 means "do not load the terms index" -- confirm).
  120           if (termsIndexDivisor == -1) {
  121             tisNoIndex = reader;
  122           } else {
  123             tis = reader;
  124             tisNoIndex = null;
  125           }
  126   
  127           // make sure that all index files have been read or are kept open
  128           // so that if an index update removes them we'll still have them
  129           freqStream = cfsDir.openInput(segment + "." + IndexFileNames.FREQ_EXTENSION, readBufferSize);
  130   
  131           if (fieldInfos.hasProx()) {
  132             proxStream = cfsDir.openInput(segment + "." + IndexFileNames.PROX_EXTENSION, readBufferSize);
  133           } else {
  134             proxStream = null;
  135           }
  136           success = true;
  137         } finally {
  138           if (!success) {
                  // ref starts at 1, so this single decRef() closes everything opened so far
  139             decRef();
  140           }
  141         }
  142       }
  143   
  144       synchronized TermVectorsReader getTermVectorsReaderOrig() {
  145         return termVectorsReaderOrig;
  146       }
  147   
  148       synchronized FieldsReader getFieldsReaderOrig() {
  149         return fieldsReaderOrig;
  150       }
  151   
            // Registers one more reader sharing these core objects.
  152       synchronized void incRef() {
  153         ref.incRef();
  154       }
  155   
  156       synchronized Directory getCFSReader() {
  157         return cfsReader;
  158       }
  159   
            // Returns tis when it is set, otherwise tisNoIndex.
  160       synchronized TermInfosReader getTermsReader() {
  161         if (tis != null) {
  162           return tis;
  163         } else {
  164           return tisNoIndex;
  165         }
  166       }      
  167   
  168       synchronized boolean termsIndexIsLoaded() {
  169         return tis != null;
  170       }      
  171   
  172       // NOTE: only called from IndexWriter when a near
  173       // real-time reader is opened, or applyDeletes is run,
  174       // sharing a segment that's still being merged.  This
  175       // method is not fully thread safe, and relies on the
  176       // synchronization in IndexWriter
            // Does nothing when tis is already set.
  177       synchronized void loadTermsIndex(SegmentInfo si, int termsIndexDivisor) throws IOException {
  178         if (tis == null) {
  179           Directory dir0;
  180           if (si.getUseCompoundFile()) {
  181             // In some cases, we were originally opened when CFS
  182             // was not used, but then we are asked to open the
  183             // terms reader with index, the segment has switched
  184             // to CFS
  185             if (cfsReader == null) {
  186               cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
  187             }
  188             dir0 = cfsReader;
  189           } else {
  190             dir0 = dir;
  191           }
  192   
  193           tis = new TermInfosReader(dir0, segment, fieldInfos, readBufferSize, termsIndexDivisor);
  194         }
  195       }
  196   
            // Drops one reference; when the count reaches 0 every core object that is
            // non-null is closed.  The closes are sequential, so an exception from one
            // close() skips the remaining ones.
  197       synchronized void decRef() throws IOException {
  198   
  199         if (ref.decRef() == 0) {
  200   
  201           // close everything, nothing is shared anymore with other readers
  202           if (tis != null) {
  203             tis.close();
  204             // null so if an app hangs on to us we still free most ram
  205             tis = null;
  206           }
  207           
  208           if (tisNoIndex != null) {
  209             tisNoIndex.close();
  210           }
  211           
  212           if (freqStream != null) {
  213             freqStream.close();
  214           }
  215   
  216           if (proxStream != null) {
  217             proxStream.close();
  218           }
  219   
  220           if (termVectorsReaderOrig != null) {
  221             termVectorsReaderOrig.close();
  222           }
  223     
  224           if (fieldsReaderOrig != null) {
  225             fieldsReaderOrig.close();
  226           }
  227     
  228           if (cfsReader != null) {
  229             cfsReader.close();
  230           }
  231     
  232           if (storeCFSReader != null) {
  233             storeCFSReader.close();
  234           }
  235         }
  236       }
  237   
            // Opens fieldsReaderOrig (and termVectorsReaderOrig when
            // fieldInfos.hasVectors()) on the first call; later calls are no-ops
            // because fieldsReaderOrig is then non-null.  Throws
            // CorruptIndexException when the segment has no doc store offset and the
            // fields reader size differs from si.docCount.
  238       synchronized void openDocStores(SegmentInfo si) throws IOException {
  239   
  240         assert si.name.equals(segment);
  241   
  242         if (fieldsReaderOrig == null) {
                // Pick the directory that holds the stored fields / term vectors files
  243           final Directory storeDir;
  244           if (si.getDocStoreOffset() != -1) {
  245             if (si.getDocStoreIsCompoundFile()) {
  246               assert storeCFSReader == null;
  247               storeCFSReader = new CompoundFileReader(dir,
  248                                                       si.getDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION,
  249                                                       readBufferSize);
  250               storeDir = storeCFSReader;
  251               assert storeDir != null;
  252             } else {
  253               storeDir = dir;
  254               assert storeDir != null;
  255             }
  256           } else if (si.getUseCompoundFile()) {
  257             // In some cases, we were originally opened when CFS
  258             // was not used, but then we are asked to open doc
  259             // stores after the segment has switched to CFS
  260             if (cfsReader == null) {
  261               cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
  262             }
  263             storeDir = cfsReader;
  264             assert storeDir != null;
  265           } else {
  266             storeDir = dir;
  267             assert storeDir != null;
  268           }
  269   
                // File name prefix: the doc store segment when a doc store offset is set,
                // otherwise this segment's own name
  270           final String storesSegment;
  271           if (si.getDocStoreOffset() != -1) {
  272             storesSegment = si.getDocStoreSegment();
  273           } else {
  274             storesSegment = segment;
  275           }
  276   
  277           fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize,
  278                                               si.getDocStoreOffset(), si.docCount);
  279   
  280           // Verify two sources of "maxDoc" agree:
  281           if (si.getDocStoreOffset() == -1 && fieldsReaderOrig.size() != si.docCount) {
  282             throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.size() + " but segmentInfo shows " + si.docCount);
  283           }
  284   
  285           if (fieldInfos.hasVectors()) { // open term vector files only as needed
  286             termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.getDocStoreOffset(), si.docCount);
  287           }
  288         }
  289       }
  290     }
  291   
  292     /**
  293      * Sets the initial value 
  294      */
  295     private class FieldsReaderLocal extends CloseableThreadLocal<FieldsReader> {
  296       @Override
  297       protected FieldsReader initialValue() {
  298         return (FieldsReader) core.getFieldsReaderOrig().clone();
  299       }
  300     }
  301     
  302     static class Ref {
  303       private int refCount = 1;
  304       
  305       @Override
  306       public String toString() {
  307         return "refcount: "+refCount;
  308       }
  309       
  310       public synchronized int refCount() {
  311         return refCount;
  312       }
  313       
  314       public synchronized int incRef() {
  315         assert refCount > 0;
  316         refCount++;
  317         return refCount;
  318       }
  319   
  320       public synchronized int decRef() {
  321         assert refCount > 0;
  322         refCount--;
  323         return refCount;
  324       }
  325     }
  326     
  327     /**
  328      * Byte[] referencing is used because a new norm object needs 
  329      * to be created for each clone, and the byte array is all 
  330      * that is needed for sharing between cloned readers.  The 
  331      * current norm referencing is for sharing between readers 
  332      * whereas the byte[] referencing is for copy on write which 
  333      * is independent of reader references (i.e. incRef, decRef).
  334      */
  335   
  336     final class Norm implements Cloneable {
            // Number of references held on this Norm; a new instance starts at 1
  337       private int refCount = 1;
  338   
  339       // If this instance is a clone, the originalNorm
  340       // references the Norm that has a real open IndexInput:
  341       private Norm origNorm;
  342   
            // Input the norm bytes are read from, starting at position normSeek;
            // set to null by closeInput() and in clones
  343       private IndexInput in;
  344       private long normSeek;
  345   
  346       // null until bytes is set
  347       private Ref bytesRef;
  348       private byte[] bytes;
            // Set by copyOnWrite(), cleared by reWrite()
  349       private boolean dirty;
            // Passed to si.advanceNormGen / si.getNormFileName in reWrite()
  350       private int number;
            // NOTE(review): not referenced in this part of the file
  351       private boolean rollbackDirty;
  352       
  353       public Norm(IndexInput in, int number, long normSeek) {
  354         this.in = in;
  355         this.number = number;
  356         this.normSeek = normSeek;
  357       }
  358   
            // Adds one reference to this Norm.
  359       public synchronized void incRef() {
  360         assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
  361         refCount++;
  362       }
  363   
            // Releases "in": closes it directly when it is private to this Norm,
            // otherwise drops one reference on the shared singleNormStream and closes
            // that stream once its count reaches 0.  No-op when in is already null.
  364       private void closeInput() throws IOException {
  365         if (in != null) {
  366           if (in != singleNormStream) {
  367             // It's private to us -- just close it
  368             in.close();
  369           } else {
  370             // We are sharing this with others -- decRef and
  371             // maybe close the shared norm stream
  372             if (singleNormRef.decRef() == 0) {
  373               singleNormStream.close();
  374               singleNormStream = null;
  375             }
  376           }
  377   
  378           in = null;
  379         }
  380       }
  381   
            // Drops one reference.  At 0 this releases the reference on origNorm (or,
            // if this is an original, closes its input) and the reference on the
            // cached bytes, if any.
  382       public synchronized void decRef() throws IOException {
  383         assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
  384   
  385         if (--refCount == 0) {
  386           if (origNorm != null) {
  387             origNorm.decRef();
  388             origNorm = null;
  389           } else {
  390             closeInput();
  391           }
  392   
  393           if (bytes != null) {
  394             assert bytesRef != null;
  395             bytesRef.decRef();
  396             bytes = null;
  397             bytesRef = null;
  398           } else {
  399             assert bytesRef == null;
  400           }
  401         }
  402       }
  403   
  404       // Load bytes but do not cache them if they were not
  405       // already cached
  406       public synchronized void bytes(byte[] bytesOut, int offset, int len) throws IOException {
  407         assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
  408         if (bytes != null) {
  409           // Already cached -- copy from cache:
  410           assert len <= maxDoc();
  411           System.arraycopy(bytes, 0, bytesOut, offset, len);
  412         } else {
  413           // Not cached
  414           if (origNorm != null) {
  415             // Ask origNorm to load
  416             origNorm.bytes(bytesOut, offset, len);
  417           } else {
  418             // We are orig -- read ourselves from disk:
                  // (the seek + read pair is done while holding the input's monitor)
  419             synchronized(in) {
  420               in.seek(normSeek);
  421               in.readBytes(bytesOut, offset, len, false);
  422             }
  423           }
  424         }
  425       }
  426   
  427       // Load & cache full bytes array.  Returns bytes.
  428       public synchronized byte[] bytes() throws IOException {
  429         assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
  430         if (bytes == null) {                     // value not yet read
  431           assert bytesRef == null;
  432           if (origNorm != null) {
  433             // Ask origNorm to load so that for a series of
  434             // reopened readers we share a single read-only
  435             // byte[]
  436             bytes = origNorm.bytes();
  437             bytesRef = origNorm.bytesRef;
  438             bytesRef.incRef();
  439   
  440             // Once we've loaded the bytes we no longer need
  441             // origNorm:
  442             origNorm.decRef();
  443             origNorm = null;
  444   
  445           } else {
  446             // We are the origNorm, so load the bytes for real
  447             // ourself:
  448             final int count = maxDoc();
  449             bytes = new byte[count];
  450   
  451             // Since we are orig, in must not be null
  452             assert in != null;
  453   
  454             // Read from disk.
  455             synchronized(in) {
  456               in.seek(normSeek);
  457               in.readBytes(bytes, 0, count, false);
  458             }
  459   
                  // The bytes are cached now, so the input is no longer needed
  460             bytesRef = new Ref();
  461             closeInput();
  462           }
  463         }
  464   
  465         return bytes;
  466       }
  467   
  468       // Only for testing
  469       Ref bytesRef() {
  470         return bytesRef;
  471       }
  472   
  473       // Called if we intend to change a norm value.  We make a
  474       // private copy of bytes if it's shared with others:
  475       public synchronized byte[] copyOnWrite() throws IOException {
  476         assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
              // Make sure the bytes are loaded and cached before deciding whether to copy
  477         bytes();
  478         assert bytes != null;
  479         assert bytesRef != null;
  480         if (bytesRef.refCount() > 1) {
  481           // I cannot be the origNorm for another norm
  482           // instance if I'm being changed.  Ie, only the
  483           // "head Norm" can be changed:
  484           assert refCount == 1;
  485           final Ref oldRef = bytesRef;
  486           bytes = cloneNormBytes(bytes);
  487           bytesRef = new Ref();
  488           oldRef.decRef();
  489         }
  490         dirty = true;
  491         return bytes;
  492       }
  493       
  494       // Returns a copy of this Norm instance that shares
  495       // IndexInput & bytes with the original one
  496       @Override
  497       public synchronized Object clone() {
  498         assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
  499           
  500         Norm clone;
  501         try {
  502           clone = (Norm) super.clone();
  503         } catch (CloneNotSupportedException cnse) {
  504           // Cannot happen
  505           throw new RuntimeException("unexpected CloneNotSupportedException", cnse);
  506         }
              // The field-by-field copy also copied refCount; the clone starts afresh at 1
  507         clone.refCount = 1;
  508   
  509         if (bytes != null) {
  510           assert bytesRef != null;
  511           assert origNorm == null;
  512   
  513           // Clone holds a reference to my bytes:
  514           clone.bytesRef.incRef();
  515         } else {
  516           assert bytesRef == null;
  517           if (origNorm == null) {
  518             // I become the origNorm for the clone:
  519             clone.origNorm = this;
  520           }
                // Otherwise the clone inherited my origNorm; either way it holds one reference on it
  521           clone.origNorm.incRef();
  522         }
  523   
  524         // Only the origNorm will actually readBytes from in:
  525         clone.in = null;
  526   
  527         return clone;
  528       }
  529   
  530       // Flush all pending changes to the next generation
  531       // separate norms file.
            // NOTE(review): unlike the other public methods of Norm this one is not
            // declared synchronized, and it writes the bytes field as-is -- confirm
            // that callers only invoke it on a Norm whose bytes are loaded.
  532       public void reWrite(SegmentInfo si) throws IOException {
  533         assert refCount > 0 && (origNorm == null || origNorm.refCount > 0): "refCount=" + refCount + " origNorm=" + origNorm;
  534   
  535         // NOTE: norms are re-written in regular directory, not cfs
  536         si.advanceNormGen(this.number);
  537         IndexOutput out = directory().createOutput(si.getNormFileName(this.number));
  538         try {
  539           out.writeBytes(bytes, maxDoc());
  540         } finally {
  541           out.close();
  542         }
  543         this.dirty = false;
  544       }
  545     }
  546   
  547     Map<String,Norm> norms = new HashMap<String,Norm>(); // this reader's Norm instances, keyed by field name
  548     
  549     /**
  550      * @throws CorruptIndexException if the index is corrupt
  551      * @throws IOException if there is a low-level IO error
  552      */
  553     public static SegmentReader get(boolean readOnly, SegmentInfo si, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
  554       return get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor);
  555     }
  556   
  557     /**
  558      * @throws CorruptIndexException if the index is corrupt
  559      * @throws IOException if there is a low-level IO error
  560      */
  561     public static SegmentReader get(boolean readOnly,
  562                                     Directory dir,
  563                                     SegmentInfo si,
  564                                     int readBufferSize,
  565                                     boolean doOpenStores,
  566                                     int termInfosIndexDivisor)
  567       throws CorruptIndexException, IOException {
  568       SegmentReader instance = readOnly ? new ReadOnlySegmentReader() : new SegmentReader();
  569       instance.readOnly = readOnly;
  570       instance.si = si;
  571       instance.readBufferSize = readBufferSize;
  572   
  573       boolean success = false;
  574   
  575       try {
  576         instance.core = new CoreReaders(dir, si, readBufferSize, termInfosIndexDivisor);
  577         if (doOpenStores) {
  578           instance.core.openDocStores(si);
  579         }
  580         instance.loadDeletedDocs();
  581         instance.openNorms(instance.core.cfsDir, readBufferSize);
  582         success = true;
  583       } finally {
  584   
  585         // With lock-less commits, it's entirely possible (and
  586         // fine) to hit a FileNotFound exception above.  In
  587         // this case, we want to explicitly close any subset
  588         // of things that were opened so that we don't have to
  589         // wait for a GC to do so.
  590         if (!success) {
  591           instance.doClose();
  592         }
  593       }
  594       return instance;
  595     }
  596   
  597     void openDocStores() throws IOException {
            // Delegates to the core readers, which open the stored fields reader
            // (and term vectors reader, if the segment has vectors) unless the
            // stored fields reader is already open.
  598       core.openDocStores(si);
  599     }
  600   
  601     private void loadDeletedDocs() throws IOException {
  602       // NOTE: the bitvector is stored using the regular directory, not cfs
  603       if (hasDeletions(si)) {
  604         deletedDocs = new BitVector(directory(), si.getDelFileName());
  605         deletedDocsRef = new Ref();
  606        
  607         assert si.getDelCount() == deletedDocs.count() : 
  608           "delete count mismatch: info=" + si.getDelCount() + " vs BitVector=" + deletedDocs.count();
  609   
  610         // Verify # deletes does not exceed maxDoc for this
  611         // segment:
  612         assert si.getDelCount() <= maxDoc() : 
  613           "delete count mismatch: " + deletedDocs.count() + ") exceeds max doc (" + maxDoc() + ") for segment " + si.name;
  614   
  615       } else
  616         assert si.getDelCount() == 0;
  617     }
  618     
  619     /**
  620      * Clones the norm bytes.  May be overridden by subclasses.  New and experimental.
  621      * @param bytes Byte array to clone
  622      * @return New BitVector
  623      */
  624     protected byte[] cloneNormBytes(byte[] bytes) {
  625       byte[] cloneBytes = new byte[bytes.length];
  626       System.arraycopy(bytes, 0, cloneBytes, 0, bytes.length);
  627       return cloneBytes;
  628     }
  629     
  630     /**
  631      * Clones the deleteDocs BitVector.  May be overridden by subclasses. New and experimental.
  632      * @param bv BitVector to clone
  633      * @return New BitVector
  634      */
  635     protected BitVector cloneDeletedDocs(BitVector bv) {
  636       return (BitVector)bv.clone();
  637     }
  638   
  639     @Override
  640     public final synchronized Object clone() {
  641       try {
  642         return clone(readOnly); // Preserve current readOnly
  643       } catch (Exception ex) {
  644         throw new RuntimeException(ex);
  645       }
  646     }
  647   
  648     @Override
  649     public final synchronized IndexReader clone(boolean openReadOnly) throws CorruptIndexException, IOException {
            // A clone is a reopen against the unchanged SegmentInfo with doClone=true
  650       return reopenSegment(si, true, openReadOnly);
  651     }
  652   
  653     synchronized SegmentReader reopenSegment(SegmentInfo si, boolean doClone, boolean openReadOnly) throws CorruptIndexException, IOException {
            // Returns a reader for the given (possibly newer) SegmentInfo that shares
            // the core readers, and where unchanged the deleted docs and norms, with
            // this reader.  Returns this reader itself when nothing changed, no clone
            // was requested and both this and the requested reader are read-only.
            //
            // Deletions are up to date when both infos agree on having deletions and,
            // if they have them, on the deletions file name.
  654       boolean deletionsUpToDate = (this.si.hasDeletions() == si.hasDeletions()) 
  655                                     && (!si.hasDeletions() || this.si.getDelFileName().equals(si.getDelFileName()));
  656       boolean normsUpToDate = true;
  657       
            // A field's norms changed when its norm file name differs between the two infos
  658       boolean[] fieldNormsChanged = new boolean[core.fieldInfos.size()];
  659       final int fieldCount = core.fieldInfos.size();
  660       for (int i = 0; i < fieldCount; i++) {
  661         if (!this.si.getNormFileName(i).equals(si.getNormFileName(i))) {
  662           normsUpToDate = false;
  663           fieldNormsChanged[i] = true;
  664         }
  665       }
  666   
  667       // if we're cloning we need to run through the reopenSegment logic
  668       // also if both old and new readers aren't readonly, we clone to avoid sharing modifications
  669       if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly) {
  670         return this;
  671       }    
  672   
  673       // When cloning, the incoming SegmentInfos should not
  674       // have any changes in it:
  675       assert !doClone || (normsUpToDate && deletionsUpToDate);
  676   
  677       // clone reader
  678       SegmentReader clone = openReadOnly ? new ReadOnlySegmentReader() : new SegmentReader();
  679   
  680       boolean success = false;
  681       try {
              // The new reader shares the core readers, so take a reference first
  682         core.incRef();
  683         clone.core = core;
  684         clone.readOnly = openReadOnly;
  685         clone.si = si;
  686         clone.readBufferSize = readBufferSize;
  687   
  688         if (!openReadOnly && hasChanges) {
  689           // My pending changes transfer to the new reader
  690           clone.pendingDeleteCount = pendingDeleteCount;
  691           clone.deletedDocsDirty = deletedDocsDirty;
  692           clone.normsDirty = normsDirty;
  693           clone.hasChanges = hasChanges;
  694           hasChanges = false;
  695         }
  696         
  697         if (doClone) {
                // Share the deleted docs with the clone and count the extra sharer
  698           if (deletedDocs != null) {
  699             deletedDocsRef.incRef();
  700             clone.deletedDocs = deletedDocs;
  701             clone.deletedDocsRef = deletedDocsRef;
  702           }
  703         } else {
  704           if (!deletionsUpToDate) {
  705             // load deleted docs
  706             assert clone.deletedDocs == null;
  707             clone.loadDeletedDocs();
  708           } else if (deletedDocs != null) {
  709             deletedDocsRef.incRef();
  710             clone.deletedDocs = deletedDocs;
  711             clone.deletedDocsRef = deletedDocsRef;
  712           }
  713         }
  714   
  715         clone.norms = new HashMap<String,Norm>();
  716   
  717         // Clone norms
  718         for (int i = 0; i < fieldNormsChanged.length; i++) {
  719   
  720           // Clone unchanged norms to the cloned reader
  721           if (doClone || !fieldNormsChanged[i]) {
  722             final String curField = core.fieldInfos.fieldInfo(i).name;
  723             Norm norm = this.norms.get(curField);
  724             if (norm != null)
  725               clone.norms.put(curField, (Norm) norm.clone());
  726           }
  727         }
  728   
  729         // If we are not cloning, then this will open anew
  730         // any norms that have changed:
  731         clone.openNorms(si.getUseCompoundFile() ? core.getCFSReader() : directory(), readBufferSize);
  732   
  733         success = true;
  734       } finally {
  735         if (!success) {
  736           // An exception occurred during reopen, we have to decRef the norms
  737           // that we incRef'ed already and close singleNormsStream and FieldsReader
  738           clone.decRef();
  739         }
  740       }
  741       
  742       return clone;
  743     }
  744   
  745     @Override
  746     protected void doCommit(Map<String,String> commitUserData) throws IOException {
  747       if (hasChanges) {
  748         if (deletedDocsDirty) {               // re-write deleted
  749           si.advanceDelGen();
  750   
  751           // We can write directly to the actual name (vs to a
  752           // .tmp & renaming it) because the file is not live
  753           // until segments file is written:
  754           deletedDocs.write(directory(), si.getDelFileName());
  755   
  756           si.setDelCount(si.getDelCount()+pendingDeleteCount);
  757           pendingDeleteCount = 0;
  758           assert deletedDocs.count() == si.getDelCount(): "delete count mismatch during commit: info=" + si.getDelCount() + " vs BitVector=" + deletedDocs.count();
  759         } else {
  760           assert pendingDeleteCount == 0;
  761         }
  762   
  763         if (normsDirty) {               // re-write norms
  764           si.setNumFields(core.fieldInfos.size());
  765           for (final Norm norm : norms.values()) {
  766             if (norm.dirty) {
  767               norm.reWrite(si);
  768             }
  769           }
  770         }
  771         deletedDocsDirty = false;
  772         normsDirty = false;
  773         hasChanges = false;
  774       }
  775     }
  776   
  777     FieldsReader getFieldsReader() {
            // Value held by fieldsReaderLocal; its initial value is a clone of the core's fields reader
  778       return fieldsReaderLocal.get();
  779     }
  780   
  781     @Override
  782     protected void doClose() throws IOException {
  783       termVectorsLocal.close();
  784       fieldsReaderLocal.close();
  785       
  786       if (deletedDocs != null) {
  787         deletedDocsRef.decRef();
  788         // null so if an app hangs on to us we still free most ram
  789         deletedDocs = null;
  790       }
  791   
  792       for (final Norm norm : norms.values()) {
  793         norm.decRef();
  794       }
  795       if (core != null) {
  796         core.decRef();
  797       }
  798     }
  799   
  800     static boolean hasDeletions(SegmentInfo si) throws IOException {
            // Static variant: answers from the SegmentInfo, not from any loaded reader state.
  801       // Don't call ensureOpen() here (it could affect performance)
  802       return si.hasDeletions();
  803     }
  804   
  805     @Override
  806     public boolean hasDeletions() {
  807       // Don't call ensureOpen() here (it could affect performance)
  808       return deletedDocs != null;
  809     }
  810   
  811     static boolean usesCompoundFile(SegmentInfo si) throws IOException {
  812       return si.getUseCompoundFile();
  813     }
  814   
  815     static boolean hasSeparateNorms(SegmentInfo si) throws IOException {
  816       return si.hasSeparateNorms();
  817     }
  818   
  819     @Override
  820     protected void doDelete(int docNum) {
  821       if (deletedDocs == null) {
  822         deletedDocs = new BitVector(maxDoc());
  823         deletedDocsRef = new Ref();
  824       }
  825       // there is more than 1 SegmentReader with a reference to this
  826       // deletedDocs BitVector so decRef the current deletedDocsRef,
  827       // clone the BitVector, create a new deletedDocsRef
  828       if (deletedDocsRef.refCount() > 1) {
  829         Ref oldRef = deletedDocsRef;
  830         deletedDocs = cloneDeletedDocs(deletedDocs);
  831         deletedDocsRef = new Ref();
  832         oldRef.decRef();
  833       }
  834       deletedDocsDirty = true;
  835       if (!deletedDocs.getAndSet(docNum))
  836         pendingDeleteCount++;
  837     }
  838   
  839     @Override
  840     protected void doUndeleteAll() {
  841       deletedDocsDirty = false;
  842       if (deletedDocs != null) {
  843         assert deletedDocsRef != null;
  844         deletedDocsRef.decRef();
  845         deletedDocs = null;
  846         deletedDocsRef = null;
  847         pendingDeleteCount = 0;
  848         si.clearDelGen();
  849         si.setDelCount(0);
  850       } else {
  851         assert deletedDocsRef == null;
  852         assert pendingDeleteCount == 0;
  853       }
  854     }
  855   
  856     List<String> files() throws IOException {
  857       return new ArrayList<String>(si.files());
  858     }
  859   
  860     @Override
  861     public TermEnum terms() {
  862       ensureOpen();
  863       return core.getTermsReader().terms();
  864     }
  865   
  866     @Override
  867     public TermEnum terms(Term t) throws IOException {
  868       ensureOpen();
  869       return core.getTermsReader().terms(t);
  870     }
  871   
  872     FieldInfos fieldInfos() {
  873       return core.fieldInfos;
  874     }
  875   
  876     @Override
  877     public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
  878       ensureOpen();
  879       return getFieldsReader().doc(n, fieldSelector);
  880     }
  881   
  882     @Override
  883     public synchronized boolean isDeleted(int n) {
  884       return (deletedDocs != null && deletedDocs.get(n));
  885     }
  886   
  887     @Override
  888     public TermDocs termDocs(Term term) throws IOException {
  889       if (term == null) {
  890         return new AllTermDocs(this);
  891       } else {
  892         return super.termDocs(term);
  893       }
  894     }
  895   
  896     @Override
  897     public TermDocs termDocs() throws IOException {
  898       ensureOpen();
  899       return new SegmentTermDocs(this);
  900     }
  901   
  902     @Override
  903     public TermPositions termPositions() throws IOException {
  904       ensureOpen();
  905       return new SegmentTermPositions(this);
  906     }
  907   
  908     @Override
  909     public int docFreq(Term t) throws IOException {
  910       ensureOpen();
  911       TermInfo ti = core.getTermsReader().get(t);
  912       if (ti != null)
  913         return ti.docFreq;
  914       else
  915         return 0;
  916     }
  917   
  918     @Override
  919     public int numDocs() {
  920       // Don't call ensureOpen() here (it could affect performance)
  921       int n = maxDoc();
  922       if (deletedDocs != null)
  923         n -= deletedDocs.count();
  924       return n;
  925     }
  926   
  927     @Override
  928     public int maxDoc() {
  929       // Don't call ensureOpen() here (it could affect performance)
  930       return si.docCount;
  931     }
  932   
  933     /**
  934      * @see IndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)
  935      */
  936     @Override
  937     public Collection<String> getFieldNames(IndexReader.FieldOption fieldOption) {
  938       ensureOpen();
  939   
  940       Set<String> fieldSet = new HashSet<String>();
  941       for (int i = 0; i < core.fieldInfos.size(); i++) {
  942         FieldInfo fi = core.fieldInfos.fieldInfo(i);
  943         if (fieldOption == IndexReader.FieldOption.ALL) {
  944           fieldSet.add(fi.name);
  945         }
  946         else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
  947           fieldSet.add(fi.name);
  948         }
  949         else if (fi.omitTermFreqAndPositions && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS) {
  950           fieldSet.add(fi.name);
  951         }
  952         else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
  953           fieldSet.add(fi.name);
  954         }
  955         else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) {
  956           fieldSet.add(fi.name);
  957         }
  958         else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
  959           fieldSet.add(fi.name);
  960         }
  961         else if (fi.storeTermVector == true &&
  962                  fi.storePositionWithTermVector == false &&
  963                  fi.storeOffsetWithTermVector == false &&
  964                  fieldOption == IndexReader.FieldOption.TERMVECTOR) {
  965           fieldSet.add(fi.name);
  966         }
  967         else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
  968           fieldSet.add(fi.name);
  969         }
  970         else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
  971           fieldSet.add(fi.name);
  972         }
  973         else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
  974           fieldSet.add(fi.name);
  975         }
  976         else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) &&
  977                   fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
  978           fieldSet.add(fi.name);
  979         }
  980       }
  981       return fieldSet;
  982     }
  983   
  984   
  985     @Override
  986     public synchronized boolean hasNorms(String field) {
  987       ensureOpen();
  988       return norms.containsKey(field);
  989     }
  990   
  991     // can return null if norms aren't stored
  992     protected synchronized byte[] getNorms(String field) throws IOException {
  993       Norm norm = norms.get(field);
  994       if (norm == null) return null;  // not indexed, or norms not stored
  995       return norm.bytes();
  996     }
  997   
  998     // returns fake norms if norms aren't available
  999     @Override
 1000     public synchronized byte[] norms(String field) throws IOException {
 1001       ensureOpen();
 1002       byte[] bytes = getNorms(field);
 1003       return bytes;
 1004     }
 1005   
 1006     @Override
 1007     protected void doSetNorm(int doc, String field, byte value)
 1008             throws IOException {
 1009       Norm norm = norms.get(field);
 1010       if (norm == null)                             // not an indexed field
 1011         return;
 1012   
 1013       normsDirty = true;
 1014       norm.copyOnWrite()[doc] = value;                    // set the value
 1015     }
 1016   
 1017     /** Read norms into a pre-allocated array. */
 1018     @Override
 1019     public synchronized void norms(String field, byte[] bytes, int offset)
 1020       throws IOException {
 1021   
 1022       ensureOpen();
 1023       Norm norm = norms.get(field);
 1024       if (norm == null) {
 1025         Arrays.fill(bytes, offset, bytes.length, DefaultSimilarity.encodeNorm(1.0f));
 1026         return;
 1027       }
 1028     
 1029       norm.bytes(bytes, offset, maxDoc());
 1030     }
 1031   
 1032   
 1033     private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
 1034       long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
 1035       int maxDoc = maxDoc();
 1036       for (int i = 0; i < core.fieldInfos.size(); i++) {
 1037         FieldInfo fi = core.fieldInfos.fieldInfo(i);
 1038         if (norms.containsKey(fi.name)) {
 1039           // in case this SegmentReader is being re-opened, we might be able to
 1040           // reuse some norm instances and skip loading them here
 1041           continue;
 1042         }
 1043         if (fi.isIndexed && !fi.omitNorms) {
 1044           Directory d = directory();
 1045           String fileName = si.getNormFileName(fi.number);
 1046           if (!si.hasSeparateNorms(fi.number)) {
 1047             d = cfsDir;
 1048           }
 1049           
 1050           // singleNormFile means multiple norms share this file
 1051           boolean singleNormFile = fileName.endsWith("." + IndexFileNames.NORMS_EXTENSION);
 1052           IndexInput normInput = null;
 1053           long normSeek;
 1054   
 1055           if (singleNormFile) {
 1056             normSeek = nextNormSeek;
 1057             if (singleNormStream == null) {
 1058               singleNormStream = d.openInput(fileName, readBufferSize);
 1059               singleNormRef = new Ref();
 1060             } else {
 1061               singleNormRef.incRef();
 1062             }
 1063             // All norms in the .nrm file can share a single IndexInput since
 1064             // they are only used in a synchronized context.
 1065             // If this were to change in the future, a clone could be done here.
 1066             normInput = singleNormStream;
 1067           } else {
 1068             normSeek = 0;
 1069             normInput = d.openInput(fileName);
 1070           }
 1071   
 1072           norms.put(fi.name, new Norm(normInput, fi.number, normSeek));
 1073           nextNormSeek += maxDoc; // increment also if some norms are separate
 1074         }
 1075       }
 1076     }
 1077   
 1078     boolean termsIndexLoaded() {
 1079       return core.termsIndexIsLoaded();
 1080     }
 1081   
 1082     // NOTE: only called from IndexWriter when a near
 1083     // real-time reader is opened, or applyDeletes is run,
 1084     // sharing a segment that's still being merged.  This
 1085     // method is not thread safe, and relies on the
 1086     // synchronization in IndexWriter
 1087     void loadTermsIndex(int termsIndexDivisor) throws IOException {
 1088       core.loadTermsIndex(si, termsIndexDivisor);
 1089     }
 1090   
 1091     // for testing only
 1092     boolean normsClosed() {
 1093       if (singleNormStream != null) {
 1094         return false;
 1095       }
 1096       for (final Norm norm : norms.values()) {
 1097         if (norm.refCount > 0) {
 1098           return false;
 1099         }
 1100       }
 1101       return true;
 1102     }
 1103   
 1104     // for testing only
 1105     boolean normsClosed(String field) {
 1106       return norms.get(field).refCount == 0;
 1107     }
 1108   
 1109     /**
 1110      * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
 1111      * @return TermVectorsReader
 1112      */
 1113     TermVectorsReader getTermVectorsReader() {
 1114       TermVectorsReader tvReader = termVectorsLocal.get();
 1115       if (tvReader == null) {
 1116         TermVectorsReader orig = core.getTermVectorsReaderOrig();
 1117         if (orig == null) {
 1118           return null;
 1119         } else {
 1120           try {
 1121             tvReader = (TermVectorsReader) orig.clone();
 1122           } catch (CloneNotSupportedException cnse) {
 1123             return null;
 1124           }
 1125         }
 1126         termVectorsLocal.set(tvReader);
 1127       }
 1128       return tvReader;
 1129     }
 1130   
 1131     TermVectorsReader getTermVectorsReaderOrig() {
 1132       return core.getTermVectorsReaderOrig();
 1133     }
 1134     
 1135     /** Return a term frequency vector for the specified document and field. The
 1136      *  vector returned contains term numbers and frequencies for all terms in
 1137      *  the specified field of this document, if the field had storeTermVector
 1138      *  flag set.  If the flag was not set, the method returns null.
 1139      * @throws IOException
 1140      */
 1141     @Override
 1142     public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
 1143       // Check if this field is invalid or has no stored term vector
 1144       ensureOpen();
 1145       FieldInfo fi = core.fieldInfos.fieldInfo(field);
 1146       if (fi == null || !fi.storeTermVector) 
 1147         return null;
 1148       
 1149       TermVectorsReader termVectorsReader = getTermVectorsReader();
 1150       if (termVectorsReader == null)
 1151         return null;
 1152       
 1153       return termVectorsReader.get(docNumber, field);
 1154     }
 1155   
 1156   
 1157     @Override
 1158     public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
 1159       ensureOpen();
 1160       FieldInfo fi = core.fieldInfos.fieldInfo(field);
 1161       if (fi == null || !fi.storeTermVector)
 1162         return;
 1163   
 1164       TermVectorsReader termVectorsReader = getTermVectorsReader();
 1165       if (termVectorsReader == null) {
 1166         return;
 1167       }
 1168   
 1169   
 1170       termVectorsReader.get(docNumber, field, mapper);
 1171     }
 1172   
 1173   
 1174     @Override
 1175     public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
 1176       ensureOpen();
 1177   
 1178       TermVectorsReader termVectorsReader = getTermVectorsReader();
 1179       if (termVectorsReader == null)
 1180         return;
 1181   
 1182       termVectorsReader.get(docNumber, mapper);
 1183     }
 1184   
 1185     /** Return an array of term frequency vectors for the specified document.
 1186      *  The array contains a vector for each vectorized field in the document.
 1187      *  Each vector vector contains term numbers and frequencies for all terms
 1188      *  in a given vectorized field.
 1189      *  If no such fields existed, the method returns null.
 1190      * @throws IOException
 1191      */
 1192     @Override
 1193     public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
 1194       ensureOpen();
 1195       
 1196       TermVectorsReader termVectorsReader = getTermVectorsReader();
 1197       if (termVectorsReader == null)
 1198         return null;
 1199       
 1200       return termVectorsReader.get(docNumber);
 1201     }
 1202     
 1203     /**
 1204      * Return the name of the segment this reader is reading.
 1205      */
 1206     public String getSegmentName() {
 1207       return core.segment;
 1208     }
 1209     
 1210     /**
 1211      * Return the SegmentInfo of the segment this reader is reading.
 1212      */
 1213     SegmentInfo getSegmentInfo() {
 1214       return si;
 1215     }
 1216   
 1217     void setSegmentInfo(SegmentInfo info) {
 1218       si = info;
 1219     }
 1220   
 1221     void startCommit() {
 1222       rollbackHasChanges = hasChanges;
 1223       rollbackDeletedDocsDirty = deletedDocsDirty;
 1224       rollbackNormsDirty = normsDirty;
 1225       rollbackPendingDeleteCount = pendingDeleteCount;
 1226       for (Norm norm : norms.values()) {
 1227         norm.rollbackDirty = norm.dirty;
 1228       }
 1229     }
 1230   
 1231     void rollbackCommit() {
 1232       hasChanges = rollbackHasChanges;
 1233       deletedDocsDirty = rollbackDeletedDocsDirty;
 1234       normsDirty = rollbackNormsDirty;
 1235       pendingDeleteCount = rollbackPendingDeleteCount;
 1236       for (Norm norm : norms.values()) {
 1237         norm.dirty = norm.rollbackDirty;
 1238       }
 1239     }
 1240   
 1241     /** Returns the directory this index resides in. */
 1242     @Override
 1243     public Directory directory() {
 1244       // Don't ensureOpen here -- in certain cases, when a
 1245       // cloned/reopened reader needs to commit, it may call
 1246       // this method on the closed original reader
 1247       return core.dir;
 1248     }
 1249   
 1250     // This is necessary so that cloned SegmentReaders (which
 1251     // share the underlying postings data) will map to the
 1252     // same entry in the FieldCache.  See LUCENE-1579.
 1253     @Override
 1254     public final Object getFieldCacheKey() {
 1255       return core.freqStream;
 1256     }
 1257   
 1258     @Override
 1259     public long getUniqueTermCount() {
 1260       return core.getTermsReader().size();
 1261     }
 1262   
 1263     /**
 1264      * Lotsa tests did hacks like:<br/>
 1265      * SegmentReader reader = (SegmentReader) IndexReader.open(dir);<br/>
 1266      * They broke. This method serves as a hack to keep hacks working
 1267      * We do it with R/W access for the tests (BW compatibility)
 1268      * @deprecated Remove this when tests are fixed!
 1269      */
 1270     static SegmentReader getOnlySegmentReader(Directory dir) throws IOException {
 1271       return getOnlySegmentReader(IndexReader.open(dir,false));
 1272     }
 1273   
 1274     static SegmentReader getOnlySegmentReader(IndexReader reader) {
 1275       if (reader instanceof SegmentReader)
 1276         return (SegmentReader) reader;
 1277   
 1278       if (reader instanceof DirectoryReader) {
 1279         IndexReader[] subReaders = reader.getSequentialSubReaders();
 1280         if (subReaders.length != 1)
 1281           throw new IllegalArgumentException(reader + " has " + subReaders.length + " segments instead of exactly one");
 1282   
 1283         return (SegmentReader) subReaders[0];
 1284       }
 1285   
 1286       throw new IllegalArgumentException(reader + " is not a SegmentReader or a single-segment DirectoryReader");
 1287     }
 1288   
 1289     @Override
 1290     public int getTermInfosIndexDivisor() {
 1291       return core.termsIndexDivisor;
 1292     }
 1293   }

Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » index » [javadoc | source]