Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » index » [javadoc | source]
    1   package org.apache.lucene.index;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import java.io.IOException;
   21   import java.util.ArrayList;
   22   import java.util.Arrays;
   23   import java.util.Collection;
   24   import java.util.HashMap;
   25   import java.util.HashSet;
   26   import java.util.Iterator;
   27   import java.util.List;
   28   import java.util.Map;
   29   import java.util.Set;
   30   
   31   import org.apache.lucene.document.Document;
   32   import org.apache.lucene.document.FieldSelector;
   33   import org.apache.lucene.search.DefaultSimilarity;
   34   import org.apache.lucene.store.BufferedIndexInput;
   35   import org.apache.lucene.store.Directory;
   36   import org.apache.lucene.store.IndexInput;
   37   import org.apache.lucene.store.IndexOutput;
   38   import org.apache.lucene.util.BitVector;
   39   import org.apache.lucene.util.CloseableThreadLocal;
   40   
   41   /**
   42    * @version $Id: SegmentReader.java 745797 2009-02-19 09:54:43Z mikemccand $
   43    */
   44   class SegmentReader extends DirectoryIndexReader {
   45     private String segment;
   46     private SegmentInfo si;
   47     private int readBufferSize;
   48   
   49     FieldInfos fieldInfos;
   50     private FieldsReader fieldsReader;
   51   
   52     TermInfosReader tis;
   53     TermVectorsReader termVectorsReaderOrig = null;
   54     CloseableThreadLocal termVectorsLocal = new CloseableThreadLocal();
   55   
   56     BitVector deletedDocs = null;
   57     private boolean deletedDocsDirty = false;
   58     private boolean normsDirty = false;
   59     private boolean undeleteAll = false;
   60     private int pendingDeleteCount;
   61   
   62     private boolean rollbackDeletedDocsDirty = false;
   63     private boolean rollbackNormsDirty = false;
   64     private boolean rollbackUndeleteAll = false;
   65     private int rollbackPendingDeleteCount;
   66     private boolean readOnly;
   67   
   68     IndexInput freqStream;
   69     IndexInput proxStream;
   70   
   71     // optionally used for the .nrm file shared by multiple norms
   72     private IndexInput singleNormStream;
   73   
   74     // Compound File Reader when based on a compound file segment
   75     CompoundFileReader cfsReader = null;
   76     CompoundFileReader storeCFSReader = null;
   77     
   78     // indicates the SegmentReader with which the resources are being shared,
   79     // in case this is a re-opened reader
   80     private SegmentReader referencedSegmentReader = null;
   81     
   82     private class Norm {
   83       volatile int refCount;
   84       boolean useSingleNormStream;
   85       
   86       public synchronized void incRef() {
   87         assert refCount > 0;
   88         refCount++;
   89       }
   90   
   91       public synchronized void decRef() throws IOException {
   92         assert refCount > 0;
   93         if (refCount == 1) {
   94           close();
   95         }
   96         refCount--;
   97   
   98       }
   99       
  100       public Norm(IndexInput in, boolean useSingleNormStream, int number, long normSeek)
  101       {
  102         refCount = 1;
  103         this.in = in;
  104         this.number = number;
  105         this.normSeek = normSeek;
  106         this.useSingleNormStream = useSingleNormStream;
  107       }
  108   
  109       private IndexInput in;
  110       private byte[] bytes;
  111       private boolean dirty;
  112       private int number;
  113       private long normSeek;
  114       private boolean rollbackDirty;
  115   
  116       private void reWrite(SegmentInfo si) throws IOException {
  117         // NOTE: norms are re-written in regular directory, not cfs
  118         si.advanceNormGen(this.number);
  119         IndexOutput out = directory().createOutput(si.getNormFileName(this.number));
  120         try {
  121           out.writeBytes(bytes, maxDoc());
  122         } finally {
  123           out.close();
  124         }
  125         this.dirty = false;
  126       }
  127       
  128       /** Closes the underlying IndexInput for this norm.
  129        * It is still valid to access all other norm properties after close is called.
  130        * @throws IOException
  131        */
  132       private synchronized void close() throws IOException {
  133         if (in != null && !useSingleNormStream) {
  134           in.close();
  135         }
  136         in = null;
  137       }
  138     }
  139     
  140     /**
  141      * Increments the RC of this reader, as well as
  142      * of all norms this reader is using
  143      */
  144     public synchronized void incRef() {
  145       super.incRef();
  146       Iterator it = norms.values().iterator();
  147       while (it.hasNext()) {
  148         Norm norm = (Norm) it.next();
  149         norm.incRef();
  150       }
  151     }
  152     
  /**
   * Only increments the RC of this reader, not of
   * the norms. This is important whenever a reopen()
   * creates a new SegmentReader that doesn't share
   * the norms with this one.
   */
  private synchronized void incRefReaderNotNorms() {
    super.incRef();
  }
  162   
  163     public synchronized void decRef() throws IOException {
  164       super.decRef();
  165       Iterator it = norms.values().iterator();
  166       while (it.hasNext()) {
  167         Norm norm = (Norm) it.next();
  168         norm.decRef();
  169       }
  170     }
  171     
  /**
   * Releases one reference to this reader only; the norms' reference
   * counts are left untouched.
   * @throws IOException if the superclass decRef fails
   */
  private synchronized void decRefReaderNotNorms() throws IOException {
    super.decRef();
  }
  175     
  176     Map norms = new HashMap();
  177     
  /**
   * The class which implements SegmentReader. It is resolved once, in the
   * static initializer below, from the system property
   * "org.apache.lucene.SegmentReader.class"; when the property is not set
   * the name of SegmentReader itself is used.
   */
  private static Class IMPL;
  static {
    try {
      String name =
        System.getProperty("org.apache.lucene.SegmentReader.class",
                           SegmentReader.class.getName());
      IMPL = Class.forName(name);
    } catch (ClassNotFoundException e) {
      throw new RuntimeException("cannot load SegmentReader class: " + e, e);
    } catch (SecurityException se) {
      // A SecurityException was raised above (presumably reading the system
      // property was not permitted): fall back to the default class name.
      try {
        IMPL = Class.forName(SegmentReader.class.getName());
      } catch (ClassNotFoundException e) {
        throw new RuntimeException("cannot load default SegmentReader class: " + e, e);
      }
    }
  }
  196   
  /**
   * The class instantiated for read-only readers. It is resolved once, in
   * the static initializer below, from the system property
   * "org.apache.lucene.ReadOnlySegmentReader.class"; when the property is
   * not set the name of ReadOnlySegmentReader is used.
   */
  private static Class READONLY_IMPL;
  static {
    try {
      String name =
        System.getProperty("org.apache.lucene.ReadOnlySegmentReader.class",
                           ReadOnlySegmentReader.class.getName());
      READONLY_IMPL = Class.forName(name);
    } catch (ClassNotFoundException e) {
      throw new RuntimeException("cannot load ReadOnlySegmentReader class: " + e, e);
    } catch (SecurityException se) {
      // A SecurityException was raised above (presumably reading the system
      // property was not permitted): fall back to the default class name.
      try {
        READONLY_IMPL = Class.forName(ReadOnlySegmentReader.class.getName());
      } catch (ClassNotFoundException e) {
        throw new RuntimeException("cannot load default ReadOnlySegmentReader class: " + e, e);
      }
    }
  }
  214   
  /**
   * Opens a reader on the given segment in the segment's own directory
   * (si.dir), using READ_ONLY_DEFAULT, the default buffer size, no
   * SegmentInfos, and with the doc stores opened.
   * @param si the segment to open
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException {
    return get(READ_ONLY_DEFAULT, si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, true);
  }
  222   
  /**
   * Opens a reader on the given segment in the segment's own directory
   * (si.dir), using the default buffer size, no SegmentInfos, and with
   * the doc stores opened.
   * @param readOnly selects the read-only implementation class when true
   * @param si the segment to open
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(boolean readOnly, SegmentInfo si) throws CorruptIndexException, IOException {
    return get(readOnly, si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, true);
  }
  230   
  /**
   * Opens a reader on the given segment in the segment's own directory
   * (si.dir), using READ_ONLY_DEFAULT, the default buffer size and no
   * SegmentInfos.
   * @param si the segment to open
   * @param doOpenStores passed through; when false the stored-fields and
   *        term-vector readers are not opened
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  static SegmentReader get(SegmentInfo si, boolean doOpenStores) throws CorruptIndexException, IOException {
    return get(READ_ONLY_DEFAULT, si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, doOpenStores);
  }
  238   
  /**
   * Opens a reader on the given segment in the segment's own directory
   * (si.dir), using READ_ONLY_DEFAULT, no SegmentInfos, and with the doc
   * stores opened.
   * @param si the segment to open
   * @param readBufferSize buffer size passed to the inputs that are opened
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(SegmentInfo si, int readBufferSize) throws CorruptIndexException, IOException {
    return get(READ_ONLY_DEFAULT, si.dir, si, null, false, false, readBufferSize, true);
  }
  246   
  /**
   * Opens a reader on the given segment in the segment's own directory
   * (si.dir), using READ_ONLY_DEFAULT and no SegmentInfos.
   * @param si the segment to open
   * @param readBufferSize buffer size passed to the inputs that are opened
   * @param doOpenStores passed through; when false the stored-fields and
   *        term-vector readers are not opened
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException {
    return get(READ_ONLY_DEFAULT, si.dir, si, null, false, false, readBufferSize, doOpenStores);
  }
  254   
  /**
   * Opens a reader on the given segment in the segment's own directory
   * (si.dir) with no SegmentInfos.
   * @param readOnly selects the read-only implementation class when true
   * @param si the segment to open
   * @param readBufferSize buffer size passed to the inputs that are opened
   * @param doOpenStores passed through; when false the stored-fields and
   *        term-vector readers are not opened
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  static SegmentReader get(boolean readOnly, SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException {
    return get(readOnly, si.dir, si, null, false, false, readBufferSize, doOpenStores);
  }
  262   
  /**
   * Opens a reader on the given segment in the segment's own directory
   * (si.dir), passing ownDir=true, the default buffer size, and with the
   * doc stores opened.
   * @param readOnly selects the read-only implementation class when true
   * @param sis the SegmentInfos handed to the reader's init call
   * @param si the segment to open
   * @param closeDir handed to the reader's init call
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(boolean readOnly, SegmentInfos sis, SegmentInfo si,
                                  boolean closeDir) throws CorruptIndexException, IOException {
    return get(readOnly, si.dir, si, sis, closeDir, true, BufferedIndexInput.BUFFER_SIZE, true);
  }
  271   
  /**
   * Opens a reader on the given segment in the given directory, using
   * READ_ONLY_DEFAULT and with the doc stores opened.
   * @param dir the directory handed to the reader's init call
   * @param si the segment to open
   * @param sis the SegmentInfos handed to the reader's init call
   * @param closeDir handed to the reader's init call
   * @param ownDir passed through to the full-argument overload
   * @param readBufferSize buffer size passed to the inputs that are opened
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(Directory dir, SegmentInfo si,
                                  SegmentInfos sis,
                                  boolean closeDir, boolean ownDir,
                                  int readBufferSize)
    throws CorruptIndexException, IOException {
    return get(READ_ONLY_DEFAULT, dir, si, sis, closeDir, ownDir, readBufferSize, true);
  }
  283   
  284     /**
  285      * @throws CorruptIndexException if the index is corrupt
  286      * @throws IOException if there is a low-level IO error
  287      */
  288     public static SegmentReader get(boolean readOnly,
  289                                     Directory dir,
  290                                     SegmentInfo si,
  291                                     SegmentInfos sis,
  292                                     boolean closeDir, boolean ownDir,
  293                                     int readBufferSize,
  294                                     boolean doOpenStores)
  295       throws CorruptIndexException, IOException {
  296       SegmentReader instance;
  297       try {
  298         if (readOnly)
  299           instance = (SegmentReader)READONLY_IMPL.newInstance();
  300         else
  301           instance = (SegmentReader)IMPL.newInstance();
  302       } catch (Exception e) {
  303         throw new RuntimeException("cannot load SegmentReader class: " + e, e);
  304       }
  305       instance.init(dir, sis, closeDir, readOnly);
  306       instance.initialize(si, readBufferSize, doOpenStores);
  307       return instance;
  308     }
  309   
  /**
   * Opens the files that back this reader for the given segment: field
   * infos, (optionally) stored fields, the term dictionary, deleted docs,
   * frequency/proximity streams, norms and (optionally) term vectors.
   * If anything fails part-way, doClose() is called so that whatever was
   * already opened is released.
   * @param si the segment to open
   * @param readBufferSize buffer size passed to every reader/input opened here
   * @param doOpenStores when false, neither the stored-fields reader nor the
   *        term-vectors reader is opened
   * @throws CorruptIndexException if the stored-fields doc count disagrees with si.docCount
   * @throws IOException if there is a low-level IO error
   */
  private void initialize(SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException {
    segment = si.name;
    this.si = si;
    this.readBufferSize = readBufferSize;

    boolean success = false;

    try {
      // Use compound file directory for some files, if it exists
      Directory cfsDir = directory();
      if (si.getUseCompoundFile()) {
        cfsReader = new CompoundFileReader(directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
        cfsDir = cfsReader;
      }

      // Directory the doc stores (stored fields / term vectors) are read from;
      // stays null when the stores are not opened.
      final Directory storeDir;

      if (doOpenStores) {
        if (si.getDocStoreOffset() != -1) {
          // A doc store offset other than -1 selects the doc-store segment's files.
          if (si.getDocStoreIsCompoundFile()) {
            storeCFSReader = new CompoundFileReader(directory(), si.getDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
            storeDir = storeCFSReader;
          } else {
            storeDir = directory();
          }
        } else {
          storeDir = cfsDir;
        }
      } else
        storeDir = null;

      fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

      // The .prx file is only opened below when at least one field does not omit tf.
      boolean anyProx = false;
      final int numFields = fieldInfos.size();
      for(int i=0;!anyProx && i<numFields;i++)
        if (!fieldInfos.fieldInfo(i).omitTf)
          anyProx = true;

      final String fieldsSegment;

      if (si.getDocStoreOffset() != -1)
        fieldsSegment = si.getDocStoreSegment();
      else
        fieldsSegment = segment;

      if (doOpenStores) {
        fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize,
                                        si.getDocStoreOffset(), si.docCount);

        // Verify two sources of "maxDoc" agree:
        if (si.getDocStoreOffset() == -1 && fieldsReader.size() != si.docCount) {
          throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.size() + " but segmentInfo shows " + si.docCount);
        }
      }

      tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);
      
      loadDeletedDocs();

      // make sure that all index files have been read or are kept open
      // so that if an index update removes them we'll still have them
      freqStream = cfsDir.openInput(segment + ".frq", readBufferSize);
      if (anyProx)
        proxStream = cfsDir.openInput(segment + ".prx", readBufferSize);
      openNorms(cfsDir, readBufferSize);

      if (doOpenStores && fieldInfos.hasVectors()) { // open term vector files only as needed
        final String vectorsSegment;
        if (si.getDocStoreOffset() != -1)
          vectorsSegment = si.getDocStoreSegment();
        else
          vectorsSegment = segment;
        termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.getDocStoreOffset(), si.docCount);
      }
      success = true;
    } finally {

      // With lock-less commits, it's entirely possible (and
      // fine) to hit a FileNotFound exception above.  In
      // this case, we want to explicitly close any subset
      // of things that were opened so that we don't have to
      // wait for a GC to do so.
      if (!success) {
        doClose();
      }
    }
  }
  398     
  399     private void loadDeletedDocs() throws IOException {
  400       // NOTE: the bitvector is stored using the regular directory, not cfs
  401       if (hasDeletions(si)) {
  402         deletedDocs = new BitVector(directory(), si.getDelFileName());
  403        
  404         assert si.getDelCount() == deletedDocs.count() : 
  405           "delete count mismatch: info=" + si.getDelCount() + " vs BitVector=" + deletedDocs.count();
  406   
  407         // Verify # deletes does not exceed maxDoc for this
  408         // segment:
  409         assert si.getDelCount() <= maxDoc() : 
  410           "delete count mismatch: " + deletedDocs.count() + ") exceeds max doc (" + maxDoc() + ") for segment " + si.name;
  411   
  412       } else
  413         assert si.getDelCount() == 0;
  414     }
  415     
  416     protected synchronized DirectoryIndexReader doReopen(SegmentInfos infos) throws CorruptIndexException, IOException {
  417       DirectoryIndexReader newReader;
  418       
  419       if (infos.size() == 1) {
  420         SegmentInfo si = infos.info(0);
  421         if (segment.equals(si.name) && si.getUseCompoundFile() == SegmentReader.this.si.getUseCompoundFile()) {
  422           newReader = reopenSegment(si);
  423         } else { 
  424           // segment not referenced anymore, reopen not possible
  425           // or segment format changed
  426           newReader = SegmentReader.get(readOnly, infos, infos.info(0), false);
  427         }
  428       } else {
  429         if (readOnly)
  430           return new ReadOnlyMultiSegmentReader(directory, infos, closeDirectory, new SegmentReader[] {this}, null, null);
  431         else
  432           return new MultiSegmentReader(directory, infos, closeDirectory, new SegmentReader[] {this}, null, null, false);
  433       }
  434       
  435       return newReader;
  436     }
  437     
  438     synchronized SegmentReader reopenSegment(SegmentInfo si) throws CorruptIndexException, IOException {
  439       boolean deletionsUpToDate = (this.si.hasDeletions() == si.hasDeletions()) 
  440                                     && (!si.hasDeletions() || this.si.getDelFileName().equals(si.getDelFileName()));
  441       boolean normsUpToDate = true;
  442   
  443       
  444       boolean[] fieldNormsChanged = new boolean[fieldInfos.size()];
  445       if (normsUpToDate) {
  446         for (int i = 0; i < fieldInfos.size(); i++) {
  447           if (!this.si.getNormFileName(i).equals(si.getNormFileName(i))) {
  448             normsUpToDate = false;
  449             fieldNormsChanged[i] = true;
  450           }
  451         }
  452       }
  453   
  454       if (normsUpToDate && deletionsUpToDate) {
  455         return this;
  456       }    
  457       
  458   
  459         // clone reader
  460       SegmentReader clone;
  461       if (readOnly) 
  462         clone = new ReadOnlySegmentReader();
  463       else
  464         clone = new SegmentReader();
  465   
  466       boolean success = false;
  467       try {
  468         clone.readOnly = readOnly;
  469         clone.directory = directory;
  470         clone.si = si;
  471         clone.segment = segment;
  472         clone.readBufferSize = readBufferSize;
  473         clone.cfsReader = cfsReader;
  474         clone.storeCFSReader = storeCFSReader;
  475     
  476         clone.fieldInfos = fieldInfos;
  477         clone.tis = tis;
  478         clone.freqStream = freqStream;
  479         clone.proxStream = proxStream;
  480         clone.termVectorsReaderOrig = termVectorsReaderOrig;
  481     
  482         
  483         // we have to open a new FieldsReader, because it is not thread-safe
  484         // and can thus not be shared among multiple SegmentReaders
  485         // TODO: Change this in case FieldsReader becomes thread-safe in the future
  486         final String fieldsSegment;
  487     
  488         Directory storeDir = directory();
  489         
  490         if (si.getDocStoreOffset() != -1) {
  491           fieldsSegment = si.getDocStoreSegment();
  492           if (storeCFSReader != null) {
  493             storeDir = storeCFSReader;
  494           }
  495         } else {
  496           fieldsSegment = segment;
  497           if (cfsReader != null) {
  498             storeDir = cfsReader;
  499           }
  500         }
  501     
  502         if (fieldsReader != null) {
  503           clone.fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize,
  504                                           si.getDocStoreOffset(), si.docCount);
  505         }
  506         
  507         
  508         if (!deletionsUpToDate) {
  509           // load deleted docs
  510           clone.deletedDocs = null;
  511           clone.loadDeletedDocs();
  512         } else {
  513           clone.deletedDocs = this.deletedDocs;
  514         }
  515     
  516         clone.norms = new HashMap();
  517         if (!normsUpToDate) {
  518           // load norms
  519           for (int i = 0; i < fieldNormsChanged.length; i++) {
  520             // copy unchanged norms to the cloned reader and incRef those norms
  521             if (!fieldNormsChanged[i]) {
  522               String curField = fieldInfos.fieldInfo(i).name;
  523               Norm norm = (Norm) this.norms.get(curField);
  524               norm.incRef();
  525               clone.norms.put(curField, norm);
  526             }
  527           }
  528           
  529           clone.openNorms(si.getUseCompoundFile() ? cfsReader : directory(), readBufferSize);
  530         } else {
  531           Iterator it = norms.keySet().iterator();
  532           while (it.hasNext()) {
  533             String field = (String) it.next();
  534             Norm norm = (Norm) norms.get(field);
  535             norm.incRef();
  536             clone.norms.put(field, norm);
  537           }
  538         }
  539     
  540         if (clone.singleNormStream == null) {
  541           for (int i = 0; i < fieldInfos.size(); i++) {
  542             FieldInfo fi = fieldInfos.fieldInfo(i);
  543             if (fi.isIndexed && !fi.omitNorms) {
  544               Directory d = si.getUseCompoundFile() ? cfsReader : directory();
  545               String fileName = si.getNormFileName(fi.number);
  546               if (si.hasSeparateNorms(fi.number)) {
  547                 continue;
  548               }  
  549     
  550               if (fileName.endsWith("." + IndexFileNames.NORMS_EXTENSION)) {
  551                 clone.singleNormStream = d.openInput(fileName, readBufferSize);    
  552                 break;
  553               }
  554             }
  555           }  
  556         }    
  557     
  558         success = true;
  559       } finally {
  560         if (this.referencedSegmentReader != null) {
  561           // this reader shares resources with another SegmentReader,
  562           // so we increment the other readers refCount. We don't
  563           // increment the refCount of the norms because we did
  564           // that already for the shared norms
  565           clone.referencedSegmentReader = this.referencedSegmentReader;
  566           referencedSegmentReader.incRefReaderNotNorms();
  567         } else {
  568           // this reader wasn't reopened, so we increment this
  569           // readers refCount
  570           clone.referencedSegmentReader = this;
  571           incRefReaderNotNorms();
  572         }
  573         
  574         if (!success) {
  575           // An exception occured during reopen, we have to decRef the norms
  576           // that we incRef'ed already and close singleNormsStream and FieldsReader
  577           clone.decRef();
  578         }
  579       }
  580       
  581       return clone;
  582     }
  583   
  584     protected void commitChanges() throws IOException {
  585       if (deletedDocsDirty) {               // re-write deleted
  586         si.advanceDelGen();
  587   
  588         // We can write directly to the actual name (vs to a
  589         // .tmp & renaming it) because the file is not live
  590         // until segments file is written:
  591         deletedDocs.write(directory(), si.getDelFileName());
  592         
  593         si.setDelCount(si.getDelCount()+pendingDeleteCount);
  594         pendingDeleteCount = 0;
  595       }
  596       if (undeleteAll && si.hasDeletions()) {
  597         si.clearDelGen();
  598         si.setDelCount(0);
  599       }
  600       if (normsDirty) {               // re-write norms
  601         si.setNumFields(fieldInfos.size());
  602         Iterator it = norms.values().iterator();
  603         while (it.hasNext()) {
  604           Norm norm = (Norm) it.next();
  605           if (norm.dirty) {
  606             norm.reWrite(si);
  607           }
  608         }
  609       }
  610       deletedDocsDirty = false;
  611       normsDirty = false;
  612       undeleteAll = false;
  613     }
  614   
  /** Returns the stored-fields reader of this segment reader; may be null if none was opened. */
  FieldsReader getFieldsReader() {
    return fieldsReader;
  }
  618   
  619     protected void doClose() throws IOException {
  620       boolean hasReferencedReader = (referencedSegmentReader != null);
  621   
  622       termVectorsLocal.close();
  623   
  624       if (hasReferencedReader) {
  625         referencedSegmentReader.decRefReaderNotNorms();
  626         referencedSegmentReader = null;
  627       }
  628   
  629       deletedDocs = null;
  630   
  631       // close the single norms stream
  632       if (singleNormStream != null) {
  633         // we can close this stream, even if the norms
  634         // are shared, because every reader has it's own 
  635         // singleNormStream
  636         singleNormStream.close();
  637         singleNormStream = null;
  638       }
  639       
  640       // re-opened SegmentReaders have their own instance of FieldsReader
  641       if (fieldsReader != null) {
  642         fieldsReader.close();
  643       }
  644   
  645       if (!hasReferencedReader) { 
  646         // close everything, nothing is shared anymore with other readers
  647         if (tis != null) {
  648           tis.close();
  649         }
  650     
  651         if (freqStream != null)
  652           freqStream.close();
  653         if (proxStream != null)
  654           proxStream.close();
  655     
  656         if (termVectorsReaderOrig != null)
  657           termVectorsReaderOrig.close();
  658     
  659         if (cfsReader != null)
  660           cfsReader.close();
  661     
  662         if (storeCFSReader != null)
  663           storeCFSReader.close();
  664       }
  665   
  666       // In DirectoryIndexReader.reopen, our directory
  667       // instance was made private to us (cloned), so we
  668       // always call super.doClose to possibly close the
  669       // directory:
  670       super.doClose();
  671     }
  672   
  /**
   * Reports whether the given segment info records deletions; simply
   * delegates to si.hasDeletions().
   */
  static boolean hasDeletions(SegmentInfo si) throws IOException {
    // Don't call ensureOpen() here (it could affect performance)
    return si.hasDeletions();
  }
  677   
  /** Returns true when this reader currently holds a deleted-docs bit vector. */
  public boolean hasDeletions() {
    // Don't call ensureOpen() here (it could affect performance)
    return deletedDocs != null;
  }
  682   
  /** Returns si.getUseCompoundFile() for the given segment info. */
  static boolean usesCompoundFile(SegmentInfo si) throws IOException {
    return si.getUseCompoundFile();
  }
  686   
  /** Returns si.hasSeparateNorms() for the given segment info. */
  static boolean hasSeparateNorms(SegmentInfo si) throws IOException {
    return si.hasSeparateNorms();
  }
  690   
  691     protected void doDelete(int docNum) {
  692       if (deletedDocs == null)
  693         deletedDocs = new BitVector(maxDoc());
  694       deletedDocsDirty = true;
  695       undeleteAll = false;
  696       if (!deletedDocs.getAndSet(docNum))
  697         pendingDeleteCount++;
  698     }
  699   
  /**
   * Drops the in-memory deleted-docs bit vector, clears its dirty flag
   * and records that an undelete-all is pending (applied to the segment
   * info in commitChanges()).
   */
  protected void doUndeleteAll() {
      deletedDocs = null;
      deletedDocsDirty = false;
      undeleteAll = true;
  }
  705   
  /** Returns a new ArrayList holding a copy of si.files(). */
  List files() throws IOException {
    return new ArrayList(si.files());
  }
  709   
  /** Returns the term enumeration obtained from the term infos reader, after checking the reader is open. */
  public TermEnum terms() {
    ensureOpen();
    return tis.terms();
  }
  714   
  /**
   * Returns the term enumeration obtained from the term infos reader for
   * the given term, after checking the reader is open.
   * @param t the term handed to tis.terms(Term)
   */
  public TermEnum terms(Term t) throws IOException {
    ensureOpen();
    return tis.terms(t);
  }
  719   
  /** Returns the field infos of this segment reader. */
  FieldInfos getFieldInfos() {
    return fieldInfos;
  }
  723   
  724     /**
  725      * @throws CorruptIndexException if the index is corrupt
  726      * @throws IOException if there is a low-level IO error
  727      */
  728     public synchronized Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
  729       ensureOpen();
  730       if (isDeleted(n))
  731         throw new IllegalArgumentException
  732                 ("attempt to access a deleted document");
  733       return fieldsReader.doc(n, fieldSelector);
  734     }
  735   
  736     public synchronized boolean isDeleted(int n) {
  737       return (deletedDocs != null && deletedDocs.get(n));
  738     }
  739   
  /** Returns a new SegmentTermDocs bound to this reader, after checking the reader is open. */
  public TermDocs termDocs() throws IOException {
    ensureOpen();
    return new SegmentTermDocs(this);
  }
  744   
  /** Returns a new SegmentTermPositions bound to this reader, after checking the reader is open. */
  public TermPositions termPositions() throws IOException {
    ensureOpen();
    return new SegmentTermPositions(this);
  }
  749   
  750     public int docFreq(Term t) throws IOException {
  751       ensureOpen();
  752       TermInfo ti = tis.get(t);
  753       if (ti != null)
  754         return ti.docFreq;
  755       else
  756         return 0;
  757     }
  758   
  759     public int numDocs() {
  760       // Don't call ensureOpen() here (it could affect performance)
  761       int n = maxDoc();
  762       if (deletedDocs != null)
  763         n -= deletedDocs.count();
  764       return n;
  765     }
  766   
  /** Returns the document count recorded in the segment info (si.docCount). */
  public int maxDoc() {
    // Don't call ensureOpen() here (it could affect performance)
    return si.docCount;
  }
  771   
  /**
   * Forwards the given index divisor to the term infos reader.
   * @param indexDivisor value handed to tis.setIndexDivisor
   * @throws IllegalStateException declared by this method; presumably raised by the term infos reader
   */
  public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException {
    tis.setIndexDivisor(indexDivisor);
  }
  775   
  /** Returns the index divisor currently reported by the term infos reader. */
  public int getTermInfosIndexDivisor() {
    return tis.getIndexDivisor();
  }
  779   
  780     /**
  781      * @see IndexReader#getFieldNames(IndexReader.FieldOption fldOption)
  782      */
  783     public Collection getFieldNames(IndexReader.FieldOption fieldOption) {
  784       ensureOpen();
  785   
  786       Set fieldSet = new HashSet();
  787       for (int i = 0; i < fieldInfos.size(); i++) {
  788         FieldInfo fi = fieldInfos.fieldInfo(i);
  789         if (fieldOption == IndexReader.FieldOption.ALL) {
  790           fieldSet.add(fi.name);
  791         }
  792         else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
  793           fieldSet.add(fi.name);
  794         }
  795         else if (fi.omitTf && fieldOption == IndexReader.FieldOption.OMIT_TF) {
  796           fieldSet.add(fi.name);
  797         }
  798         else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
  799           fieldSet.add(fi.name);
  800         }
  801         else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) {
  802           fieldSet.add(fi.name);
  803         }
  804         else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
  805           fieldSet.add(fi.name);
  806         }
  807         else if (fi.storeTermVector == true &&
  808                  fi.storePositionWithTermVector == false &&
  809                  fi.storeOffsetWithTermVector == false &&
  810                  fieldOption == IndexReader.FieldOption.TERMVECTOR) {
  811           fieldSet.add(fi.name);
  812         }
  813         else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
  814           fieldSet.add(fi.name);
  815         }
  816         else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
  817           fieldSet.add(fi.name);
  818         }
  819         else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
  820           fieldSet.add(fi.name);
  821         }
  822         else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) &&
  823                   fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
  824           fieldSet.add(fi.name);
  825         }
  826       }
  827       return fieldSet;
  828     }
  829   
  830   
  831     public synchronized boolean hasNorms(String field) {
  832       ensureOpen();
  833       return norms.containsKey(field);
  834     }
  835   
  836     static byte[] createFakeNorms(int size) {
  837       byte[] ones = new byte[size];
  838       Arrays.fill(ones, DefaultSimilarity.encodeNorm(1.0f));
  839       return ones;
  840     }
  841   
  842     private byte[] ones;
  843     private byte[] fakeNorms() {
  844       if (ones==null) ones=createFakeNorms(maxDoc());
  845       return ones;
  846     }
  847   
  848     // can return null if norms aren't stored
  849     protected synchronized byte[] getNorms(String field) throws IOException {
  850       Norm norm = (Norm) norms.get(field);
  851       if (norm == null) return null;  // not indexed, or norms not stored
  852       synchronized(norm) {
  853         if (norm.bytes == null) {                     // value not yet read
  854           byte[] bytes = new byte[maxDoc()];
  855           norms(field, bytes, 0);
  856           norm.bytes = bytes;                         // cache it
  857           // it's OK to close the underlying IndexInput as we have cached the
  858           // norms and will never read them again.
  859           norm.close();
  860         }
  861         return norm.bytes;
  862       }
  863     }
  864   
  865     // returns fake norms if norms aren't available
  866     public synchronized byte[] norms(String field) throws IOException {
  867       ensureOpen();
  868       byte[] bytes = getNorms(field);
  869       if (bytes==null) bytes=fakeNorms();
  870       return bytes;
  871     }
  872   
  873     protected void doSetNorm(int doc, String field, byte value)
  874             throws IOException {
  875       Norm norm = (Norm) norms.get(field);
  876       if (norm == null)                             // not an indexed field
  877         return;
  878   
  879       norm.dirty = true;                            // mark it dirty
  880       normsDirty = true;
  881   
  882       norms(field)[doc] = value;                    // set the value
  883     }
  884   
  885     /** Read norms into a pre-allocated array. */
  886     public synchronized void norms(String field, byte[] bytes, int offset)
  887       throws IOException {
  888   
  889       ensureOpen();
  890       Norm norm = (Norm) norms.get(field);
  891       if (norm == null) {
  892         System.arraycopy(fakeNorms(), 0, bytes, offset, maxDoc());
  893         return;
  894       }
  895       
  896       synchronized(norm) {
  897         if (norm.bytes != null) {                     // can copy from cache
  898           System.arraycopy(norm.bytes, 0, bytes, offset, maxDoc());
  899           return;
  900         }
  901   
  902       // Read from disk.  norm.in may be shared across  multiple norms and
  903       // should only be used in a synchronized context.
  904         IndexInput normStream;
  905         if (norm.useSingleNormStream) {
  906           normStream = singleNormStream;
  907         } else {
  908           normStream = norm.in;
  909         }
  910         normStream.seek(norm.normSeek);
  911         normStream.readBytes(bytes, offset, maxDoc());
  912       }
  913     }
  914   
  915   
  916     private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
  917       long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
  918       int maxDoc = maxDoc();
  919       for (int i = 0; i < fieldInfos.size(); i++) {
  920         FieldInfo fi = fieldInfos.fieldInfo(i);
  921         if (norms.containsKey(fi.name)) {
  922           // in case this SegmentReader is being re-opened, we might be able to
  923           // reuse some norm instances and skip loading them here
  924           continue;
  925         }
  926         if (fi.isIndexed && !fi.omitNorms) {
  927           Directory d = directory();
  928           String fileName = si.getNormFileName(fi.number);
  929           if (!si.hasSeparateNorms(fi.number)) {
  930             d = cfsDir;
  931           }
  932           
  933           // singleNormFile means multiple norms share this file
  934           boolean singleNormFile = fileName.endsWith("." + IndexFileNames.NORMS_EXTENSION);
  935           IndexInput normInput = null;
  936           long normSeek;
  937   
  938           if (singleNormFile) {
  939             normSeek = nextNormSeek;
  940             if (singleNormStream==null) {
  941               singleNormStream = d.openInput(fileName, readBufferSize);
  942             }
  943             // All norms in the .nrm file can share a single IndexInput since
  944             // they are only used in a synchronized context.
  945             // If this were to change in the future, a clone could be done here.
  946             normInput = singleNormStream;
  947           } else {
  948             normSeek = 0;
  949             normInput = d.openInput(fileName);
  950           }
  951   
  952           norms.put(fi.name, new Norm(normInput, singleNormFile, fi.number, normSeek));
  953           nextNormSeek += maxDoc; // increment also if some norms are separate
  954         }
  955       }
  956     }
  957   
  958     // for testing only
  959     boolean normsClosed() {
  960       if (singleNormStream != null) {
  961         return false;
  962       }
  963       Iterator it = norms.values().iterator();
  964       while (it.hasNext()) {
  965         Norm norm = (Norm) it.next();
  966         if (norm.refCount > 0) {
  967           return false;
  968         }
  969       }
  970       return true;
  971     }
  972     
  973     // for testing only
  974     boolean normsClosed(String field) {
  975         Norm norm = (Norm) norms.get(field);
  976         return norm.refCount == 0;
  977     }
  978   
  979     /**
  980      * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
  981      * @return TermVectorsReader
  982      */
  983     private TermVectorsReader getTermVectorsReader() {
  984       assert termVectorsReaderOrig != null;
  985       TermVectorsReader tvReader = (TermVectorsReader)termVectorsLocal.get();
  986       if (tvReader == null) {
  987         try {
  988           tvReader = (TermVectorsReader)termVectorsReaderOrig.clone();
  989         } catch (CloneNotSupportedException cnse) {
  990           return null;
  991         }
  992         termVectorsLocal.set(tvReader);
  993       }
  994       return tvReader;
  995     }
  996     
  997     /** Return a term frequency vector for the specified document and field. The
  998      *  vector returned contains term numbers and frequencies for all terms in
  999      *  the specified field of this document, if the field had storeTermVector
 1000      *  flag set.  If the flag was not set, the method returns null.
 1001      * @throws IOException
 1002      */
 1003     public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
 1004       // Check if this field is invalid or has no stored term vector
 1005       ensureOpen();
 1006       FieldInfo fi = fieldInfos.fieldInfo(field);
 1007       if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null) 
 1008         return null;
 1009       
 1010       TermVectorsReader termVectorsReader = getTermVectorsReader();
 1011       if (termVectorsReader == null)
 1012         return null;
 1013       
 1014       return termVectorsReader.get(docNumber, field);
 1015     }
 1016   
 1017   
 1018     public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
 1019       ensureOpen();
 1020       FieldInfo fi = fieldInfos.fieldInfo(field);
 1021       if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
 1022         return;
 1023   
 1024       TermVectorsReader termVectorsReader = getTermVectorsReader();
 1025       if (termVectorsReader == null)
 1026       {
 1027         return;
 1028       }
 1029   
 1030   
 1031       termVectorsReader.get(docNumber, field, mapper);
 1032     }
 1033   
 1034   
 1035     public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
 1036       ensureOpen();
 1037       if (termVectorsReaderOrig == null)
 1038         return;
 1039   
 1040       TermVectorsReader termVectorsReader = getTermVectorsReader();
 1041       if (termVectorsReader == null)
 1042         return;
 1043   
 1044       termVectorsReader.get(docNumber, mapper);
 1045     }
 1046   
 1047     /** Return an array of term frequency vectors for the specified document.
 1048      *  The array contains a vector for each vectorized field in the document.
 1049      *  Each vector vector contains term numbers and frequencies for all terms
 1050      *  in a given vectorized field.
 1051      *  If no such fields existed, the method returns null.
 1052      * @throws IOException
 1053      */
 1054     public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
 1055       ensureOpen();
 1056       if (termVectorsReaderOrig == null)
 1057         return null;
 1058       
 1059       TermVectorsReader termVectorsReader = getTermVectorsReader();
 1060       if (termVectorsReader == null)
 1061         return null;
 1062       
 1063       return termVectorsReader.get(docNumber);
 1064     }
 1065     
 1066     /** Returns the field infos of this segment */
 1067     FieldInfos fieldInfos() {
 1068       return fieldInfos;
 1069     }
 1070     
 1071     /**
 1072      * Return the name of the segment this reader is reading.
 1073      */
 1074     String getSegmentName() {
 1075       return segment;
 1076     }
 1077     
 1078     /**
 1079      * Return the SegmentInfo of the segment this reader is reading.
 1080      */
 1081     SegmentInfo getSegmentInfo() {
 1082       return si;
 1083     }
 1084   
 1085     void setSegmentInfo(SegmentInfo info) {
 1086       si = info;
 1087     }
 1088   
 1089     void startCommit() {
 1090       super.startCommit();
 1091       rollbackDeletedDocsDirty = deletedDocsDirty;
 1092       rollbackNormsDirty = normsDirty;
 1093       rollbackUndeleteAll = undeleteAll;
 1094       rollbackPendingDeleteCount = pendingDeleteCount;
 1095       Iterator it = norms.values().iterator();
 1096       while (it.hasNext()) {
 1097         Norm norm = (Norm) it.next();
 1098         norm.rollbackDirty = norm.dirty;
 1099       }
 1100     }
 1101   
 1102     void rollbackCommit() {
 1103       super.rollbackCommit();
 1104       deletedDocsDirty = rollbackDeletedDocsDirty;
 1105       normsDirty = rollbackNormsDirty;
 1106       undeleteAll = rollbackUndeleteAll;
 1107       pendingDeleteCount = rollbackPendingDeleteCount;
 1108       Iterator it = norms.values().iterator();
 1109       while (it.hasNext()) {
 1110         Norm norm = (Norm) it.next();
 1111         norm.dirty = norm.rollbackDirty;
 1112       }
 1113     }
 1114   }

Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » index » [javadoc | source]