Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » index » [javadoc | source]
    1   package org.apache.lucene.index;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import java.io.IOException;
   21   import java.util.Collection;
   22   import java.util.HashMap;
   23   import java.util.HashSet;
   24   import java.util.Iterator;
   25   import java.util.Map;
   26   import java.util.Set;
   27   
   28   import org.apache.lucene.document.Document;
   29   import org.apache.lucene.document.FieldSelector;
   30   import org.apache.lucene.store.Directory;
   31   
   32   /** 
   33    * An IndexReader which reads indexes with multiple segments.
   34    */
   35   class MultiSegmentReader extends DirectoryIndexReader {
  // One reader per segment; every document-level call is dispatched to one of these.
  protected SegmentReader[] subReaders;
  // 1st docno for each segment; has subReaders.length + 1 entries, the last one being maxDoc
  private int[] starts;                           // 1st docno for each segment
  // Merged norms per field: field name (String) -> byte[] of length maxDoc()
  private Map normsCache = new HashMap();
  // Sum of the sub-readers' maxDoc() values, accumulated in initialize()
  private int maxDoc = 0;
  // Cached document count; -1 means "not computed yet / invalidated"
  private int numDocs = -1;
  // True once any sub-reader reported deletions or a document was deleted through this reader
  private boolean hasDeletions = false;
   42   
   43     /** Construct reading the named set of readers. */
   44     MultiSegmentReader(Directory directory, SegmentInfos sis, boolean closeDirectory, boolean readOnly) throws IOException {
   45       super(directory, sis, closeDirectory, readOnly);
   46   
   47       // To reduce the chance of hitting FileNotFound
   48       // (and having to retry), we open segments in
   49       // reverse because IndexWriter merges & deletes
   50       // the newest segments first.
   51   
   52       SegmentReader[] readers = new SegmentReader[sis.size()];
   53       for (int i = sis.size()-1; i >= 0; i--) {
   54         try {
   55           readers[i] = SegmentReader.get(readOnly, sis.info(i));
   56         } catch (IOException e) {
   57           // Close all readers we had opened:
   58           for(i++;i<sis.size();i++) {
   59             try {
   60               readers[i].close();
   61             } catch (IOException ignore) {
   62               // keep going - we want to clean up as much as possible
   63             }
   64           }
   65           throw e;
   66         }
   67       }
   68   
   69       initialize(readers);
   70     }
   71   
  /**
   * This constructor is only used for {@link #reopen()}.
   *
   * <p>For every segment in <code>infos</code> it either reuses / reopens the
   * matching reader from <code>oldReaders</code> (matched by segment name) or
   * opens a fresh {@link SegmentReader}. Readers that end up shared with the
   * old reader are incRef'd. Afterwards, cached norms from
   * <code>oldNormsCache</code> are carried over for segments whose norms did
   * not change; the remaining segments are re-read.
   *
   * @param oldReaders    sub-readers of the reader being reopened; may be null
   * @param oldStarts     doc-number offsets matching <code>oldReaders</code>
   * @param oldNormsCache norms cache of the reader being reopened; may be null
   */
  MultiSegmentReader(Directory directory, SegmentInfos infos, boolean closeDirectory, SegmentReader[] oldReaders, int[] oldStarts, Map oldNormsCache, boolean readOnly) throws IOException {
    super(directory, infos, closeDirectory, readOnly);

    // we put the old SegmentReaders in a map, that allows us
    // to lookup a reader using its segment name
    Map segmentReaders = new HashMap();

    if (oldReaders != null) {
      // create a Map segment name -> index of that reader in oldReaders
      for (int i = 0; i < oldReaders.length; i++) {
        segmentReaders.put(oldReaders[i].getSegmentName(), new Integer(i));
      }
    }
    
    SegmentReader[] newReaders = new SegmentReader[infos.size()];
    
    // remember which readers are shared between the old and the re-opened
    // MultiSegmentReader - we have to incRef those readers
    boolean[] readerShared = new boolean[infos.size()];
    
    // walk the segments newest-first (highest index first)
    for (int i = infos.size() - 1; i>=0; i--) {
      // find SegmentReader for this segment
      Integer oldReaderIndex = (Integer) segmentReaders.get(infos.info(i).name);
      if (oldReaderIndex == null) {
        // this is a new segment, no old SegmentReader can be reused
        newReaders[i] = null;
      } else {
        // there is an old reader for this segment - we'll try to reopen it
        newReaders[i] = oldReaders[oldReaderIndex.intValue()];
      }

      boolean success = false;
      try {
        SegmentReader newReader;
        // a change of the compound-file setting also forces a fresh reader
        if (newReaders[i] == null || infos.info(i).getUseCompoundFile() != newReaders[i].getSegmentInfo().getUseCompoundFile()) {
          // this is a new reader; in case we hit an exception we can close it safely
          newReader = SegmentReader.get(readOnly, infos.info(i));
        } else {
          newReader = (SegmentReader) newReaders[i].reopenSegment(infos.info(i));
        }
        if (newReader == newReaders[i]) {
          // this reader will be shared between the old and the new one,
          // so we must incRef it
          readerShared[i] = true;
          newReader.incRef();
        } else {
          readerShared[i] = false;
          newReaders[i] = newReader;
        }
        success = true;
      } finally {
        if (!success) {
          // undo the work done for the segments already processed (indices above i);
          // note this advances the outer loop variable, the exception then propagates
          for (i++; i < infos.size(); i++) {
            if (newReaders[i] != null) {
              try {
                if (!readerShared[i]) {
                  // this is a new subReader that is not used by the old one,
                  // we can close it
                  newReaders[i].close();
                } else {
                  // this subReader is also used by the old reader, so instead
                  // closing we must decRef it
                  newReaders[i].decRef();
                }
              } catch (IOException ignore) {
                // keep going - we want to clean up as much as possible
              }
            }
          }
        }
      }
    }    
    
    // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
    initialize(newReaders);
    
    // try to copy unchanged norms from the old normsCache to the new one
    if (oldNormsCache != null) {
      Iterator it = oldNormsCache.entrySet().iterator();
      while (it.hasNext()) {
        Map.Entry entry = (Map.Entry) it.next();
        String field = (String) entry.getKey();
        // skip fields for which no sub-reader of the new reader has norms
        if (!hasNorms(field)) {
          continue;
        }

        byte[] oldBytes = (byte[]) entry.getValue();

        // merged norms for the new reader, filled segment by segment below
        byte[] bytes = new byte[maxDoc()];

        for (int i = 0; i < subReaders.length; i++) {
          Integer oldReaderIndex = ((Integer) segmentReaders.get(subReaders[i].getSegmentName()));

          // this SegmentReader was not re-opened, we can copy all of its norms 
          // (same reader instance, or same norms entry for this field)
          if (oldReaderIndex != null &&
               (oldReaders[oldReaderIndex.intValue()] == subReaders[i] 
                 || oldReaders[oldReaderIndex.intValue()].norms.get(field) == subReaders[i].norms.get(field))) {
            // we don't have to synchronize here: either this constructor is called from a SegmentReader,
            // in which case no old norms cache is present, or it is called from MultiReader.reopen(),
            // which is synchronized
            System.arraycopy(oldBytes, oldStarts[oldReaderIndex.intValue()], bytes, starts[i], starts[i+1] - starts[i]);
          } else {
            // norms changed or the segment is new: read them from the segment
            subReaders[i].norms(field, bytes, starts[i]);
          }
        }

        normsCache.put(field, bytes);      // update cache
      }
    }
  }
  183   
  184     private void initialize(SegmentReader[] subReaders) {
  185       this.subReaders = subReaders;
  186       starts = new int[subReaders.length + 1];    // build starts array
  187       for (int i = 0; i < subReaders.length; i++) {
  188         starts[i] = maxDoc;
  189         maxDoc += subReaders[i].maxDoc();      // compute maxDocs
  190   
  191         if (subReaders[i].hasDeletions())
  192           hasDeletions = true;
  193       }
  194       starts[subReaders.length] = maxDoc;
  195     }
  196   
  197     protected synchronized DirectoryIndexReader doReopen(SegmentInfos infos) throws CorruptIndexException, IOException {
  198       if (infos.size() == 1) {
  199         // The index has only one segment now, so we can't refresh the MultiSegmentReader.
  200         // Return a new [ReadOnly]SegmentReader instead
  201         return SegmentReader.get(readOnly, infos, infos.info(0), false);
  202       } else if (readOnly) {
  203         return new ReadOnlyMultiSegmentReader(directory, infos, closeDirectory, subReaders, starts, normsCache);
  204       } else {
  205         return new MultiSegmentReader(directory, infos, closeDirectory, subReaders, starts, normsCache, false);
  206       }            
  207     }
  208   
  209     public TermFreqVector[] getTermFreqVectors(int n) throws IOException {
  210       ensureOpen();
  211       int i = readerIndex(n);        // find segment num
  212       return subReaders[i].getTermFreqVectors(n - starts[i]); // dispatch to segment
  213     }
  214   
  215     public TermFreqVector getTermFreqVector(int n, String field)
  216         throws IOException {
  217       ensureOpen();
  218       int i = readerIndex(n);        // find segment num
  219       return subReaders[i].getTermFreqVector(n - starts[i], field);
  220     }
  221   
  222   
  223     public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
  224       ensureOpen();
  225       int i = readerIndex(docNumber);        // find segment num
  226       subReaders[i].getTermFreqVector(docNumber - starts[i], field, mapper);
  227     }
  228   
  229     public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
  230       ensureOpen();
  231       int i = readerIndex(docNumber);        // find segment num
  232       subReaders[i].getTermFreqVector(docNumber - starts[i], mapper);
  233     }
  234   
  235     public boolean isOptimized() {
  236       return false;
  237     }
  238     
  239     public synchronized int numDocs() {
  240       // Don't call ensureOpen() here (it could affect performance)
  241       if (numDocs == -1) {        // check cache
  242         int n = 0;                // cache miss--recompute
  243         for (int i = 0; i < subReaders.length; i++)
  244           n += subReaders[i].numDocs();      // sum from readers
  245         numDocs = n;
  246       }
  247       return numDocs;
  248     }
  249   
  250     public int maxDoc() {
  251       // Don't call ensureOpen() here (it could affect performance)
  252       return maxDoc;
  253     }
  254   
  255     // inherit javadoc
  256     public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
  257       ensureOpen();
  258       int i = readerIndex(n);                          // find segment num
  259       return subReaders[i].document(n - starts[i], fieldSelector);    // dispatch to segment reader
  260     }
  261   
  262     public boolean isDeleted(int n) {
  263       // Don't call ensureOpen() here (it could affect performance)
  264       final int i = readerIndex(n);                           // find segment num
  265       return subReaders[i].isDeleted(n - starts[i]);    // dispatch to segment reader
  266     }
  267   
  268     public boolean hasDeletions() {
  269       // Don't call ensureOpen() here (it could affect performance)
  270       return hasDeletions;
  271     }
  272   
  273     protected void doDelete(int n) throws CorruptIndexException, IOException {
  274       numDocs = -1;                             // invalidate cache
  275       int i = readerIndex(n);                   // find segment num
  276       subReaders[i].deleteDocument(n - starts[i]);      // dispatch to segment reader
  277       hasDeletions = true;
  278     }
  279   
  280     protected void doUndeleteAll() throws CorruptIndexException, IOException {
  281       for (int i = 0; i < subReaders.length; i++)
  282         subReaders[i].undeleteAll();
  283   
  284       hasDeletions = false;
  285       numDocs = -1;                                 // invalidate cache
  286     }
  287   
  288     private int readerIndex(int n) {    // find reader for doc n:
  289       return readerIndex(n, this.starts, this.subReaders.length);
  290     }
  291     
  292     final static int readerIndex(int n, int[] starts, int numSubReaders) {    // find reader for doc n:
  293       int lo = 0;                                      // search starts array
  294       int hi = numSubReaders - 1;                  // for first element less
  295   
  296       while (hi >= lo) {
  297         int mid = (lo + hi) >>> 1;
  298         int midValue = starts[mid];
  299         if (n < midValue)
  300           hi = mid - 1;
  301         else if (n > midValue)
  302           lo = mid + 1;
  303         else {                                      // found a match
  304           while (mid+1 < numSubReaders && starts[mid+1] == midValue) {
  305             mid++;                                  // scan to last match
  306           }
  307           return mid;
  308         }
  309       }
  310       return hi;
  311     }
  312   
  313     public boolean hasNorms(String field) throws IOException {
  314       ensureOpen();
  315       for (int i = 0; i < subReaders.length; i++) {
  316         if (subReaders[i].hasNorms(field)) return true;
  317       }
  318       return false;
  319     }
  320   
  321     private byte[] ones;
  322     private byte[] fakeNorms() {
  323       if (ones==null) ones=SegmentReader.createFakeNorms(maxDoc());
  324       return ones;
  325     }
  326   
  327     public synchronized byte[] norms(String field) throws IOException {
  328       ensureOpen();
  329       byte[] bytes = (byte[])normsCache.get(field);
  330       if (bytes != null)
  331         return bytes;          // cache hit
  332       if (!hasNorms(field))
  333         return fakeNorms();
  334   
  335       bytes = new byte[maxDoc()];
  336       for (int i = 0; i < subReaders.length; i++)
  337         subReaders[i].norms(field, bytes, starts[i]);
  338       normsCache.put(field, bytes);      // update cache
  339       return bytes;
  340     }
  341   
  342     public synchronized void norms(String field, byte[] result, int offset)
  343       throws IOException {
  344       ensureOpen();
  345       byte[] bytes = (byte[])normsCache.get(field);
  346       if (bytes==null && !hasNorms(field)) bytes=fakeNorms();
  347       if (bytes != null)                            // cache hit
  348         System.arraycopy(bytes, 0, result, offset, maxDoc());
  349   
  350       for (int i = 0; i < subReaders.length; i++)      // read from segments
  351         subReaders[i].norms(field, result, offset + starts[i]);
  352     }
  353   
  354     protected void doSetNorm(int n, String field, byte value)
  355       throws CorruptIndexException, IOException {
  356       synchronized (normsCache) {
  357         normsCache.remove(field);                         // clear cache      
  358       }
  359       int i = readerIndex(n);                           // find segment num
  360       subReaders[i].setNorm(n-starts[i], field, value); // dispatch
  361     }
  362   
  363     public TermEnum terms() throws IOException {
  364       ensureOpen();
  365       return new MultiTermEnum(subReaders, starts, null);
  366     }
  367   
  368     public TermEnum terms(Term term) throws IOException {
  369       ensureOpen();
  370       return new MultiTermEnum(subReaders, starts, term);
  371     }
  372   
  373     public int docFreq(Term t) throws IOException {
  374       ensureOpen();
  375       int total = 0;          // sum freqs in segments
  376       for (int i = 0; i < subReaders.length; i++)
  377         total += subReaders[i].docFreq(t);
  378       return total;
  379     }
  380   
  381     public TermDocs termDocs() throws IOException {
  382       ensureOpen();
  383       return new MultiTermDocs(subReaders, starts);
  384     }
  385   
  386     public TermPositions termPositions() throws IOException {
  387       ensureOpen();
  388       return new MultiTermPositions(subReaders, starts);
  389     }
  390   
  391     protected void commitChanges() throws IOException {
  392       for (int i = 0; i < subReaders.length; i++)
  393         subReaders[i].commit();
  394     }
  395   
  396     void startCommit() {
  397       super.startCommit();
  398       for (int i = 0; i < subReaders.length; i++) {
  399         subReaders[i].startCommit();
  400       }
  401     }
  402   
  403     void rollbackCommit() {
  404       super.rollbackCommit();
  405       for (int i = 0; i < subReaders.length; i++) {
  406         subReaders[i].rollbackCommit();
  407       }
  408     }
  409   
  410     protected synchronized void doClose() throws IOException {
  411       for (int i = 0; i < subReaders.length; i++)
  412         subReaders[i].decRef();
  413       
  414       // maybe close directory
  415       super.doClose();
  416     }
  417   
  418     public Collection getFieldNames (IndexReader.FieldOption fieldNames) {
  419       ensureOpen();
  420       return getFieldNames(fieldNames, this.subReaders);
  421     }
  422     
  423     static Collection getFieldNames (IndexReader.FieldOption fieldNames, IndexReader[] subReaders) {
  424       // maintain a unique set of field names
  425       Set fieldSet = new HashSet();
  426       for (int i = 0; i < subReaders.length; i++) {
  427         IndexReader reader = subReaders[i];
  428         Collection names = reader.getFieldNames(fieldNames);
  429         fieldSet.addAll(names);
  430       }
  431       return fieldSet;
  432     } 
  433     
  434     // for testing
  435     SegmentReader[] getSubReaders() {
  436       return subReaders;
  437     }
  438   
  439     public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException {
  440       for (int i = 0; i < subReaders.length; i++)
  441         subReaders[i].setTermInfosIndexDivisor(indexDivisor);
  442     }
  443   
  444     public int getTermInfosIndexDivisor() throws IllegalStateException {
  445       if (subReaders.length > 0)
  446         return subReaders[0].getTermInfosIndexDivisor();
  447       else
  448         throw new IllegalStateException("no readers");
  449     }
  450   
  451     static class MultiTermEnum extends TermEnum {
  452       private SegmentMergeQueue queue;
  453     
  454       private Term term;
  455       private int docFreq;
  456     
  457       public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
  458         throws IOException {
  459         queue = new SegmentMergeQueue(readers.length);
  460         for (int i = 0; i < readers.length; i++) {
  461           IndexReader reader = readers[i];
  462           TermEnum termEnum;
  463     
  464           if (t != null) {
  465             termEnum = reader.terms(t);
  466           } else
  467             termEnum = reader.terms();
  468     
  469           SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
  470           if (t == null ? smi.next() : termEnum.term() != null)
  471             queue.put(smi);          // initialize queue
  472           else
  473             smi.close();
  474         }
  475     
  476         if (t != null && queue.size() > 0) {
  477           next();
  478         }
  479       }
  480     
  481       public boolean next() throws IOException {
  482         SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
  483         if (top == null) {
  484           term = null;
  485           return false;
  486         }
  487     
  488         term = top.term;
  489         docFreq = 0;
  490     
  491         while (top != null && term.compareTo(top.term) == 0) {
  492           queue.pop();
  493           docFreq += top.termEnum.docFreq();    // increment freq
  494           if (top.next())
  495             queue.put(top);          // restore queue
  496           else
  497             top.close();          // done with a segment
  498           top = (SegmentMergeInfo)queue.top();
  499         }
  500         return true;
  501       }
  502     
  503       public Term term() {
  504         return term;
  505       }
  506     
  507       public int docFreq() {
  508         return docFreq;
  509       }
  510     
  511       public void close() throws IOException {
  512         queue.close();
  513       }
  514     }
  515   
  516     static class MultiTermDocs implements TermDocs {
  517       protected IndexReader[] readers;
  518       protected int[] starts;
  519       protected Term term;
  520     
  521       protected int base = 0;
  522       protected int pointer = 0;
  523     
  524       private TermDocs[] readerTermDocs;
  525       protected TermDocs current;              // == readerTermDocs[pointer]
  526     
  527       public MultiTermDocs(IndexReader[] r, int[] s) {
  528         readers = r;
  529         starts = s;
  530     
  531         readerTermDocs = new TermDocs[r.length];
  532       }
  533     
  534       public int doc() {
  535         return base + current.doc();
  536       }
  537       public int freq() {
  538         return current.freq();
  539       }
  540     
  541       public void seek(Term term) {
  542         this.term = term;
  543         this.base = 0;
  544         this.pointer = 0;
  545         this.current = null;
  546       }
  547     
  548       public void seek(TermEnum termEnum) throws IOException {
  549         seek(termEnum.term());
  550       }
  551     
  552       public boolean next() throws IOException {
  553         for(;;) {
  554           if (current!=null && current.next()) {
  555             return true;
  556           }
  557           else if (pointer < readers.length) {
  558             base = starts[pointer];
  559             current = termDocs(pointer++);
  560           } else {
  561             return false;
  562           }
  563         }
  564       }
  565     
  566       /** Optimized implementation. */
  567       public int read(final int[] docs, final int[] freqs) throws IOException {
  568         while (true) {
  569           while (current == null) {
  570             if (pointer < readers.length) {      // try next segment
  571               base = starts[pointer];
  572               current = termDocs(pointer++);
  573             } else {
  574               return 0;
  575             }
  576           }
  577           int end = current.read(docs, freqs);
  578           if (end == 0) {          // none left in segment
  579             current = null;
  580           } else {            // got some
  581             final int b = base;        // adjust doc numbers
  582             for (int i = 0; i < end; i++)
  583              docs[i] += b;
  584             return end;
  585           }
  586         }
  587       }
  588     
  589      /* A Possible future optimization could skip entire segments */ 
  590       public boolean skipTo(int target) throws IOException {
  591         for(;;) {
  592           if (current != null && current.skipTo(target-base)) {
  593             return true;
  594           } else if (pointer < readers.length) {
  595             base = starts[pointer];
  596             current = termDocs(pointer++);
  597           } else
  598             return false;
  599         }
  600       }
  601     
  602       private TermDocs termDocs(int i) throws IOException {
  603         if (term == null)
  604           return null;
  605         TermDocs result = readerTermDocs[i];
  606         if (result == null)
  607           result = readerTermDocs[i] = termDocs(readers[i]);
  608         result.seek(term);
  609         return result;
  610       }
  611     
  612       protected TermDocs termDocs(IndexReader reader)
  613         throws IOException {
  614         return reader.termDocs();
  615       }
  616     
  617       public void close() throws IOException {
  618         for (int i = 0; i < readerTermDocs.length; i++) {
  619           if (readerTermDocs[i] != null)
  620             readerTermDocs[i].close();
  621         }
  622       }
  623     }
  624   
  625     static class MultiTermPositions extends MultiTermDocs implements TermPositions {
  626       public MultiTermPositions(IndexReader[] r, int[] s) {
  627         super(r,s);
  628       }
  629     
  630       protected TermDocs termDocs(IndexReader reader) throws IOException {
  631         return (TermDocs)reader.termPositions();
  632       }
  633     
  634       public int nextPosition() throws IOException {
  635         return ((TermPositions)current).nextPosition();
  636       }
  637       
  638       public int getPayloadLength() {
  639         return ((TermPositions)current).getPayloadLength();
  640       }
  641        
  642       public byte[] getPayload(byte[] data, int offset) throws IOException {
  643         return ((TermPositions)current).getPayload(data, offset);
  644       }
  645     
  646     
  647       // TODO: Remove warning after API has been finalized
  648       public boolean isPayloadAvailable() {
  649         return ((TermPositions) current).isPayloadAvailable();
  650       }
  651     }
  652   }

Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » index » [javadoc | source]