Save This Page
Home » lucene-2.3.2-src » org.apache » lucene » search » [javadoc | source]
    1   package org.apache.lucene.search;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import java.io.IOException;
   21   import java.util.ConcurrentModificationException;
   22   import java.util.Vector;
   23   import java.util.Iterator;
   24   
   25   import org.apache.lucene.document.Document;
   26   import org.apache.lucene.index.CorruptIndexException;
   27   
   28   /** A ranked list of documents, used to hold search results.
   29    * <p>
   30    * <b>Caution:</b> Iterate only over the hits needed.  Iterating over all
   31    * hits is generally not desirable and may be the source of
   32    * performance issues. If you need to iterate over many or all hits, consider
   33    * using the search method that takes a {@link HitCollector}.
   34    * </p>
   35    * <p><b>Note:</b> Deleting matching documents concurrently with traversing 
   36    * the hits, might, when deleting hits that were not yet retrieved, decrease
   37    * {@link #length()}. In such case, 
   38    * {@link java.util.ConcurrentModificationException ConcurrentModificationException}
   39    * is thrown when accessing hit <code>n</code> &ge; current_{@link #length()} 
   40    * (but <code>n</code> &lt; {@link #length()}_at_start). 
   41    */
   42   public final class Hits {
   43     private Weight weight;
   44     private Searcher searcher;
   45     private Filter filter = null;
   46     private Sort sort = null;
   47   
   48     private int length;				  // the total number of hits
   49     private Vector hitDocs = new Vector();	  // cache of hits retrieved
   50   
   51     private HitDoc first;         // head of LRU cache
   52     private HitDoc last;          // tail of LRU cache
   53     private int numDocs = 0;      // number cached
   54     private int maxDocs = 200;    // max to cache
   55     
   56     private int nDeletions;       // # deleted docs in the index.    
   57     private int lengthAtStart;    // this is the number apps usually count on (although deletions can bring it down). 
   58     private int nDeletedHits = 0; // # of already collected hits that were meanwhile deleted.
   59   
   60     boolean debugCheckedForDeletions = false; // for test purposes.
   61   
   62     Hits(Searcher s, Query q, Filter f) throws IOException {
   63       weight = q.weight(s);
   64       searcher = s;
   65       filter = f;
   66       nDeletions = countDeletions(s);
   67       getMoreDocs(50); // retrieve 100 initially
   68       lengthAtStart = length;
   69     }
   70   
   71     Hits(Searcher s, Query q, Filter f, Sort o) throws IOException {
   72       weight = q.weight(s);
   73       searcher = s;
   74       filter = f;
   75       sort = o;
   76       nDeletions = countDeletions(s);
   77       getMoreDocs(50); // retrieve 100 initially
   78       lengthAtStart = length;
   79     }
   80   
   81     // count # deletions, return -1 if unknown.
   82     private int countDeletions(Searcher s) throws IOException {
   83       int cnt = -1;
   84       if (s instanceof IndexSearcher) {
   85         cnt = s.maxDoc() - ((IndexSearcher) s).getIndexReader().numDocs(); 
   86       } 
   87       return cnt;
   88     }
   89   
   90     /**
   91      * Tries to add new documents to hitDocs.
   92      * Ensures that the hit numbered <code>min</code> has been retrieved.
   93      */
   94     private final void getMoreDocs(int min) throws IOException {
   95       if (hitDocs.size() > min) {
   96         min = hitDocs.size();
   97       }
   98   
   99       int n = min * 2;	// double # retrieved
  100       TopDocs topDocs = (sort == null) ? searcher.search(weight, filter, n) : searcher.search(weight, filter, n, sort);
  101       
  102       length = topDocs.totalHits;
  103       ScoreDoc[] scoreDocs = topDocs.scoreDocs;
  104   
  105       float scoreNorm = 1.0f;
  106       
  107       if (length > 0 && topDocs.getMaxScore() > 1.0f) {
  108         scoreNorm = 1.0f / topDocs.getMaxScore();
  109       }
  110   
  111       int start = hitDocs.size() - nDeletedHits;
  112   
  113       // any new deletions?
  114       int nDels2 = countDeletions(searcher);
  115       debugCheckedForDeletions = false;
  116       if (nDeletions < 0 || nDels2 > nDeletions) { 
  117         // either we cannot count deletions, or some "previously valid hits" might have been deleted, so find exact start point
  118         nDeletedHits = 0;
  119         debugCheckedForDeletions = true;
  120         int i2 = 0;
  121         for (int i1=0; i1<hitDocs.size() && i2<scoreDocs.length; i1++) {
  122           int id1 = ((HitDoc)hitDocs.get(i1)).id;
  123           int id2 = scoreDocs[i2].doc;
  124           if (id1 == id2) {
  125             i2++;
  126           } else {
  127             nDeletedHits ++;
  128           }
  129         }
  130         start = i2;
  131       }
  132   
  133       int end = scoreDocs.length < length ? scoreDocs.length : length;
  134       length += nDeletedHits;
  135       for (int i = start; i < end; i++) {
  136         hitDocs.addElement(new HitDoc(scoreDocs[i].score * scoreNorm,
  137                                       scoreDocs[i].doc));
  138       }
  139       
  140       nDeletions = nDels2;
  141     }
  142   
  143     /** Returns the total number of hits available in this set. */
  144     public final int length() {
  145       return length;
  146     }
  147   
  148     /** Returns the stored fields of the n<sup>th</sup> document in this set.
  149      * <p>Documents are cached, so that repeated requests for the same element may
  150      * return the same Document object.
  151      * @throws CorruptIndexException if the index is corrupt
  152      * @throws IOException if there is a low-level IO error
  153      */
  154     public final Document doc(int n) throws CorruptIndexException, IOException {
  155       HitDoc hitDoc = hitDoc(n);
  156   
  157       // Update LRU cache of documents
  158       remove(hitDoc);               // remove from list, if there
  159       addToFront(hitDoc);           // add to front of list
  160       if (numDocs > maxDocs) {      // if cache is full
  161         HitDoc oldLast = last;
  162         remove(last);             // flush last
  163         oldLast.doc = null;       // let doc get gc'd
  164       }
  165   
  166       if (hitDoc.doc == null) {
  167         hitDoc.doc = searcher.doc(hitDoc.id);  // cache miss: read document
  168       }
  169   
  170       return hitDoc.doc;
  171     }
  172   
  173     /** Returns the score for the n<sup>th</sup> document in this set. */
  174     public final float score(int n) throws IOException {
  175       return hitDoc(n).score;
  176     }
  177   
  178     /** Returns the id for the n<sup>th</sup> document in this set.
  179      * Note that ids may change when the index changes, so you cannot
  180      * rely on the id to be stable.
  181      */
  182     public final int id(int n) throws IOException {
  183       return hitDoc(n).id;
  184     }
  185   
  186     /**
  187      * Returns a {@link HitIterator} to navigate the Hits.  Each item returned
  188      * from {@link Iterator#next()} is a {@link Hit}.
  189      * <p>
  190      * <b>Caution:</b> Iterate only over the hits needed.  Iterating over all
  191      * hits is generally not desirable and may be the source of
  192      * performance issues. If you need to iterate over many or all hits, consider
  193      * using a search method that takes a {@link HitCollector}.
  194      * </p>
  195      */
  196     public Iterator iterator() {
  197       return new HitIterator(this);
  198     }
  199   
  200     private final HitDoc hitDoc(int n) throws IOException {
  201       if (n >= lengthAtStart) {
  202         throw new IndexOutOfBoundsException("Not a valid hit number: " + n);
  203       }
  204   
  205       if (n >= hitDocs.size()) {
  206         getMoreDocs(n);
  207       }
  208   
  209       if (n >= length) {
  210         throw new ConcurrentModificationException("Not a valid hit number: " + n);
  211       }
  212       
  213       return (HitDoc) hitDocs.elementAt(n);
  214     }
  215   
  216     private final void addToFront(HitDoc hitDoc) {  // insert at front of cache
  217       if (first == null) {
  218         last = hitDoc;
  219       } else {
  220         first.prev = hitDoc;
  221       }
  222   
  223       hitDoc.next = first;
  224       first = hitDoc;
  225       hitDoc.prev = null;
  226   
  227       numDocs++;
  228     }
  229   
  230     private final void remove(HitDoc hitDoc) {	  // remove from cache
  231       if (hitDoc.doc == null) {     // it's not in the list
  232         return;					  // abort
  233       }
  234   
  235       if (hitDoc.next == null) {
  236         last = hitDoc.prev;
  237       } else {
  238         hitDoc.next.prev = hitDoc.prev;
  239       }
  240   
  241       if (hitDoc.prev == null) {
  242         first = hitDoc.next;
  243       } else {
  244         hitDoc.prev.next = hitDoc.next;
  245       }
  246   
  247       numDocs--;
  248     }
  249   }
  250   
  251   final class HitDoc {
  252     float score;
  253     int id;
  254     Document doc = null;
  255   
  256     HitDoc next;  // in doubly-linked cache
  257     HitDoc prev;  // in doubly-linked cache
  258   
  259     HitDoc(float s, int i) {
  260       score = s;
  261       id = i;
  262     }
  263   }

Save This Page
Home » lucene-2.3.2-src » org.apache » lucene » search » [javadoc | source]