Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » search » [javadoc | source]
    1   package org.apache.lucene.search;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import java.io.IOException;
   21   import java.util.ConcurrentModificationException;
   22   import java.util.Vector;
   23   import java.util.Iterator;
   24   
   25   import org.apache.lucene.document.Document;
   26   import org.apache.lucene.index.CorruptIndexException;
   27   
   28   /** A ranked list of documents, used to hold search results.
   29    * <p>
   30    * <b>Caution:</b> Iterate only over the hits needed.  Iterating over all
   31    * hits is generally not desirable and may be the source of
   32    * performance issues. If you need to iterate over many or all hits, consider
   33    * using the search method that takes a {@link HitCollector}.
   34    * </p>
   35    * <p><b>Note:</b> Deleting matching documents concurrently with traversing 
   36    * the hits, might, when deleting hits that were not yet retrieved, decrease
   37    * {@link #length()}. In such case, 
   38    * {@link java.util.ConcurrentModificationException ConcurrentModificationException}
   39    * is thrown when accessing hit <code>n</code> &ge; current_{@link #length()} 
   40    * (but <code>n</code> &lt; {@link #length()}_at_start). 
   41    * 
   42    * @deprecated Hits will be removed in Lucene 3.0. <p>
   43    * Instead e. g. {@link TopDocCollector} and {@link TopDocs} can be used:<br>
   44    * <pre>
   45    *   TopDocCollector collector = new TopDocCollector(hitsPerPage);
   46    *   searcher.search(query, collector);
   47    *   ScoreDoc[] hits = collector.topDocs().scoreDocs;
   48    *   for (int i = 0; i < hits.length; i++) {
   49    *     int docId = hits[i].doc;
   50    *     Document d = searcher.doc(docId);
   51    *     // do something with current hit
   52    *     ...
   53    * </pre>
   54    */
   55   public final class Hits {
   56     private Weight weight;
   57     private Searcher searcher;
   58     private Filter filter = null;
   59     private Sort sort = null;
   60   
   61     private int length;				  // the total number of hits
   62     private Vector hitDocs = new Vector();	  // cache of hits retrieved
   63   
   64     private HitDoc first;         // head of LRU cache
   65     private HitDoc last;          // tail of LRU cache
   66     private int numDocs = 0;      // number cached
   67     private int maxDocs = 200;    // max to cache
   68     
   69     private int nDeletions;       // # deleted docs in the index.    
   70     private int lengthAtStart;    // this is the number apps usually count on (although deletions can bring it down). 
   71     private int nDeletedHits = 0; // # of already collected hits that were meanwhile deleted.
   72   
   73     boolean debugCheckedForDeletions = false; // for test purposes.
   74   
   75     Hits(Searcher s, Query q, Filter f) throws IOException {
   76       weight = q.weight(s);
   77       searcher = s;
   78       filter = f;
   79       nDeletions = countDeletions(s);
   80       getMoreDocs(50); // retrieve 100 initially
   81       lengthAtStart = length;
   82     }
   83   
   84     Hits(Searcher s, Query q, Filter f, Sort o) throws IOException {
   85       weight = q.weight(s);
   86       searcher = s;
   87       filter = f;
   88       sort = o;
   89       nDeletions = countDeletions(s);
   90       getMoreDocs(50); // retrieve 100 initially
   91       lengthAtStart = length;
   92     }
   93   
   94     // count # deletions, return -1 if unknown.
   95     private int countDeletions(Searcher s) throws IOException {
   96       int cnt = -1;
   97       if (s instanceof IndexSearcher) {
   98         cnt = s.maxDoc() - ((IndexSearcher) s).getIndexReader().numDocs(); 
   99       } 
  100       return cnt;
  101     }
  102   
  103     /**
  104      * Tries to add new documents to hitDocs.
  105      * Ensures that the hit numbered <code>min</code> has been retrieved.
  106      */
  107     private final void getMoreDocs(int min) throws IOException {
  108       if (hitDocs.size() > min) {
  109         min = hitDocs.size();
  110       }
  111   
  112       int n = min * 2;	// double # retrieved
  113       TopDocs topDocs = (sort == null) ? searcher.search(weight, filter, n) : searcher.search(weight, filter, n, sort);
  114       
  115       length = topDocs.totalHits;
  116       ScoreDoc[] scoreDocs = topDocs.scoreDocs;
  117   
  118       float scoreNorm = 1.0f;
  119       
  120       if (length > 0 && topDocs.getMaxScore() > 1.0f) {
  121         scoreNorm = 1.0f / topDocs.getMaxScore();
  122       }
  123   
  124       int start = hitDocs.size() - nDeletedHits;
  125   
  126       // any new deletions?
  127       int nDels2 = countDeletions(searcher);
  128       debugCheckedForDeletions = false;
  129       if (nDeletions < 0 || nDels2 > nDeletions) { 
  130         // either we cannot count deletions, or some "previously valid hits" might have been deleted, so find exact start point
  131         nDeletedHits = 0;
  132         debugCheckedForDeletions = true;
  133         int i2 = 0;
  134         for (int i1=0; i1<hitDocs.size() && i2<scoreDocs.length; i1++) {
  135           int id1 = ((HitDoc)hitDocs.get(i1)).id;
  136           int id2 = scoreDocs[i2].doc;
  137           if (id1 == id2) {
  138             i2++;
  139           } else {
  140             nDeletedHits ++;
  141           }
  142         }
  143         start = i2;
  144       }
  145   
  146       int end = scoreDocs.length < length ? scoreDocs.length : length;
  147       length += nDeletedHits;
  148       for (int i = start; i < end; i++) {
  149         hitDocs.addElement(new HitDoc(scoreDocs[i].score * scoreNorm,
  150                                       scoreDocs[i].doc));
  151       }
  152       
  153       nDeletions = nDels2;
  154     }
  155   
  156     /** Returns the total number of hits available in this set. */
  157     public final int length() {
  158       return length;
  159     }
  160   
  161     /** Returns the stored fields of the n<sup>th</sup> document in this set.
  162      * <p>Documents are cached, so that repeated requests for the same element may
  163      * return the same Document object.
  164      * @throws CorruptIndexException if the index is corrupt
  165      * @throws IOException if there is a low-level IO error
  166      */
  167     public final Document doc(int n) throws CorruptIndexException, IOException {
  168       HitDoc hitDoc = hitDoc(n);
  169   
  170       // Update LRU cache of documents
  171       remove(hitDoc);               // remove from list, if there
  172       addToFront(hitDoc);           // add to front of list
  173       if (numDocs > maxDocs) {      // if cache is full
  174         HitDoc oldLast = last;
  175         remove(last);             // flush last
  176         oldLast.doc = null;       // let doc get gc'd
  177       }
  178   
  179       if (hitDoc.doc == null) {
  180         hitDoc.doc = searcher.doc(hitDoc.id);  // cache miss: read document
  181       }
  182   
  183       return hitDoc.doc;
  184     }
  185   
  186     /** Returns the score for the n<sup>th</sup> document in this set. */
  187     public final float score(int n) throws IOException {
  188       return hitDoc(n).score;
  189     }
  190   
  191     /** Returns the id for the n<sup>th</sup> document in this set.
  192      * Note that ids may change when the index changes, so you cannot
  193      * rely on the id to be stable.
  194      */
  195     public final int id(int n) throws IOException {
  196       return hitDoc(n).id;
  197     }
  198   
  199     /**
  200      * Returns a {@link HitIterator} to navigate the Hits.  Each item returned
  201      * from {@link Iterator#next()} is a {@link Hit}.
  202      * <p>
  203      * <b>Caution:</b> Iterate only over the hits needed.  Iterating over all
  204      * hits is generally not desirable and may be the source of
  205      * performance issues. If you need to iterate over many or all hits, consider
  206      * using a search method that takes a {@link HitCollector}.
  207      * </p>
  208      */
  209     public Iterator iterator() {
  210       return new HitIterator(this);
  211     }
  212   
  213     private final HitDoc hitDoc(int n) throws IOException {
  214       if (n >= lengthAtStart) {
  215         throw new IndexOutOfBoundsException("Not a valid hit number: " + n);
  216       }
  217   
  218       if (n >= hitDocs.size()) {
  219         getMoreDocs(n);
  220       }
  221   
  222       if (n >= length) {
  223         throw new ConcurrentModificationException("Not a valid hit number: " + n);
  224       }
  225       
  226       return (HitDoc) hitDocs.elementAt(n);
  227     }
  228   
  229     private final void addToFront(HitDoc hitDoc) {  // insert at front of cache
  230       if (first == null) {
  231         last = hitDoc;
  232       } else {
  233         first.prev = hitDoc;
  234       }
  235   
  236       hitDoc.next = first;
  237       first = hitDoc;
  238       hitDoc.prev = null;
  239   
  240       numDocs++;
  241     }
  242   
  243     private final void remove(HitDoc hitDoc) {	  // remove from cache
  244       if (hitDoc.doc == null) {     // it's not in the list
  245         return;					  // abort
  246       }
  247   
  248       if (hitDoc.next == null) {
  249         last = hitDoc.prev;
  250       } else {
  251         hitDoc.next.prev = hitDoc.prev;
  252       }
  253   
  254       if (hitDoc.prev == null) {
  255         first = hitDoc.next;
  256       } else {
  257         hitDoc.prev.next = hitDoc.next;
  258       }
  259   
  260       numDocs--;
  261     }
  262   }
  263   
  264   final class HitDoc {
  265     float score;
  266     int id;
  267     Document doc = null;
  268   
  269     HitDoc next;  // in doubly-linked cache
  270     HitDoc prev;  // in doubly-linked cache
  271   
  272     HitDoc(float s, int i) {
  273       score = s;
  274       id = i;
  275     }
  276   }

Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » search » [javadoc | source]