Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » search » [javadoc | source]
    1   package org.apache.lucene.search;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import org.apache.lucene.document.Document;
   21   import org.apache.lucene.document.FieldSelector;
   22   import org.apache.lucene.index.CorruptIndexException;
   23   import org.apache.lucene.index.Term;
   24   
   25   import java.io.IOException;
   26   import java.util.HashMap;
   27   import java.util.HashSet;
   28   import java.util.Map;
   29   import java.util.Set;
   30   
   31   /** Implements search over a set of <code>Searchables</code>.
   32    *
   33    * <p>Applications usually need only call the inherited {@link #search(Query)}
   34    * or {@link #search(Query,Filter)} methods.
   35    */
   36   public class MultiSearcher extends Searcher {
   37       /**
   38        * Document Frequency cache acting as a Dummy-Searcher.
   39        * This class is no full-fledged Searcher, but only supports
   40        * the methods necessary to initialize Weights.
   41        */
   42     private static class CachedDfSource extends Searcher {
   43       private Map dfMap; // Map from Terms to corresponding doc freqs
   44       private int maxDoc; // document count
   45   
   46       public CachedDfSource(Map dfMap, int maxDoc, Similarity similarity) {
   47         this.dfMap = dfMap;
   48         this.maxDoc = maxDoc;
   49         setSimilarity(similarity);
   50       }
   51   
   52       public int docFreq(Term term) {
   53         int df;
   54         try {
   55           df = ((Integer) dfMap.get(term)).intValue();
   56         } catch (NullPointerException e) {
   57           throw new IllegalArgumentException("df for term " + term.text()
   58               + " not available");
   59         }
   60         return df;
   61       }
   62   
   63       public int[] docFreqs(Term[] terms) {
   64         int[] result = new int[terms.length];
   65         for (int i = 0; i < terms.length; i++) {
   66           result[i] = docFreq(terms[i]);
   67         }
   68         return result;
   69       }
   70   
   71       public int maxDoc() {
   72         return maxDoc;
   73       }
   74   
   75       public Query rewrite(Query query) {
   76         // this is a bit of a hack. We know that a query which
   77         // creates a Weight based on this Dummy-Searcher is
   78         // always already rewritten (see preparedWeight()).
   79         // Therefore we just return the unmodified query here
   80         return query;
   81       }
   82   
   83       public void close() {
   84         throw new UnsupportedOperationException();
   85       }
   86   
   87       public Document doc(int i) {
   88         throw new UnsupportedOperationException();
   89       }
   90       
   91       public Document doc(int i, FieldSelector fieldSelector) {
   92           throw new UnsupportedOperationException();
   93       }
   94   
   95       public Explanation explain(Weight weight,int doc) {
   96         throw new UnsupportedOperationException();
   97       }
   98   
   99       public void search(Weight weight, Filter filter, HitCollector results) {
  100         throw new UnsupportedOperationException();
  101       }
  102   
  103       public TopDocs search(Weight weight,Filter filter,int n) {
  104         throw new UnsupportedOperationException();
  105       }
  106   
  107       public TopFieldDocs search(Weight weight,Filter filter,int n,Sort sort) {
  108         throw new UnsupportedOperationException();
  109       }
  110     }
  111   
  112   
  113     private Searchable[] searchables;
  114     private int[] starts;
  115     private int maxDoc = 0;
  116   
  117     /** Creates a searcher which searches <i>searchables</i>. */
  118     public MultiSearcher(Searchable[] searchables) throws IOException {
  119       this.searchables = searchables;
  120   
  121       starts = new int[searchables.length + 1];	  // build starts array
  122       for (int i = 0; i < searchables.length; i++) {
  123         starts[i] = maxDoc;
  124         maxDoc += searchables[i].maxDoc();          // compute maxDocs
  125       }
  126       starts[searchables.length] = maxDoc;
  127     }
  128     
  129     /** Return the array of {@link Searchable}s this searches. */
  130     public Searchable[] getSearchables() {
  131       return searchables;
  132     }
  133   
  134     protected int[] getStarts() {
  135     	return starts;
  136     }
  137   
  138     // inherit javadoc
  139     public void close() throws IOException {
  140       for (int i = 0; i < searchables.length; i++)
  141         searchables[i].close();
  142     }
  143   
  144     public int docFreq(Term term) throws IOException {
  145       int docFreq = 0;
  146       for (int i = 0; i < searchables.length; i++)
  147         docFreq += searchables[i].docFreq(term);
  148       return docFreq;
  149     }
  150   
  151     // inherit javadoc
  152     public Document doc(int n) throws CorruptIndexException, IOException {
  153       int i = subSearcher(n);			  // find searcher index
  154       return searchables[i].doc(n - starts[i]);	  // dispatch to searcher
  155     }
  156   
  157     // inherit javadoc
  158     public Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
  159       int i = subSearcher(n);			  // find searcher index
  160       return searchables[i].doc(n - starts[i], fieldSelector);	  // dispatch to searcher
  161     }
  162     
  163     /** Returns index of the searcher for document <code>n</code> in the array
  164      * used to construct this searcher. */
  165     public int subSearcher(int n) {                 // find searcher for doc n:
  166       // replace w/ call to Arrays.binarySearch in Java 1.2
  167       int lo = 0;					  // search starts array
  168       int hi = searchables.length - 1;		  // for first element less
  169   						  // than n, return its index
  170       while (hi >= lo) {
  171         int mid = (lo + hi) >>> 1;
  172         int midValue = starts[mid];
  173         if (n < midValue)
  174   	hi = mid - 1;
  175         else if (n > midValue)
  176   	lo = mid + 1;
  177         else {                                      // found a match
  178           while (mid+1 < searchables.length && starts[mid+1] == midValue) {
  179             mid++;                                  // scan to last match
  180           }
  181   	return mid;
  182         }
  183       }
  184       return hi;
  185     }
  186   
  187     /** Returns the document number of document <code>n</code> within its
  188      * sub-index. */
  189     public int subDoc(int n) {
  190       return n - starts[subSearcher(n)];
  191     }
  192   
  193     public int maxDoc() throws IOException {
  194       return maxDoc;
  195     }
  196   
  197     public TopDocs search(Weight weight, Filter filter, int nDocs)
  198     throws IOException {
  199   
  200       HitQueue hq = new HitQueue(nDocs);
  201       int totalHits = 0;
  202   
  203       for (int i = 0; i < searchables.length; i++) { // search each searcher
  204         TopDocs docs = searchables[i].search(weight, filter, nDocs);
  205         totalHits += docs.totalHits;		  // update totalHits
  206         ScoreDoc[] scoreDocs = docs.scoreDocs;
  207         for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
  208   	ScoreDoc scoreDoc = scoreDocs[j];
  209           scoreDoc.doc += starts[i];                // convert doc
  210           if(!hq.insert(scoreDoc))
  211               break;                                // no more scores > minScore
  212         }
  213       }
  214   
  215       ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
  216       for (int i = hq.size()-1; i >= 0; i--)	  // put docs in array
  217         scoreDocs[i] = (ScoreDoc)hq.pop();
  218       
  219       float maxScore = (totalHits==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
  220       
  221       return new TopDocs(totalHits, scoreDocs, maxScore);
  222     }
  223   
  224     public TopFieldDocs search (Weight weight, Filter filter, int n, Sort sort)
  225     throws IOException {
  226       FieldDocSortedHitQueue hq = null;
  227       int totalHits = 0;
  228   
  229       float maxScore=Float.NEGATIVE_INFINITY;
  230       
  231       for (int i = 0; i < searchables.length; i++) { // search each searcher
  232         TopFieldDocs docs = searchables[i].search (weight, filter, n, sort);
  233         
  234         if (hq == null) hq = new FieldDocSortedHitQueue (docs.fields, n);
  235         totalHits += docs.totalHits;		  // update totalHits
  236         maxScore = Math.max(maxScore, docs.getMaxScore());
  237         ScoreDoc[] scoreDocs = docs.scoreDocs;
  238         for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
  239           ScoreDoc scoreDoc = scoreDocs[j];
  240           scoreDoc.doc += starts[i];                // convert doc
  241           if (!hq.insert (scoreDoc))
  242             break;                                  // no more scores > minScore
  243         }
  244       }
  245   
  246       ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
  247       for (int i = hq.size() - 1; i >= 0; i--)	  // put docs in array
  248         scoreDocs[i] = (ScoreDoc) hq.pop();
  249   
  250       return new TopFieldDocs (totalHits, scoreDocs, hq.getFields(), maxScore);
  251     }
  252   
  253   
  254     // inherit javadoc
  255     public void search(Weight weight, Filter filter, final HitCollector results)
  256       throws IOException {
  257       for (int i = 0; i < searchables.length; i++) {
  258   
  259         final int start = starts[i];
  260   
  261         searchables[i].search(weight, filter, new HitCollector() {
  262   	  public void collect(int doc, float score) {
  263   	    results.collect(doc + start, score);
  264   	  }
  265   	});
  266   
  267       }
  268     }
  269   
  270     public Query rewrite(Query original) throws IOException {
  271       Query[] queries = new Query[searchables.length];
  272       for (int i = 0; i < searchables.length; i++) {
  273         queries[i] = searchables[i].rewrite(original);
  274       }
  275       return queries[0].combine(queries);
  276     }
  277   
  278     public Explanation explain(Weight weight, int doc) throws IOException {
  279       int i = subSearcher(doc);			  // find searcher index
  280       return searchables[i].explain(weight,doc-starts[i]); // dispatch to searcher
  281     }
  282   
  283     /**
  284      * Create weight in multiple index scenario.
  285      * 
  286      * Distributed query processing is done in the following steps:
  287      * 1. rewrite query
  288      * 2. extract necessary terms
  289      * 3. collect dfs for these terms from the Searchables
  290      * 4. create query weight using aggregate dfs.
  291      * 5. distribute that weight to Searchables
  292      * 6. merge results
  293      *
  294      * Steps 1-4 are done here, 5+6 in the search() methods
  295      *
  296      * @return rewritten queries
  297      */
  298     protected Weight createWeight(Query original) throws IOException {
  299       // step 1
  300       Query rewrittenQuery = rewrite(original);
  301   
  302       // step 2
  303       Set terms = new HashSet();
  304       rewrittenQuery.extractTerms(terms);
  305   
  306       // step3
  307       Term[] allTermsArray = new Term[terms.size()];
  308       terms.toArray(allTermsArray);
  309       int[] aggregatedDfs = new int[terms.size()];
  310       for (int i = 0; i < searchables.length; i++) {
  311         int[] dfs = searchables[i].docFreqs(allTermsArray);
  312         for(int j=0; j<aggregatedDfs.length; j++){
  313           aggregatedDfs[j] += dfs[j];
  314         }
  315       }
  316   
  317       HashMap dfMap = new HashMap();
  318       for(int i=0; i<allTermsArray.length; i++) {
  319         dfMap.put(allTermsArray[i], new Integer(aggregatedDfs[i]));
  320       }
  321   
  322       // step4
  323       int numDocs = maxDoc();
  324       CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, getSimilarity());
  325   
  326       return rewrittenQuery.weight(cacheSim);
  327     }
  328   
  329   }

Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » search » [javadoc | source]