Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » search » [javadoc | source]
    1   package org.apache.lucene.search;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import org.apache.lucene.index.IndexReader;
   21   import org.apache.lucene.index.Term;
   22   import org.apache.lucene.util.ToStringUtils;
   23   import org.apache.lucene.search.BooleanClause.Occur;
   24   
   25   import java.io.IOException;
   26   import java.util;
   27   
   28   /** A Query that matches documents matching boolean combinations of other
   29     * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
   30     * BooleanQuerys.
   31     */
   32   public class BooleanQuery extends Query implements Iterable<BooleanClause> {
   33   
   34     private static int maxClauseCount = 1024;
   35   
   36     /** Thrown when an attempt is made to add more than {@link
   37      * #getMaxClauseCount()} clauses. This typically happens if
   38      * a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery 
   39      * is expanded to many terms during search. 
   40      */
   41     public static class TooManyClauses extends RuntimeException {
   42       public TooManyClauses() {}
   43       @Override
   44       public String getMessage() {
   45         return "maxClauseCount is set to " + maxClauseCount;
   46       }
   47     }
   48   
   49     /** Return the maximum number of clauses permitted, 1024 by default.
   50      * Attempts to add more than the permitted number of clauses cause {@link
   51      * TooManyClauses} to be thrown.
   52      * @see #setMaxClauseCount(int)
   53      */
   54     public static int getMaxClauseCount() { return maxClauseCount; }
   55   
   56     /** 
   57      * Set the maximum number of clauses permitted per BooleanQuery.
   58      * Default value is 1024.
   59      */
   60     public static void setMaxClauseCount(int maxClauseCount) {
   61       if (maxClauseCount < 1)
   62         throw new IllegalArgumentException("maxClauseCount must be >= 1");
   63       BooleanQuery.maxClauseCount = maxClauseCount;
   64     }
   65   
   66     private ArrayList<BooleanClause> clauses = new ArrayList<BooleanClause>();
   67     private boolean disableCoord;
   68   
   69     /** Constructs an empty boolean query. */
   70     public BooleanQuery() {}
   71   
   72     /** Constructs an empty boolean query.
   73      *
   74      * {@link Similarity#coord(int,int)} may be disabled in scoring, as
   75      * appropriate. For example, this score factor does not make sense for most
   76      * automatically generated queries, like {@link WildcardQuery} and {@link
   77      * FuzzyQuery}.
   78      *
   79      * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring.
   80      */
   81     public BooleanQuery(boolean disableCoord) {
   82       this.disableCoord = disableCoord;
   83     }
   84   
   85     /** Returns true iff {@link Similarity#coord(int,int)} is disabled in
   86      * scoring for this query instance.
   87      * @see #BooleanQuery(boolean)
   88      */
   89     public boolean isCoordDisabled() { return disableCoord; }
   90   
   91     // Implement coord disabling.
   92     // Inherit javadoc.
   93     @Override
   94     public Similarity getSimilarity(Searcher searcher) {
   95       Similarity result = super.getSimilarity(searcher);
   96       if (disableCoord) {                           // disable coord as requested
   97         result = new SimilarityDelegator(result) {
   98             @Override
   99             public float coord(int overlap, int maxOverlap) {
  100               return 1.0f;
  101             }
  102           };
  103       }
  104       return result;
  105     }
  106   
  107     /**
  108      * Specifies a minimum number of the optional BooleanClauses
  109      * which must be satisfied.
  110      *
  111      * <p>
  112      * By default no optional clauses are necessary for a match
  113      * (unless there are no required clauses).  If this method is used,
  114      * then the specified number of clauses is required.
  115      * </p>
  116      * <p>
  117      * Use of this method is totally independent of specifying that
  118      * any specific clauses are required (or prohibited).  This number will
  119      * only be compared against the number of matching optional clauses.
  120      * </p>
  121      *
  122      * @param min the number of optional clauses that must match
  123      */
  124     public void setMinimumNumberShouldMatch(int min) {
  125       this.minNrShouldMatch = min;
  126     }
  127     protected int minNrShouldMatch = 0;
  128   
  129     /**
  130      * Gets the minimum number of the optional BooleanClauses
  131      * which must be satisfied.
  132      */
  133     public int getMinimumNumberShouldMatch() {
  134       return minNrShouldMatch;
  135     }
  136   
  137     /** Adds a clause to a boolean query.
  138      *
  139      * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
  140      * @see #getMaxClauseCount()
  141      */
  142     public void add(Query query, BooleanClause.Occur occur) {
  143       add(new BooleanClause(query, occur));
  144     }
  145   
  146     /** Adds a clause to a boolean query.
  147      * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
  148      * @see #getMaxClauseCount()
  149      */
  150     public void add(BooleanClause clause) {
  151       if (clauses.size() >= maxClauseCount)
  152         throw new TooManyClauses();
  153   
  154       clauses.add(clause);
  155     }
  156   
  157     /** Returns the set of clauses in this query. */
  158     public BooleanClause[] getClauses() {
  159       return clauses.toArray(new BooleanClause[clauses.size()]);
  160     }
  161   
  162     /** Returns the list of clauses in this query. */
  163     public List<BooleanClause> clauses() { return clauses; }
  164   
  165     /** Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface to
  166      * make it possible to do:
  167      * <pre>for (BooleanClause clause : booleanQuery) {}</pre>
  168      */
  169     public final Iterator<BooleanClause> iterator() { return clauses().iterator(); }
  170   
  171     /**
  172      * Expert: the Weight for BooleanQuery, used to
  173      * normalize, score and explain these queries.
  174      *
  175      * <p>NOTE: this API and implementation is subject to
  176      * change suddenly in the next release.</p>
  177      */
  178     protected class BooleanWeight extends Weight {
  179       /** The Similarity implementation. */
  180       protected Similarity similarity;
  181       protected ArrayList<Weight> weights;
  182   
  183       public BooleanWeight(Searcher searcher)
  184         throws IOException {
  185         this.similarity = getSimilarity(searcher);
  186         weights = new ArrayList<Weight>(clauses.size());
  187         for (int i = 0 ; i < clauses.size(); i++) {
  188           weights.add(clauses.get(i).getQuery().createWeight(searcher));
  189         }
  190       }
  191   
  192       @Override
  193       public Query getQuery() { return BooleanQuery.this; }
  194   
  195       @Override
  196       public float getValue() { return getBoost(); }
  197   
  198       @Override
  199       public float sumOfSquaredWeights() throws IOException {
  200         float sum = 0.0f;
  201         for (int i = 0 ; i < weights.size(); i++) {
  202           // call sumOfSquaredWeights for all clauses in case of side effects
  203           float s = weights.get(i).sumOfSquaredWeights();         // sum sub weights
  204           if (!clauses.get(i).isProhibited())
  205             // only add to sum for non-prohibited clauses
  206             sum += s;
  207         }
  208   
  209         sum *= getBoost() * getBoost();             // boost each sub-weight
  210   
  211         return sum ;
  212       }
  213   
  214   
  215       @Override
  216       public void normalize(float norm) {
  217         norm *= getBoost();                         // incorporate boost
  218         for (Weight w : weights) {
  219           // normalize all clauses, (even if prohibited in case of side affects)
  220           w.normalize(norm);
  221         }
  222       }
  223   
  224       @Override
  225       public Explanation explain(IndexReader reader, int doc)
  226         throws IOException {
  227         final int minShouldMatch =
  228           BooleanQuery.this.getMinimumNumberShouldMatch();
  229         ComplexExplanation sumExpl = new ComplexExplanation();
  230         sumExpl.setDescription("sum of:");
  231         int coord = 0;
  232         int maxCoord = 0;
  233         float sum = 0.0f;
  234         boolean fail = false;
  235         int shouldMatchCount = 0;
  236         Iterator<BooleanClause> cIter = clauses.iterator();
  237         for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) {
  238           Weight w = wIter.next();
  239           BooleanClause c = cIter.next();
  240           if (w.scorer(reader, true, true) == null) {
  241             continue;
  242           }
  243           Explanation e = w.explain(reader, doc);
  244           if (!c.isProhibited()) maxCoord++;
  245           if (e.isMatch()) {
  246             if (!c.isProhibited()) {
  247               sumExpl.addDetail(e);
  248               sum += e.getValue();
  249               coord++;
  250             } else {
  251               Explanation r =
  252                 new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")");
  253               r.addDetail(e);
  254               sumExpl.addDetail(r);
  255               fail = true;
  256             }
  257             if (c.getOccur() == Occur.SHOULD)
  258               shouldMatchCount++;
  259           } else if (c.isRequired()) {
  260             Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
  261             r.addDetail(e);
  262             sumExpl.addDetail(r);
  263             fail = true;
  264           }
  265         }
  266         if (fail) {
  267           sumExpl.setMatch(Boolean.FALSE);
  268           sumExpl.setValue(0.0f);
  269           sumExpl.setDescription
  270             ("Failure to meet condition(s) of required/prohibited clause(s)");
  271           return sumExpl;
  272         } else if (shouldMatchCount < minShouldMatch) {
  273           sumExpl.setMatch(Boolean.FALSE);
  274           sumExpl.setValue(0.0f);
  275           sumExpl.setDescription("Failure to match minimum number "+
  276                                  "of optional clauses: " + minShouldMatch);
  277           return sumExpl;
  278         }
  279         
  280         sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
  281         sumExpl.setValue(sum);
  282         
  283         float coordFactor = similarity.coord(coord, maxCoord);
  284         if (coordFactor == 1.0f)                      // coord is no-op
  285           return sumExpl;                             // eliminate wrapper
  286         else {
  287           ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(),
  288                                                              sum*coordFactor,
  289                                                              "product of:");
  290           result.addDetail(sumExpl);
  291           result.addDetail(new Explanation(coordFactor,
  292                                            "coord("+coord+"/"+maxCoord+")"));
  293           return result;
  294         }
  295       }
  296   
  297       @Override
  298       public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer)
  299           throws IOException {
  300         List<Scorer> required = new ArrayList<Scorer>();
  301         List<Scorer> prohibited = new ArrayList<Scorer>();
  302         List<Scorer> optional = new ArrayList<Scorer>();
  303         Iterator<BooleanClause> cIter = clauses.iterator();
  304         for (Weight w  : weights) {
  305           BooleanClause c =  cIter.next();
  306           Scorer subScorer = w.scorer(reader, true, false);
  307           if (subScorer == null) {
  308             if (c.isRequired()) {
  309               return null;
  310             }
  311           } else if (c.isRequired()) {
  312             required.add(subScorer);
  313           } else if (c.isProhibited()) {
  314             prohibited.add(subScorer);
  315           } else {
  316             optional.add(subScorer);
  317           }
  318         }
  319         
  320         // Check if we can return a BooleanScorer
  321         if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) {
  322           return new BooleanScorer(similarity, minNrShouldMatch, optional, prohibited);
  323         }
  324         
  325         if (required.size() == 0 && optional.size() == 0) {
  326           // no required and optional clauses.
  327           return null;
  328         } else if (optional.size() < minNrShouldMatch) {
  329           // either >1 req scorer, or there are 0 req scorers and at least 1
  330           // optional scorer. Therefore if there are not enough optional scorers
  331           // no documents will be matched by the query
  332           return null;
  333         }
  334         
  335         // Return a BooleanScorer2
  336         return new BooleanScorer2(similarity, minNrShouldMatch, required, prohibited, optional);
  337       }
  338       
  339       @Override
  340       public boolean scoresDocsOutOfOrder() {
  341         int numProhibited = 0;
  342         for (BooleanClause c : clauses) {
  343           if (c.isRequired()) {
  344             return false; // BS2 (in-order) will be used by scorer()
  345           } else if (c.isProhibited()) {
  346             ++numProhibited;
  347           }
  348         }
  349         
  350         if (numProhibited > 32) { // cannot use BS
  351           return false;
  352         }
  353         
  354         // scorer() will return an out-of-order scorer if requested.
  355         return true;
  356       }
  357       
  358     }
  359   
  360     @Override
  361     public Weight createWeight(Searcher searcher) throws IOException {
  362       return new BooleanWeight(searcher);
  363     }
  364   
  365     @Override
  366     public Query rewrite(IndexReader reader) throws IOException {
  367       if (minNrShouldMatch == 0 && clauses.size() == 1) {                    // optimize 1-clause queries
  368         BooleanClause c = clauses.get(0);
  369         if (!c.isProhibited()) {			  // just return clause
  370   
  371           Query query = c.getQuery().rewrite(reader);    // rewrite first
  372   
  373           if (getBoost() != 1.0f) {                 // incorporate boost
  374             if (query == c.getQuery())                   // if rewrite was no-op
  375               query = (Query)query.clone();         // then clone before boost
  376             query.setBoost(getBoost() * query.getBoost());
  377           }
  378   
  379           return query;
  380         }
  381       }
  382   
  383       BooleanQuery clone = null;                    // recursively rewrite
  384       for (int i = 0 ; i < clauses.size(); i++) {
  385         BooleanClause c = clauses.get(i);
  386         Query query = c.getQuery().rewrite(reader);
  387         if (query != c.getQuery()) {                     // clause rewrote: must clone
  388           if (clone == null)
  389             clone = (BooleanQuery)this.clone();
  390           clone.clauses.set(i, new BooleanClause(query, c.getOccur()));
  391         }
  392       }
  393       if (clone != null) {
  394         return clone;                               // some clauses rewrote
  395       } else
  396         return this;                                // no clauses rewrote
  397     }
  398   
  399     // inherit javadoc
  400     @Override
  401     public void extractTerms(Set<Term> terms) {
  402         for (BooleanClause clause : clauses) {
  403             clause.getQuery().extractTerms(terms);
  404           }
  405     }
  406   
  407     @Override @SuppressWarnings("unchecked")
  408     public Object clone() {
  409       BooleanQuery clone = (BooleanQuery)super.clone();
  410       clone.clauses = (ArrayList<BooleanClause>) this.clauses.clone();
  411       return clone;
  412     }
  413   
  414     /** Prints a user-readable version of this query. */
  415     @Override
  416     public String toString(String field) {
  417       StringBuilder buffer = new StringBuilder();
  418       boolean needParens=(getBoost() != 1.0) || (getMinimumNumberShouldMatch()>0) ;
  419       if (needParens) {
  420         buffer.append("(");
  421       }
  422   
  423       for (int i = 0 ; i < clauses.size(); i++) {
  424         BooleanClause c = clauses.get(i);
  425         if (c.isProhibited())
  426           buffer.append("-");
  427         else if (c.isRequired())
  428           buffer.append("+");
  429   
  430         Query subQuery = c.getQuery();
  431         if (subQuery != null) {
  432           if (subQuery instanceof BooleanQuery) {	  // wrap sub-bools in parens
  433             buffer.append("(");
  434             buffer.append(subQuery.toString(field));
  435             buffer.append(")");
  436           } else {
  437             buffer.append(subQuery.toString(field));
  438           }
  439         } else {
  440           buffer.append("null");
  441         }
  442   
  443         if (i != clauses.size()-1)
  444           buffer.append(" ");
  445       }
  446   
  447       if (needParens) {
  448         buffer.append(")");
  449       }
  450   
  451       if (getMinimumNumberShouldMatch()>0) {
  452         buffer.append('~');
  453         buffer.append(getMinimumNumberShouldMatch());
  454       }
  455   
  456       if (getBoost() != 1.0f)
  457       {
  458         buffer.append(ToStringUtils.boost(getBoost()));
  459       }
  460   
  461       return buffer.toString();
  462     }
  463   
  464     /** Returns true iff <code>o</code> is equal to this. */
  465     @Override
  466     public boolean equals(Object o) {
  467       if (!(o instanceof BooleanQuery))
  468         return false;
  469       BooleanQuery other = (BooleanQuery)o;
  470       return (this.getBoost() == other.getBoost())
  471           && this.clauses.equals(other.clauses)
  472           && this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch()
  473           && this.disableCoord == other.disableCoord;
  474     }
  475   
  476     /** Returns a hash code value for this object.*/
  477     @Override
  478     public int hashCode() {
  479       return Float.floatToIntBits(getBoost()) ^ clauses.hashCode()
  480         + getMinimumNumberShouldMatch() + (disableCoord ? 17:0);
  481     }
  482     
  483   }

Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » search » [javadoc | source]