Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » search » [javadoc | source]
    1   package org.apache.lucene.search;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import org.apache.lucene.index.IndexReader;
   21   import org.apache.lucene.util.ToStringUtils;
   22   import org.apache.lucene.search.BooleanClause.Occur;
   23   
   24   import java.io.IOException;
   25   import java.util;
   26   
   27   /** A Query that matches documents matching boolean combinations of other
   28     * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
   29     * BooleanQuerys.
   30     */
   31   public class BooleanQuery extends Query {
   32   
   33     
   34     private static int maxClauseCount = 1024;
   35   
   36     /** Thrown when an attempt is made to add more than {@link
   37      * #getMaxClauseCount()} clauses. This typically happens if
   38      * a PrefixQuery, FuzzyQuery, WildcardQuery, or RangeQuery 
   39      * is expanded to many terms during search. 
   40      */
   41     public static class TooManyClauses extends RuntimeException {
   42       public TooManyClauses() {}
   43       public String getMessage() {
   44         return "maxClauseCount is set to " + maxClauseCount;
   45       }
   46     }
   47   
   48     /** Return the maximum number of clauses permitted, 1024 by default.
   49      * Attempts to add more than the permitted number of clauses cause {@link
   50      * TooManyClauses} to be thrown.
   51      * @see #setMaxClauseCount(int)
   52      */
   53     public static int getMaxClauseCount() { return maxClauseCount; }
   54   
   55     /** Set the maximum number of clauses permitted per BooleanQuery.
   56      * Default value is 1024.
   57      * <p>TermQuery clauses are generated from for example prefix queries and
   58      * fuzzy queries. Each TermQuery needs some buffer space during search,
   59      * so this parameter indirectly controls the maximum buffer requirements for
   60      * query search.
   61      * <p>When this parameter becomes a bottleneck for a Query one can use a
   62      * Filter. For example instead of a {@link RangeQuery} one can use a
   63      * {@link RangeFilter}.
   64      * <p>Normally the buffers are allocated by the JVM. When using for example
   65      * {@link org.apache.lucene.store.MMapDirectory} the buffering is left to
   66      * the operating system.
   67      */
   68     public static void setMaxClauseCount(int maxClauseCount) {
   69       if (maxClauseCount < 1)
   70         throw new IllegalArgumentException("maxClauseCount must be >= 1");
   71       BooleanQuery.maxClauseCount = maxClauseCount;
   72     }
   73   
   74     private ArrayList clauses = new ArrayList();
   75     private boolean disableCoord;
   76   
   77     /** Constructs an empty boolean query. */
   78     public BooleanQuery() {}
   79   
   80     /** Constructs an empty boolean query.
   81      *
   82      * {@link Similarity#coord(int,int)} may be disabled in scoring, as
   83      * appropriate. For example, this score factor does not make sense for most
   84      * automatically generated queries, like {@link WildcardQuery} and {@link
   85      * FuzzyQuery}.
   86      *
   87      * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring.
   88      */
   89     public BooleanQuery(boolean disableCoord) {
   90       this.disableCoord = disableCoord;
   91     }
   92   
   93     /** Returns true iff {@link Similarity#coord(int,int)} is disabled in
   94      * scoring for this query instance.
   95      * @see #BooleanQuery(boolean)
   96      */
   97     public boolean isCoordDisabled() { return disableCoord; }
   98   
   99     // Implement coord disabling.
  100     // Inherit javadoc.
  101     public Similarity getSimilarity(Searcher searcher) {
  102       Similarity result = super.getSimilarity(searcher);
  103       if (disableCoord) {                           // disable coord as requested
  104         result = new SimilarityDelegator(result) {
  105             public float coord(int overlap, int maxOverlap) {
  106               return 1.0f;
  107             }
  108           };
  109       }
  110       return result;
  111     }
  112   
  113     /**
  114      * Specifies a minimum number of the optional BooleanClauses
  115      * which must be satisfied.
  116      *
  117      * <p>
  118      * By default no optional clauses are necessary for a match
  119      * (unless there are no required clauses).  If this method is used,
  120      * then the specified number of clauses is required.
  121      * </p>
  122      * <p>
  123      * Use of this method is totally independent of specifying that
  124      * any specific clauses are required (or prohibited).  This number will
  125      * only be compared against the number of matching optional clauses.
  126      * </p>
  127      * <p>
  128      * EXPERT NOTE: Using this method may force collecting docs in order,
  129      * regardless of whether setAllowDocsOutOfOrder(true) has been called.
  130      * </p>
  131      *
  132      * @param min the number of optional clauses that must match
  133      * @see #setAllowDocsOutOfOrder
  134      */
  135     public void setMinimumNumberShouldMatch(int min) {
  136       this.minNrShouldMatch = min;
  137     }
  138     protected int minNrShouldMatch = 0;
  139   
  140     /**
  141      * Gets the minimum number of the optional BooleanClauses
  142      * which must be satisifed.
  143      */
  144     public int getMinimumNumberShouldMatch() {
  145       return minNrShouldMatch;
  146     }
  147   
  148     /** Adds a clause to a boolean query.
  149      *
  150      * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
  151      * @see #getMaxClauseCount()
  152      */
  153     public void add(Query query, BooleanClause.Occur occur) {
  154       add(new BooleanClause(query, occur));
  155     }
  156   
  157     /** Adds a clause to a boolean query.
  158      * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
  159      * @see #getMaxClauseCount()
  160      */
  161     public void add(BooleanClause clause) {
  162       if (clauses.size() >= maxClauseCount)
  163         throw new TooManyClauses();
  164   
  165       clauses.add(clause);
  166     }
  167   
  168     /** Returns the set of clauses in this query. */
  169     public BooleanClause[] getClauses() {
  170       return (BooleanClause[])clauses.toArray(new BooleanClause[clauses.size()]);
  171     }
  172   
  173     /** Returns the list of clauses in this query. */
  174     public List clauses() { return clauses; }
  175   
  176     private class BooleanWeight implements Weight {
  177       protected Similarity similarity;
  178       protected ArrayList weights = new ArrayList();
  179   
  180       public BooleanWeight(Searcher searcher)
  181         throws IOException {
  182         this.similarity = getSimilarity(searcher);
  183         for (int i = 0 ; i < clauses.size(); i++) {
  184           BooleanClause c = (BooleanClause)clauses.get(i);
  185           weights.add(c.getQuery().createWeight(searcher));
  186         }
  187       }
  188   
  189       public Query getQuery() { return BooleanQuery.this; }
  190       public float getValue() { return getBoost(); }
  191   
  192       public float sumOfSquaredWeights() throws IOException {
  193         float sum = 0.0f;
  194         for (int i = 0 ; i < weights.size(); i++) {
  195           BooleanClause c = (BooleanClause)clauses.get(i);
  196           Weight w = (Weight)weights.get(i);
  197           // call sumOfSquaredWeights for all clauses in case of side effects
  198           float s = w.sumOfSquaredWeights();         // sum sub weights
  199           if (!c.isProhibited())
  200             // only add to sum for non-prohibited clauses
  201             sum += s;
  202         }
  203   
  204         sum *= getBoost() * getBoost();             // boost each sub-weight
  205   
  206         return sum ;
  207       }
  208   
  209   
  210       public void normalize(float norm) {
  211         norm *= getBoost();                         // incorporate boost
  212         for (int i = 0 ; i < weights.size(); i++) {
  213           Weight w = (Weight)weights.get(i);
  214           // normalize all clauses, (even if prohibited in case of side affects)
  215           w.normalize(norm);
  216         }
  217       }
  218   
  219       /** @return Returns BooleanScorer2 that uses and provides skipTo(),
  220        *          and scores documents in document number order.
  221        */
  222       public Scorer scorer(IndexReader reader) throws IOException {
  223         BooleanScorer2 result = new BooleanScorer2(similarity,
  224                                                    minNrShouldMatch,
  225                                                    allowDocsOutOfOrder);
  226   
  227         for (int i = 0 ; i < weights.size(); i++) {
  228           BooleanClause c = (BooleanClause)clauses.get(i);
  229           Weight w = (Weight)weights.get(i);
  230           Scorer subScorer = w.scorer(reader);
  231           if (subScorer != null)
  232             result.add(subScorer, c.isRequired(), c.isProhibited());
  233           else if (c.isRequired())
  234             return null;
  235         }
  236   
  237         return result;
  238       }
  239   
  240       public Explanation explain(IndexReader reader, int doc)
  241         throws IOException {
  242         final int minShouldMatch =
  243           BooleanQuery.this.getMinimumNumberShouldMatch();
  244         ComplexExplanation sumExpl = new ComplexExplanation();
  245         sumExpl.setDescription("sum of:");
  246         int coord = 0;
  247         int maxCoord = 0;
  248         float sum = 0.0f;
  249         boolean fail = false;
  250         int shouldMatchCount = 0;
  251         for (int i = 0 ; i < weights.size(); i++) {
  252           BooleanClause c = (BooleanClause)clauses.get(i);
  253           Weight w = (Weight)weights.get(i);
  254           Explanation e = w.explain(reader, doc);
  255           if (!c.isProhibited()) maxCoord++;
  256           if (e.isMatch()) {
  257             if (!c.isProhibited()) {
  258               sumExpl.addDetail(e);
  259               sum += e.getValue();
  260               coord++;
  261             } else {
  262               Explanation r =
  263                 new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")");
  264               r.addDetail(e);
  265               sumExpl.addDetail(r);
  266               fail = true;
  267             }
  268             if (c.getOccur().equals(Occur.SHOULD))
  269               shouldMatchCount++;
  270           } else if (c.isRequired()) {
  271             Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
  272             r.addDetail(e);
  273             sumExpl.addDetail(r);
  274             fail = true;
  275           }
  276         }
  277         if (fail) {
  278           sumExpl.setMatch(Boolean.FALSE);
  279           sumExpl.setValue(0.0f);
  280           sumExpl.setDescription
  281             ("Failure to meet condition(s) of required/prohibited clause(s)");
  282           return sumExpl;
  283         } else if (shouldMatchCount < minShouldMatch) {
  284           sumExpl.setMatch(Boolean.FALSE);
  285           sumExpl.setValue(0.0f);
  286           sumExpl.setDescription("Failure to match minimum number "+
  287                                  "of optional clauses: " + minShouldMatch);
  288           return sumExpl;
  289         }
  290         
  291         sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
  292         sumExpl.setValue(sum);
  293         
  294         float coordFactor = similarity.coord(coord, maxCoord);
  295         if (coordFactor == 1.0f)                      // coord is no-op
  296           return sumExpl;                             // eliminate wrapper
  297         else {
  298           ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(),
  299                                                              sum*coordFactor,
  300                                                              "product of:");
  301           result.addDetail(sumExpl);
  302           result.addDetail(new Explanation(coordFactor,
  303                                            "coord("+coord+"/"+maxCoord+")"));
  304           return result;
  305         }
  306       }
  307     }
  308   
  309     /** Whether hit docs may be collected out of docid order. */
  310     private static boolean allowDocsOutOfOrder = false;
  311   
  312     /**
  313      * Expert: Indicates whether hit docs may be collected out of docid
  314      * order.
  315      *
  316      * <p>
  317      * Background: although the contract of the Scorer class requires that
  318      * documents be iterated in order of doc id, this was not true in early
  319      * versions of Lucene.  Many pieces of functionality in the current
  320      * Lucene code base have undefined behavior if this contract is not
  321      * upheld, but in some specific simple cases may be faster.  (For
  322      * example: disjunction queries with less than 32 prohibited clauses;
  323      * This setting has no effect for other queries.)
  324      * </p>
  325      *
  326      * <p>
  327      * Specifics: By setting this option to true, calls to 
  328      * {@link HitCollector#collect(int,float)} might be
  329      * invoked first for docid N and only later for docid N-1.
  330      * Being static, this setting is system wide.
  331      * </p>
  332      */
  333     public static void setAllowDocsOutOfOrder(boolean allow) {
  334       allowDocsOutOfOrder = allow;
  335     }  
  336     
  337     /**
  338      * Whether hit docs may be collected out of docid order.
  339      * @see #setAllowDocsOutOfOrder(boolean)
  340      */
  341     public static boolean getAllowDocsOutOfOrder() {
  342       return allowDocsOutOfOrder;
  343     }  
  344     
  345     /**
  346      * @deprecated Use {@link #setAllowDocsOutOfOrder(boolean)} instead. 
  347      */
  348     public static void setUseScorer14(boolean use14) {
  349   	setAllowDocsOutOfOrder(use14);
  350     }
  351     
  352     /**
  353      * @deprecated Use {@link #getAllowDocsOutOfOrder()} instead.
  354      */
  355     public static boolean getUseScorer14() {
  356   	return getAllowDocsOutOfOrder();
  357     }
  358   
  359     protected Weight createWeight(Searcher searcher) throws IOException {
  360       return new BooleanWeight(searcher);
  361     }
  362   
  363     public Query rewrite(IndexReader reader) throws IOException {
  364       if (minNrShouldMatch == 0 && clauses.size() == 1) {                    // optimize 1-clause queries
  365         BooleanClause c = (BooleanClause)clauses.get(0);
  366         if (!c.isProhibited()) {			  // just return clause
  367   
  368           Query query = c.getQuery().rewrite(reader);    // rewrite first
  369   
  370           if (getBoost() != 1.0f) {                 // incorporate boost
  371             if (query == c.getQuery())                   // if rewrite was no-op
  372               query = (Query)query.clone();         // then clone before boost
  373             query.setBoost(getBoost() * query.getBoost());
  374           }
  375   
  376           return query;
  377         }
  378       }
  379   
  380       BooleanQuery clone = null;                    // recursively rewrite
  381       for (int i = 0 ; i < clauses.size(); i++) {
  382         BooleanClause c = (BooleanClause)clauses.get(i);
  383         Query query = c.getQuery().rewrite(reader);
  384         if (query != c.getQuery()) {                     // clause rewrote: must clone
  385           if (clone == null)
  386             clone = (BooleanQuery)this.clone();
  387           clone.clauses.set(i, new BooleanClause(query, c.getOccur()));
  388         }
  389       }
  390       if (clone != null) {
  391         return clone;                               // some clauses rewrote
  392       } else
  393         return this;                                // no clauses rewrote
  394     }
  395   
  396     // inherit javadoc
  397     public void extractTerms(Set terms) {
  398         for (Iterator i = clauses.iterator(); i.hasNext();) {
  399             BooleanClause clause = (BooleanClause) i.next();
  400             clause.getQuery().extractTerms(terms);
  401           }
  402     }
  403   
  404     public Object clone() {
  405       BooleanQuery clone = (BooleanQuery)super.clone();
  406       clone.clauses = (ArrayList)this.clauses.clone();
  407       return clone;
  408     }
  409   
  410     /** Prints a user-readable version of this query. */
  411     public String toString(String field) {
  412       StringBuffer buffer = new StringBuffer();
  413       boolean needParens=(getBoost() != 1.0) || (getMinimumNumberShouldMatch()>0) ;
  414       if (needParens) {
  415         buffer.append("(");
  416       }
  417   
  418       for (int i = 0 ; i < clauses.size(); i++) {
  419         BooleanClause c = (BooleanClause)clauses.get(i);
  420         if (c.isProhibited())
  421           buffer.append("-");
  422         else if (c.isRequired())
  423           buffer.append("+");
  424   
  425         Query subQuery = c.getQuery();
  426         if (subQuery instanceof BooleanQuery) {	  // wrap sub-bools in parens
  427           buffer.append("(");
  428           buffer.append(c.getQuery().toString(field));
  429           buffer.append(")");
  430         } else
  431           buffer.append(c.getQuery().toString(field));
  432   
  433         if (i != clauses.size()-1)
  434           buffer.append(" ");
  435       }
  436   
  437       if (needParens) {
  438         buffer.append(")");
  439       }
  440   
  441       if (getMinimumNumberShouldMatch()>0) {
  442         buffer.append('~');
  443         buffer.append(getMinimumNumberShouldMatch());
  444       }
  445   
  446       if (getBoost() != 1.0f)
  447       {
  448         buffer.append(ToStringUtils.boost(getBoost()));
  449       }
  450   
  451       return buffer.toString();
  452     }
  453   
  454     /** Returns true iff <code>o</code> is equal to this. */
  455     public boolean equals(Object o) {
  456       if (!(o instanceof BooleanQuery))
  457         return false;
  458       BooleanQuery other = (BooleanQuery)o;
  459       return (this.getBoost() == other.getBoost())
  460           && this.clauses.equals(other.clauses)
  461           && this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch();
  462     }
  463   
  464     /** Returns a hash code value for this object.*/
  465     public int hashCode() {
  466       return Float.floatToIntBits(getBoost()) ^ clauses.hashCode()
  467              + getMinimumNumberShouldMatch();
  468     }
  469   
  470   }

Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » search » [javadoc | source]