1 package org.apache.lucene.search;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import org.apache.lucene.index.IndexReader;
21 import org.apache.lucene.util.ToStringUtils;
22 import org.apache.lucene.search.BooleanClause.Occur;
23
24 import java.io.IOException;
25 import java.util;
26
27 /** A Query that matches documents matching boolean combinations of other
28 * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
29 * BooleanQuerys.
30 */
31 public class BooleanQuery extends Query {
32
33
34 private static int maxClauseCount = 1024;
35
36 /** Thrown when an attempt is made to add more than {@link
37 * #getMaxClauseCount()} clauses. This typically happens if
38 * a PrefixQuery, FuzzyQuery, WildcardQuery, or RangeQuery
39 * is expanded to many terms during search.
40 */
41 public static class TooManyClauses extends RuntimeException {
42 public TooManyClauses() {}
43 public String getMessage() {
44 return "maxClauseCount is set to " + maxClauseCount;
45 }
46 }
47
48 /** Return the maximum number of clauses permitted, 1024 by default.
49 * Attempts to add more than the permitted number of clauses cause {@link
50 * TooManyClauses} to be thrown.
51 * @see #setMaxClauseCount(int)
52 */
53 public static int getMaxClauseCount() { return maxClauseCount; }
54
55 /** Set the maximum number of clauses permitted per BooleanQuery.
56 * Default value is 1024.
57 * <p>TermQuery clauses are generated from for example prefix queries and
58 * fuzzy queries. Each TermQuery needs some buffer space during search,
59 * so this parameter indirectly controls the maximum buffer requirements for
60 * query search.
61 * <p>When this parameter becomes a bottleneck for a Query one can use a
62 * Filter. For example instead of a {@link RangeQuery} one can use a
63 * {@link RangeFilter}.
64 * <p>Normally the buffers are allocated by the JVM. When using for example
65 * {@link org.apache.lucene.store.MMapDirectory} the buffering is left to
66 * the operating system.
67 */
68 public static void setMaxClauseCount(int maxClauseCount) {
69 if (maxClauseCount < 1)
70 throw new IllegalArgumentException("maxClauseCount must be >= 1");
71 BooleanQuery.maxClauseCount = maxClauseCount;
72 }
73
74 private ArrayList clauses = new ArrayList();
75 private boolean disableCoord;
76
77 /** Constructs an empty boolean query. */
78 public BooleanQuery() {}
79
80 /** Constructs an empty boolean query.
81 *
82 * {@link Similarity#coord(int,int)} may be disabled in scoring, as
83 * appropriate. For example, this score factor does not make sense for most
84 * automatically generated queries, like {@link WildcardQuery} and {@link
85 * FuzzyQuery}.
86 *
87 * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring.
88 */
89 public BooleanQuery(boolean disableCoord) {
90 this.disableCoord = disableCoord;
91 }
92
93 /** Returns true iff {@link Similarity#coord(int,int)} is disabled in
94 * scoring for this query instance.
95 * @see #BooleanQuery(boolean)
96 */
97 public boolean isCoordDisabled() { return disableCoord; }
98
99 // Implement coord disabling.
100 // Inherit javadoc.
101 public Similarity getSimilarity(Searcher searcher) {
102 Similarity result = super.getSimilarity(searcher);
103 if (disableCoord) { // disable coord as requested
104 result = new SimilarityDelegator(result) {
105 public float coord(int overlap, int maxOverlap) {
106 return 1.0f;
107 }
108 };
109 }
110 return result;
111 }
112
113 /**
114 * Specifies a minimum number of the optional BooleanClauses
115 * which must be satisfied.
116 *
117 * <p>
118 * By default no optional clauses are necessary for a match
119 * (unless there are no required clauses). If this method is used,
120 * then the specified number of clauses is required.
121 * </p>
122 * <p>
123 * Use of this method is totally independent of specifying that
124 * any specific clauses are required (or prohibited). This number will
125 * only be compared against the number of matching optional clauses.
126 * </p>
127 * <p>
128 * EXPERT NOTE: Using this method may force collecting docs in order,
129 * regardless of whether setAllowDocsOutOfOrder(true) has been called.
130 * </p>
131 *
132 * @param min the number of optional clauses that must match
133 * @see #setAllowDocsOutOfOrder
134 */
135 public void setMinimumNumberShouldMatch(int min) {
136 this.minNrShouldMatch = min;
137 }
138 protected int minNrShouldMatch = 0;
139
140 /**
141 * Gets the minimum number of the optional BooleanClauses
142 * which must be satisifed.
143 */
144 public int getMinimumNumberShouldMatch() {
145 return minNrShouldMatch;
146 }
147
148 /** Adds a clause to a boolean query.
149 *
150 * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
151 * @see #getMaxClauseCount()
152 */
153 public void add(Query query, BooleanClause.Occur occur) {
154 add(new BooleanClause(query, occur));
155 }
156
157 /** Adds a clause to a boolean query.
158 * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
159 * @see #getMaxClauseCount()
160 */
161 public void add(BooleanClause clause) {
162 if (clauses.size() >= maxClauseCount)
163 throw new TooManyClauses();
164
165 clauses.add(clause);
166 }
167
168 /** Returns the set of clauses in this query. */
169 public BooleanClause[] getClauses() {
170 return (BooleanClause[])clauses.toArray(new BooleanClause[clauses.size()]);
171 }
172
173 /** Returns the list of clauses in this query. */
174 public List clauses() { return clauses; }
175
176 private class BooleanWeight implements Weight {
177 protected Similarity similarity;
178 protected ArrayList weights = new ArrayList();
179
180 public BooleanWeight(Searcher searcher)
181 throws IOException {
182 this.similarity = getSimilarity(searcher);
183 for (int i = 0 ; i < clauses.size(); i++) {
184 BooleanClause c = (BooleanClause)clauses.get(i);
185 weights.add(c.getQuery().createWeight(searcher));
186 }
187 }
188
189 public Query getQuery() { return BooleanQuery.this; }
190 public float getValue() { return getBoost(); }
191
192 public float sumOfSquaredWeights() throws IOException {
193 float sum = 0.0f;
194 for (int i = 0 ; i < weights.size(); i++) {
195 BooleanClause c = (BooleanClause)clauses.get(i);
196 Weight w = (Weight)weights.get(i);
197 // call sumOfSquaredWeights for all clauses in case of side effects
198 float s = w.sumOfSquaredWeights(); // sum sub weights
199 if (!c.isProhibited())
200 // only add to sum for non-prohibited clauses
201 sum += s;
202 }
203
204 sum *= getBoost() * getBoost(); // boost each sub-weight
205
206 return sum ;
207 }
208
209
210 public void normalize(float norm) {
211 norm *= getBoost(); // incorporate boost
212 for (int i = 0 ; i < weights.size(); i++) {
213 Weight w = (Weight)weights.get(i);
214 // normalize all clauses, (even if prohibited in case of side affects)
215 w.normalize(norm);
216 }
217 }
218
219 /** @return Returns BooleanScorer2 that uses and provides skipTo(),
220 * and scores documents in document number order.
221 */
222 public Scorer scorer(IndexReader reader) throws IOException {
223 BooleanScorer2 result = new BooleanScorer2(similarity,
224 minNrShouldMatch,
225 allowDocsOutOfOrder);
226
227 for (int i = 0 ; i < weights.size(); i++) {
228 BooleanClause c = (BooleanClause)clauses.get(i);
229 Weight w = (Weight)weights.get(i);
230 Scorer subScorer = w.scorer(reader);
231 if (subScorer != null)
232 result.add(subScorer, c.isRequired(), c.isProhibited());
233 else if (c.isRequired())
234 return null;
235 }
236
237 return result;
238 }
239
240 public Explanation explain(IndexReader reader, int doc)
241 throws IOException {
242 final int minShouldMatch =
243 BooleanQuery.this.getMinimumNumberShouldMatch();
244 ComplexExplanation sumExpl = new ComplexExplanation();
245 sumExpl.setDescription("sum of:");
246 int coord = 0;
247 int maxCoord = 0;
248 float sum = 0.0f;
249 boolean fail = false;
250 int shouldMatchCount = 0;
251 for (int i = 0 ; i < weights.size(); i++) {
252 BooleanClause c = (BooleanClause)clauses.get(i);
253 Weight w = (Weight)weights.get(i);
254 Explanation e = w.explain(reader, doc);
255 if (!c.isProhibited()) maxCoord++;
256 if (e.isMatch()) {
257 if (!c.isProhibited()) {
258 sumExpl.addDetail(e);
259 sum += e.getValue();
260 coord++;
261 } else {
262 Explanation r =
263 new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")");
264 r.addDetail(e);
265 sumExpl.addDetail(r);
266 fail = true;
267 }
268 if (c.getOccur().equals(Occur.SHOULD))
269 shouldMatchCount++;
270 } else if (c.isRequired()) {
271 Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
272 r.addDetail(e);
273 sumExpl.addDetail(r);
274 fail = true;
275 }
276 }
277 if (fail) {
278 sumExpl.setMatch(Boolean.FALSE);
279 sumExpl.setValue(0.0f);
280 sumExpl.setDescription
281 ("Failure to meet condition(s) of required/prohibited clause(s)");
282 return sumExpl;
283 } else if (shouldMatchCount < minShouldMatch) {
284 sumExpl.setMatch(Boolean.FALSE);
285 sumExpl.setValue(0.0f);
286 sumExpl.setDescription("Failure to match minimum number "+
287 "of optional clauses: " + minShouldMatch);
288 return sumExpl;
289 }
290
291 sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
292 sumExpl.setValue(sum);
293
294 float coordFactor = similarity.coord(coord, maxCoord);
295 if (coordFactor == 1.0f) // coord is no-op
296 return sumExpl; // eliminate wrapper
297 else {
298 ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(),
299 sum*coordFactor,
300 "product of:");
301 result.addDetail(sumExpl);
302 result.addDetail(new Explanation(coordFactor,
303 "coord("+coord+"/"+maxCoord+")"));
304 return result;
305 }
306 }
307 }
308
309 /** Whether hit docs may be collected out of docid order. */
310 private static boolean allowDocsOutOfOrder = false;
311
312 /**
313 * Expert: Indicates whether hit docs may be collected out of docid
314 * order.
315 *
316 * <p>
317 * Background: although the contract of the Scorer class requires that
318 * documents be iterated in order of doc id, this was not true in early
319 * versions of Lucene. Many pieces of functionality in the current
320 * Lucene code base have undefined behavior if this contract is not
321 * upheld, but in some specific simple cases may be faster. (For
322 * example: disjunction queries with less than 32 prohibited clauses;
323 * This setting has no effect for other queries.)
324 * </p>
325 *
326 * <p>
327 * Specifics: By setting this option to true, calls to
328 * {@link HitCollector#collect(int,float)} might be
329 * invoked first for docid N and only later for docid N-1.
330 * Being static, this setting is system wide.
331 * </p>
332 */
333 public static void setAllowDocsOutOfOrder(boolean allow) {
334 allowDocsOutOfOrder = allow;
335 }
336
337 /**
338 * Whether hit docs may be collected out of docid order.
339 * @see #setAllowDocsOutOfOrder(boolean)
340 */
341 public static boolean getAllowDocsOutOfOrder() {
342 return allowDocsOutOfOrder;
343 }
344
345 /**
346 * @deprecated Use {@link #setAllowDocsOutOfOrder(boolean)} instead.
347 */
348 public static void setUseScorer14(boolean use14) {
349 setAllowDocsOutOfOrder(use14);
350 }
351
352 /**
353 * @deprecated Use {@link #getAllowDocsOutOfOrder()} instead.
354 */
355 public static boolean getUseScorer14() {
356 return getAllowDocsOutOfOrder();
357 }
358
359 protected Weight createWeight(Searcher searcher) throws IOException {
360 return new BooleanWeight(searcher);
361 }
362
363 public Query rewrite(IndexReader reader) throws IOException {
364 if (minNrShouldMatch == 0 && clauses.size() == 1) { // optimize 1-clause queries
365 BooleanClause c = (BooleanClause)clauses.get(0);
366 if (!c.isProhibited()) { // just return clause
367
368 Query query = c.getQuery().rewrite(reader); // rewrite first
369
370 if (getBoost() != 1.0f) { // incorporate boost
371 if (query == c.getQuery()) // if rewrite was no-op
372 query = (Query)query.clone(); // then clone before boost
373 query.setBoost(getBoost() * query.getBoost());
374 }
375
376 return query;
377 }
378 }
379
380 BooleanQuery clone = null; // recursively rewrite
381 for (int i = 0 ; i < clauses.size(); i++) {
382 BooleanClause c = (BooleanClause)clauses.get(i);
383 Query query = c.getQuery().rewrite(reader);
384 if (query != c.getQuery()) { // clause rewrote: must clone
385 if (clone == null)
386 clone = (BooleanQuery)this.clone();
387 clone.clauses.set(i, new BooleanClause(query, c.getOccur()));
388 }
389 }
390 if (clone != null) {
391 return clone; // some clauses rewrote
392 } else
393 return this; // no clauses rewrote
394 }
395
396 // inherit javadoc
397 public void extractTerms(Set terms) {
398 for (Iterator i = clauses.iterator(); i.hasNext();) {
399 BooleanClause clause = (BooleanClause) i.next();
400 clause.getQuery().extractTerms(terms);
401 }
402 }
403
404 public Object clone() {
405 BooleanQuery clone = (BooleanQuery)super.clone();
406 clone.clauses = (ArrayList)this.clauses.clone();
407 return clone;
408 }
409
410 /** Prints a user-readable version of this query. */
411 public String toString(String field) {
412 StringBuffer buffer = new StringBuffer();
413 boolean needParens=(getBoost() != 1.0) || (getMinimumNumberShouldMatch()>0) ;
414 if (needParens) {
415 buffer.append("(");
416 }
417
418 for (int i = 0 ; i < clauses.size(); i++) {
419 BooleanClause c = (BooleanClause)clauses.get(i);
420 if (c.isProhibited())
421 buffer.append("-");
422 else if (c.isRequired())
423 buffer.append("+");
424
425 Query subQuery = c.getQuery();
426 if (subQuery instanceof BooleanQuery) { // wrap sub-bools in parens
427 buffer.append("(");
428 buffer.append(c.getQuery().toString(field));
429 buffer.append(")");
430 } else
431 buffer.append(c.getQuery().toString(field));
432
433 if (i != clauses.size()-1)
434 buffer.append(" ");
435 }
436
437 if (needParens) {
438 buffer.append(")");
439 }
440
441 if (getMinimumNumberShouldMatch()>0) {
442 buffer.append('~');
443 buffer.append(getMinimumNumberShouldMatch());
444 }
445
446 if (getBoost() != 1.0f)
447 {
448 buffer.append(ToStringUtils.boost(getBoost()));
449 }
450
451 return buffer.toString();
452 }
453
454 /** Returns true iff <code>o</code> is equal to this. */
455 public boolean equals(Object o) {
456 if (!(o instanceof BooleanQuery))
457 return false;
458 BooleanQuery other = (BooleanQuery)o;
459 return (this.getBoost() == other.getBoost())
460 && this.clauses.equals(other.clauses)
461 && this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch();
462 }
463
464 /** Returns a hash code value for this object.*/
465 public int hashCode() {
466 return Float.floatToIntBits(getBoost()) ^ clauses.hashCode()
467 + getMinimumNumberShouldMatch();
468 }
469
470 }