1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.nutch.searcher;
19
20 import org.apache.lucene.search.BooleanClause;
21 import org.apache.lucene.search.BooleanQuery;
22 import org.apache.lucene.search.PhraseQuery;
23 import org.apache.lucene.search.TermQuery;
24 import org.apache.lucene.index.Term;
25
26 import org.apache.nutch.analysis.CommonGrams;
27
28 import org.apache.nutch.searcher.Query.Clause;
29 import org.apache.nutch.searcher.Query.Phrase;
30 import org.apache.hadoop.conf.Configuration;
31
32 /** Translate query fields to search the same-named field, as indexed by an
33 * IndexingFilter. Best for tokenized fields. */
34 public abstract class FieldQueryFilter implements QueryFilter {
35 private String field;
36 private float boost = 1.0f;
37 private Configuration conf;
38 private CommonGrams commonGrams;
39
40 /** Construct for the named field.*/
41 protected FieldQueryFilter(String field) {
42 this(field, 1.0f);
43 }
44
45 /** Construct for the named field, boosting as specified.*/
46 protected FieldQueryFilter(String field, float boost) {
47 this.field = field;
48 this.boost = boost;
49 }
50
51 public BooleanQuery filter(Query input, BooleanQuery output)
52 throws QueryException {
53
54 // examine each clause in the Nutch query
55 Clause[] clauses = input.getClauses();
56 for (int i = 0; i < clauses.length; i++) {
57 Clause c = clauses[i];
58
59 // skip non-matching clauses
60 if (!c.getField().equals(field))
61 continue;
62
63 // optimize phrase clause
64 if (c.isPhrase()) {
65 String[] opt = this.commonGrams.optimizePhrase(c.getPhrase(), field);
66 if (opt.length==1) {
67 c = new Clause(new Query.Term(opt[0]),
68 c.isRequired(), c.isProhibited(), getConf());
69 } else {
70 c = new Clause(new Phrase(opt), c.isRequired(), c.isProhibited(), getConf());
71 }
72 }
73
74 // construct appropriate Lucene clause
75 org.apache.lucene.search.Query luceneClause;
76 if (c.isPhrase()) {
77 Phrase nutchPhrase = c.getPhrase();
78 Query.Term[] terms = nutchPhrase.getTerms();
79 PhraseQuery lucenePhrase = new PhraseQuery();
80 for (int j = 0; j < terms.length; j++) {
81 lucenePhrase.add(new Term(field, terms[j].toString()));
82 }
83 luceneClause = lucenePhrase;
84 } else {
85 luceneClause = new TermQuery(new Term(field, c.getTerm().toString()));
86 }
87
88 // set boost
89 luceneClause.setBoost(boost);
90 // add it as specified in query
91
92 output.add(luceneClause,
93 (c.isProhibited()
94 ? BooleanClause.Occur.MUST_NOT
95 : (c.isRequired()
96 ? BooleanClause.Occur.MUST
97 : BooleanClause.Occur.SHOULD
98 )
99 ));
100 }
101
102 // return the modified Lucene query
103 return output;
104 }
105
106 public void setConf(Configuration conf) {
107 this.conf = conf;
108 this.commonGrams = new CommonGrams(conf);
109 }
110
111 public Configuration getConf() {
112 return this.conf;
113 }
114 }