Save This Page
Home » nutch-1.0 » org.apache.nutch » searcher » [javadoc | source]
    1   /**
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    *
    9    *     http://www.apache.org/licenses/LICENSE-2.0
   10    *
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   
   18   package org.apache.nutch.searcher;
   19   
   20   import org.apache.lucene.search.BooleanClause;
   21   import org.apache.lucene.search.BooleanQuery;
   22   import org.apache.lucene.search.PhraseQuery;
   23   import org.apache.lucene.search.TermQuery;
   24   import org.apache.lucene.index.Term;
   25   
   26   import org.apache.nutch.analysis.CommonGrams;
   27   
   28   import org.apache.nutch.searcher.Query.Clause;
   29   import org.apache.nutch.searcher.Query.Phrase;
   30   import org.apache.hadoop.conf.Configuration;
   31   
   32   /** Translate query fields to search the same-named field, as indexed by an
   33    * IndexingFilter.  Best for tokenized fields. */
   34   public abstract class FieldQueryFilter implements QueryFilter {
   35     private String field;
   36     private float boost = 1.0f;
   37     private Configuration conf;
   38     private CommonGrams commonGrams;
   39   
   40     /** Construct for the named field.*/
   41     protected FieldQueryFilter(String field) {
   42       this(field, 1.0f);
   43     }
   44   
   45     /** Construct for the named field, boosting as specified.*/
   46     protected FieldQueryFilter(String field, float boost) {
   47       this.field = field;
   48       this.boost = boost;
   49     }
   50   
   51     public BooleanQuery filter(Query input, BooleanQuery output)
   52       throws QueryException {
   53       
   54       // examine each clause in the Nutch query
   55       Clause[] clauses = input.getClauses();
   56       for (int i = 0; i < clauses.length; i++) {
   57         Clause c = clauses[i];
   58   
   59         // skip non-matching clauses
   60         if (!c.getField().equals(field))
   61           continue;
   62   
   63         // optimize phrase clause
   64         if (c.isPhrase()) {
   65           String[] opt = this.commonGrams.optimizePhrase(c.getPhrase(), field);
   66           if (opt.length==1) {
   67             c = new Clause(new Query.Term(opt[0]),
   68                            c.isRequired(), c.isProhibited(), getConf());
   69           } else {
   70             c = new Clause(new Phrase(opt), c.isRequired(), c.isProhibited(), getConf());
   71           }
   72         }
   73   
   74         // construct appropriate Lucene clause
   75         org.apache.lucene.search.Query luceneClause;
   76         if (c.isPhrase()) {
   77           Phrase nutchPhrase = c.getPhrase();
   78           Query.Term[] terms = nutchPhrase.getTerms();
   79           PhraseQuery lucenePhrase = new PhraseQuery();
   80           for (int j = 0; j < terms.length; j++) {
   81             lucenePhrase.add(new Term(field, terms[j].toString()));
   82           }
   83           luceneClause = lucenePhrase;
   84         } else {
   85           luceneClause = new TermQuery(new Term(field, c.getTerm().toString()));
   86         }
   87   
   88         // set boost
   89         luceneClause.setBoost(boost);
   90         // add it as specified in query
   91         
   92         output.add(luceneClause, 
   93             (c.isProhibited()
   94                 ? BooleanClause.Occur.MUST_NOT
   95                 : (c.isRequired()
   96                     ? BooleanClause.Occur.MUST
   97                     : BooleanClause.Occur.SHOULD
   98                    )
   99              ));
  100       }
  101       
  102       // return the modified Lucene query
  103       return output;
  104     }
  105     
  106     public void setConf(Configuration conf) {
  107       this.conf = conf;
  108       this.commonGrams = new CommonGrams(conf);
  109     }
  110   
  111     public Configuration getConf() {
  112       return this.conf;
  113     }
  114   }

Save This Page
Home » nutch-1.0 » org.apache.nutch » searcher » [javadoc | source]