1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.nutch.searcher;
19
20 import java.io.File;
21 import java.io.IOException;
22 import java.util.List;
23
24 import org.apache.hadoop.conf.Configuration;
25 import org.apache.hadoop.fs.FileSystem;
26 import org.apache.hadoop.fs.Path;
27 import org.apache.hadoop.io.FloatWritable;
28 import org.apache.hadoop.io.IntWritable;
29 import org.apache.hadoop.io.Text;
30 import org.apache.hadoop.io.WritableComparable;
31 import org.apache.lucene.document.Document;
32 import org.apache.lucene.document.Field;
33 import org.apache.lucene.index.IndexReader;
34 import org.apache.lucene.index.MultiReader;
35 import org.apache.lucene.search.FieldCache;
36 import org.apache.lucene.search.FieldDoc;
37 import org.apache.lucene.search.ScoreDoc;
38 import org.apache.lucene.search.TopDocs;
39 import org.apache.lucene.store.Directory;
40 import org.apache.lucene.store.FSDirectory;
41 import org.apache.nutch.indexer.FsDirectory;
42 import org.apache.nutch.indexer.NutchSimilarity;
43
44 /** Implements {@link Searcher} and {@link HitDetailer} for either a single
45 * merged index, or a set of indexes. */
46 public class IndexSearcher implements Searcher, HitDetailer {
47
48 private org.apache.lucene.search.Searcher luceneSearcher;
49 private org.apache.lucene.index.IndexReader reader;
50 private LuceneQueryOptimizer optimizer;
51 private FileSystem fs;
52 private Configuration conf;
53 private QueryFilters queryFilters;
54
55 /** Construct given a number of indexes. */
56 public IndexSearcher(Path[] indexDirs, Configuration conf) throws IOException {
57 IndexReader[] readers = new IndexReader[indexDirs.length];
58 this.conf = conf;
59 this.fs = FileSystem.get(conf);
60 for (int i = 0; i < indexDirs.length; i++) {
61 readers[i] = IndexReader.open(getDirectory(indexDirs[i]));
62 }
63 init(new MultiReader(readers), conf);
64 }
65
66 /** Construct given a single merged index. */
67 public IndexSearcher(Path index, Configuration conf)
68 throws IOException {
69 this.conf = conf;
70 this.fs = FileSystem.get(conf);
71 init(IndexReader.open(getDirectory(index)), conf);
72 }
73
74 private void init(IndexReader reader, Configuration conf) throws IOException {
75 this.reader = reader;
76 this.luceneSearcher = new org.apache.lucene.search.IndexSearcher(reader);
77 this.luceneSearcher.setSimilarity(new NutchSimilarity());
78 this.optimizer = new LuceneQueryOptimizer(conf);
79 this.queryFilters = new QueryFilters(conf);
80 }
81
82 private Directory getDirectory(Path file) throws IOException {
83 if ("file".equals(this.fs.getUri().getScheme())) {
84 Path qualified = file.makeQualified(FileSystem.getLocal(conf));
85 File fsLocal = new File(qualified.toUri());
86 return FSDirectory.getDirectory(fsLocal.getAbsolutePath());
87 } else {
88 return new FsDirectory(this.fs, file, false, this.conf);
89 }
90 }
91
92 public Hits search(Query query, int numHits,
93 String dedupField, String sortField, boolean reverse)
94
95 throws IOException {
96 org.apache.lucene.search.BooleanQuery luceneQuery =
97 this.queryFilters.filter(query);
98 return translateHits
99 (optimizer.optimize(luceneQuery, luceneSearcher, numHits,
100 sortField, reverse),
101 dedupField, sortField);
102 }
103
104 public String getExplanation(Query query, Hit hit) throws IOException {
105 return luceneSearcher.explain(this.queryFilters.filter(query),
106 Integer.valueOf(hit.getUniqueKey())).toHtml();
107 }
108
109 public HitDetails getDetails(Hit hit) throws IOException {
110
111 Document doc = luceneSearcher.doc(Integer.valueOf(hit.getUniqueKey()));
112
113 List docFields = doc.getFields();
114 String[] fields = new String[docFields.size()];
115 String[] values = new String[docFields.size()];
116 for (int i = 0; i < docFields.size(); i++) {
117 Field field = (Field)docFields.get(i);
118 fields[i] = field.name();
119 values[i] = field.stringValue();
120 }
121
122 return new HitDetails(fields, values);
123 }
124
125 public HitDetails[] getDetails(Hit[] hits) throws IOException {
126 HitDetails[] results = new HitDetails[hits.length];
127 for (int i = 0; i < hits.length; i++)
128 results[i] = getDetails(hits[i]);
129 return results;
130 }
131
132 private Hits translateHits(TopDocs topDocs,
133 String dedupField, String sortField)
134 throws IOException {
135
136 String[] dedupValues = null;
137 if (dedupField != null)
138 dedupValues = FieldCache.DEFAULT.getStrings(reader, dedupField);
139
140 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
141 int length = scoreDocs.length;
142 Hit[] hits = new Hit[length];
143 for (int i = 0; i < length; i++) {
144
145 int doc = scoreDocs[i].doc;
146
147 WritableComparable sortValue; // convert value to writable
148 if (sortField == null) {
149 sortValue = new FloatWritable(scoreDocs[i].score);
150 } else {
151 Object raw = ((FieldDoc)scoreDocs[i]).fields[0];
152 if (raw instanceof Integer) {
153 sortValue = new IntWritable(((Integer)raw).intValue());
154 } else if (raw instanceof Float) {
155 sortValue = new FloatWritable(((Float)raw).floatValue());
156 } else if (raw instanceof String) {
157 sortValue = new Text((String)raw);
158 } else {
159 throw new RuntimeException("Unknown sort value type!");
160 }
161 }
162
163 String dedupValue = dedupValues == null ? null : dedupValues[doc];
164
165 hits[i] = new Hit(Integer.toString(doc), sortValue, dedupValue);
166 }
167 return new Hits(topDocs.totalHits, hits);
168 }
169
170 public void close() throws IOException {
171 if (luceneSearcher != null) { luceneSearcher.close(); }
172 if (reader != null) { reader.close(); }
173 }
174
175 }