1 package org.apache.lucene.search;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import java.io.IOException;
21 import java.util.ConcurrentModificationException;
22 import java.util.Vector;
23 import java.util.Iterator;
24
25 import org.apache.lucene.document.Document;
26 import org.apache.lucene.index.CorruptIndexException;
27
28 /** A ranked list of documents, used to hold search results.
29 * <p>
30 * <b>Caution:</b> Iterate only over the hits needed. Iterating over all
31 * hits is generally not desirable and may be the source of
32 * performance issues. If you need to iterate over many or all hits, consider
33 * using the search method that takes a {@link HitCollector}.
34 * </p>
35 * <p><b>Note:</b> Deleting matching documents concurrently with traversing
36 * the hits, might, when deleting hits that were not yet retrieved, decrease
37 * {@link #length()}. In such case,
38 * {@link java.util.ConcurrentModificationException ConcurrentModificationException}
39 * is thrown when accessing hit <code>n</code> ≥ current_{@link #length()}
40 * (but <code>n</code> < {@link #length()}_at_start).
41 *
42 * @deprecated Hits will be removed in Lucene 3.0. <p>
43 * Instead e. g. {@link TopDocCollector} and {@link TopDocs} can be used:<br>
44 * <pre>
45 * TopDocCollector collector = new TopDocCollector(hitsPerPage);
46 * searcher.search(query, collector);
47 * ScoreDoc[] hits = collector.topDocs().scoreDocs;
48 * for (int i = 0; i < hits.length; i++) {
49 * int docId = hits[i].doc;
50 * Document d = searcher.doc(docId);
51 * // do something with current hit
52 * ...
53 * </pre>
54 */
55 public final class Hits {
56 private Weight weight;
57 private Searcher searcher;
58 private Filter filter = null;
59 private Sort sort = null;
60
61 private int length; // the total number of hits
62 private Vector hitDocs = new Vector(); // cache of hits retrieved
63
64 private HitDoc first; // head of LRU cache
65 private HitDoc last; // tail of LRU cache
66 private int numDocs = 0; // number cached
67 private int maxDocs = 200; // max to cache
68
69 private int nDeletions; // # deleted docs in the index.
70 private int lengthAtStart; // this is the number apps usually count on (although deletions can bring it down).
71 private int nDeletedHits = 0; // # of already collected hits that were meanwhile deleted.
72
73 boolean debugCheckedForDeletions = false; // for test purposes.
74
75 Hits(Searcher s, Query q, Filter f) throws IOException {
76 weight = q.weight(s);
77 searcher = s;
78 filter = f;
79 nDeletions = countDeletions(s);
80 getMoreDocs(50); // retrieve 100 initially
81 lengthAtStart = length;
82 }
83
84 Hits(Searcher s, Query q, Filter f, Sort o) throws IOException {
85 weight = q.weight(s);
86 searcher = s;
87 filter = f;
88 sort = o;
89 nDeletions = countDeletions(s);
90 getMoreDocs(50); // retrieve 100 initially
91 lengthAtStart = length;
92 }
93
94 // count # deletions, return -1 if unknown.
95 private int countDeletions(Searcher s) throws IOException {
96 int cnt = -1;
97 if (s instanceof IndexSearcher) {
98 cnt = s.maxDoc() - ((IndexSearcher) s).getIndexReader().numDocs();
99 }
100 return cnt;
101 }
102
103 /**
104 * Tries to add new documents to hitDocs.
105 * Ensures that the hit numbered <code>min</code> has been retrieved.
106 */
107 private final void getMoreDocs(int min) throws IOException {
108 if (hitDocs.size() > min) {
109 min = hitDocs.size();
110 }
111
112 int n = min * 2; // double # retrieved
113 TopDocs topDocs = (sort == null) ? searcher.search(weight, filter, n) : searcher.search(weight, filter, n, sort);
114
115 length = topDocs.totalHits;
116 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
117
118 float scoreNorm = 1.0f;
119
120 if (length > 0 && topDocs.getMaxScore() > 1.0f) {
121 scoreNorm = 1.0f / topDocs.getMaxScore();
122 }
123
124 int start = hitDocs.size() - nDeletedHits;
125
126 // any new deletions?
127 int nDels2 = countDeletions(searcher);
128 debugCheckedForDeletions = false;
129 if (nDeletions < 0 || nDels2 > nDeletions) {
130 // either we cannot count deletions, or some "previously valid hits" might have been deleted, so find exact start point
131 nDeletedHits = 0;
132 debugCheckedForDeletions = true;
133 int i2 = 0;
134 for (int i1=0; i1<hitDocs.size() && i2<scoreDocs.length; i1++) {
135 int id1 = ((HitDoc)hitDocs.get(i1)).id;
136 int id2 = scoreDocs[i2].doc;
137 if (id1 == id2) {
138 i2++;
139 } else {
140 nDeletedHits ++;
141 }
142 }
143 start = i2;
144 }
145
146 int end = scoreDocs.length < length ? scoreDocs.length : length;
147 length += nDeletedHits;
148 for (int i = start; i < end; i++) {
149 hitDocs.addElement(new HitDoc(scoreDocs[i].score * scoreNorm,
150 scoreDocs[i].doc));
151 }
152
153 nDeletions = nDels2;
154 }
155
156 /** Returns the total number of hits available in this set. */
157 public final int length() {
158 return length;
159 }
160
161 /** Returns the stored fields of the n<sup>th</sup> document in this set.
162 * <p>Documents are cached, so that repeated requests for the same element may
163 * return the same Document object.
164 * @throws CorruptIndexException if the index is corrupt
165 * @throws IOException if there is a low-level IO error
166 */
167 public final Document doc(int n) throws CorruptIndexException, IOException {
168 HitDoc hitDoc = hitDoc(n);
169
170 // Update LRU cache of documents
171 remove(hitDoc); // remove from list, if there
172 addToFront(hitDoc); // add to front of list
173 if (numDocs > maxDocs) { // if cache is full
174 HitDoc oldLast = last;
175 remove(last); // flush last
176 oldLast.doc = null; // let doc get gc'd
177 }
178
179 if (hitDoc.doc == null) {
180 hitDoc.doc = searcher.doc(hitDoc.id); // cache miss: read document
181 }
182
183 return hitDoc.doc;
184 }
185
186 /** Returns the score for the n<sup>th</sup> document in this set. */
187 public final float score(int n) throws IOException {
188 return hitDoc(n).score;
189 }
190
191 /** Returns the id for the n<sup>th</sup> document in this set.
192 * Note that ids may change when the index changes, so you cannot
193 * rely on the id to be stable.
194 */
195 public final int id(int n) throws IOException {
196 return hitDoc(n).id;
197 }
198
199 /**
200 * Returns a {@link HitIterator} to navigate the Hits. Each item returned
201 * from {@link Iterator#next()} is a {@link Hit}.
202 * <p>
203 * <b>Caution:</b> Iterate only over the hits needed. Iterating over all
204 * hits is generally not desirable and may be the source of
205 * performance issues. If you need to iterate over many or all hits, consider
206 * using a search method that takes a {@link HitCollector}.
207 * </p>
208 */
209 public Iterator iterator() {
210 return new HitIterator(this);
211 }
212
213 private final HitDoc hitDoc(int n) throws IOException {
214 if (n >= lengthAtStart) {
215 throw new IndexOutOfBoundsException("Not a valid hit number: " + n);
216 }
217
218 if (n >= hitDocs.size()) {
219 getMoreDocs(n);
220 }
221
222 if (n >= length) {
223 throw new ConcurrentModificationException("Not a valid hit number: " + n);
224 }
225
226 return (HitDoc) hitDocs.elementAt(n);
227 }
228
229 private final void addToFront(HitDoc hitDoc) { // insert at front of cache
230 if (first == null) {
231 last = hitDoc;
232 } else {
233 first.prev = hitDoc;
234 }
235
236 hitDoc.next = first;
237 first = hitDoc;
238 hitDoc.prev = null;
239
240 numDocs++;
241 }
242
243 private final void remove(HitDoc hitDoc) { // remove from cache
244 if (hitDoc.doc == null) { // it's not in the list
245 return; // abort
246 }
247
248 if (hitDoc.next == null) {
249 last = hitDoc.prev;
250 } else {
251 hitDoc.next.prev = hitDoc.prev;
252 }
253
254 if (hitDoc.prev == null) {
255 first = hitDoc.next;
256 } else {
257 hitDoc.prev.next = hitDoc.next;
258 }
259
260 numDocs--;
261 }
262 }
263
264 final class HitDoc {
265 float score;
266 int id;
267 Document doc = null;
268
269 HitDoc next; // in doubly-linked cache
270 HitDoc prev; // in doubly-linked cache
271
272 HitDoc(float s, int i) {
273 score = s;
274 id = i;
275 }
276 }