1 package org.apache.lucene.search;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import java.io.IOException;
21 import java.util.ConcurrentModificationException;
22 import java.util.Vector;
23 import java.util.Iterator;
24
25 import org.apache.lucene.document.Document;
26 import org.apache.lucene.index.CorruptIndexException;
27
28 /** A ranked list of documents, used to hold search results.
29 * <p>
30 * <b>Caution:</b> Iterate only over the hits needed. Iterating over all
31 * hits is generally not desirable and may be the source of
32 * performance issues. If you need to iterate over many or all hits, consider
33 * using the search method that takes a {@link HitCollector}.
34 * </p>
35 * <p><b>Note:</b> Deleting matching documents concurrently with traversing
36 * the hits, might, when deleting hits that were not yet retrieved, decrease
37 * {@link #length()}. In such case,
38 * {@link java.util.ConcurrentModificationException ConcurrentModificationException}
39 * is thrown when accessing hit <code>n</code> ≥ current_{@link #length()}
40 * (but <code>n</code> < {@link #length()}_at_start).
41 */
42 public final class Hits {
43 private Weight weight;
44 private Searcher searcher;
45 private Filter filter = null;
46 private Sort sort = null;
47
48 private int length; // the total number of hits
49 private Vector hitDocs = new Vector(); // cache of hits retrieved
50
51 private HitDoc first; // head of LRU cache
52 private HitDoc last; // tail of LRU cache
53 private int numDocs = 0; // number cached
54 private int maxDocs = 200; // max to cache
55
56 private int nDeletions; // # deleted docs in the index.
57 private int lengthAtStart; // this is the number apps usually count on (although deletions can bring it down).
58 private int nDeletedHits = 0; // # of already collected hits that were meanwhile deleted.
59
60 boolean debugCheckedForDeletions = false; // for test purposes.
61
62 Hits(Searcher s, Query q, Filter f) throws IOException {
63 weight = q.weight(s);
64 searcher = s;
65 filter = f;
66 nDeletions = countDeletions(s);
67 getMoreDocs(50); // retrieve 100 initially
68 lengthAtStart = length;
69 }
70
71 Hits(Searcher s, Query q, Filter f, Sort o) throws IOException {
72 weight = q.weight(s);
73 searcher = s;
74 filter = f;
75 sort = o;
76 nDeletions = countDeletions(s);
77 getMoreDocs(50); // retrieve 100 initially
78 lengthAtStart = length;
79 }
80
81 // count # deletions, return -1 if unknown.
82 private int countDeletions(Searcher s) throws IOException {
83 int cnt = -1;
84 if (s instanceof IndexSearcher) {
85 cnt = s.maxDoc() - ((IndexSearcher) s).getIndexReader().numDocs();
86 }
87 return cnt;
88 }
89
90 /**
91 * Tries to add new documents to hitDocs.
92 * Ensures that the hit numbered <code>min</code> has been retrieved.
93 */
94 private final void getMoreDocs(int min) throws IOException {
95 if (hitDocs.size() > min) {
96 min = hitDocs.size();
97 }
98
99 int n = min * 2; // double # retrieved
100 TopDocs topDocs = (sort == null) ? searcher.search(weight, filter, n) : searcher.search(weight, filter, n, sort);
101
102 length = topDocs.totalHits;
103 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
104
105 float scoreNorm = 1.0f;
106
107 if (length > 0 && topDocs.getMaxScore() > 1.0f) {
108 scoreNorm = 1.0f / topDocs.getMaxScore();
109 }
110
111 int start = hitDocs.size() - nDeletedHits;
112
113 // any new deletions?
114 int nDels2 = countDeletions(searcher);
115 debugCheckedForDeletions = false;
116 if (nDeletions < 0 || nDels2 > nDeletions) {
117 // either we cannot count deletions, or some "previously valid hits" might have been deleted, so find exact start point
118 nDeletedHits = 0;
119 debugCheckedForDeletions = true;
120 int i2 = 0;
121 for (int i1=0; i1<hitDocs.size() && i2<scoreDocs.length; i1++) {
122 int id1 = ((HitDoc)hitDocs.get(i1)).id;
123 int id2 = scoreDocs[i2].doc;
124 if (id1 == id2) {
125 i2++;
126 } else {
127 nDeletedHits ++;
128 }
129 }
130 start = i2;
131 }
132
133 int end = scoreDocs.length < length ? scoreDocs.length : length;
134 length += nDeletedHits;
135 for (int i = start; i < end; i++) {
136 hitDocs.addElement(new HitDoc(scoreDocs[i].score * scoreNorm,
137 scoreDocs[i].doc));
138 }
139
140 nDeletions = nDels2;
141 }
142
143 /** Returns the total number of hits available in this set. */
144 public final int length() {
145 return length;
146 }
147
148 /** Returns the stored fields of the n<sup>th</sup> document in this set.
149 * <p>Documents are cached, so that repeated requests for the same element may
150 * return the same Document object.
151 * @throws CorruptIndexException if the index is corrupt
152 * @throws IOException if there is a low-level IO error
153 */
154 public final Document doc(int n) throws CorruptIndexException, IOException {
155 HitDoc hitDoc = hitDoc(n);
156
157 // Update LRU cache of documents
158 remove(hitDoc); // remove from list, if there
159 addToFront(hitDoc); // add to front of list
160 if (numDocs > maxDocs) { // if cache is full
161 HitDoc oldLast = last;
162 remove(last); // flush last
163 oldLast.doc = null; // let doc get gc'd
164 }
165
166 if (hitDoc.doc == null) {
167 hitDoc.doc = searcher.doc(hitDoc.id); // cache miss: read document
168 }
169
170 return hitDoc.doc;
171 }
172
173 /** Returns the score for the n<sup>th</sup> document in this set. */
174 public final float score(int n) throws IOException {
175 return hitDoc(n).score;
176 }
177
178 /** Returns the id for the n<sup>th</sup> document in this set.
179 * Note that ids may change when the index changes, so you cannot
180 * rely on the id to be stable.
181 */
182 public final int id(int n) throws IOException {
183 return hitDoc(n).id;
184 }
185
186 /**
187 * Returns a {@link HitIterator} to navigate the Hits. Each item returned
188 * from {@link Iterator#next()} is a {@link Hit}.
189 * <p>
190 * <b>Caution:</b> Iterate only over the hits needed. Iterating over all
191 * hits is generally not desirable and may be the source of
192 * performance issues. If you need to iterate over many or all hits, consider
193 * using a search method that takes a {@link HitCollector}.
194 * </p>
195 */
196 public Iterator iterator() {
197 return new HitIterator(this);
198 }
199
200 private final HitDoc hitDoc(int n) throws IOException {
201 if (n >= lengthAtStart) {
202 throw new IndexOutOfBoundsException("Not a valid hit number: " + n);
203 }
204
205 if (n >= hitDocs.size()) {
206 getMoreDocs(n);
207 }
208
209 if (n >= length) {
210 throw new ConcurrentModificationException("Not a valid hit number: " + n);
211 }
212
213 return (HitDoc) hitDocs.elementAt(n);
214 }
215
216 private final void addToFront(HitDoc hitDoc) { // insert at front of cache
217 if (first == null) {
218 last = hitDoc;
219 } else {
220 first.prev = hitDoc;
221 }
222
223 hitDoc.next = first;
224 first = hitDoc;
225 hitDoc.prev = null;
226
227 numDocs++;
228 }
229
230 private final void remove(HitDoc hitDoc) { // remove from cache
231 if (hitDoc.doc == null) { // it's not in the list
232 return; // abort
233 }
234
235 if (hitDoc.next == null) {
236 last = hitDoc.prev;
237 } else {
238 hitDoc.next.prev = hitDoc.prev;
239 }
240
241 if (hitDoc.prev == null) {
242 first = hitDoc.next;
243 } else {
244 hitDoc.prev.next = hitDoc.next;
245 }
246
247 numDocs--;
248 }
249 }
250
251 final class HitDoc {
252 float score;
253 int id;
254 Document doc = null;
255
256 HitDoc next; // in doubly-linked cache
257 HitDoc prev; // in doubly-linked cache
258
259 HitDoc(float s, int i) {
260 score = s;
261 id = i;
262 }
263 }