1 package org.apache.lucene.index;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import java.io.IOException;
21 import java.util.Collection;
22 import java.util.Collections;
23 import java.util.HashMap;
24 import java.util.Map;
25
26 import org.apache.lucene.document.Document;
27 import org.apache.lucene.document.FieldSelector;
28 import org.apache.lucene.index.MultiSegmentReader.MultiTermDocs;
29 import org.apache.lucene.index.MultiSegmentReader.MultiTermEnum;
30 import org.apache.lucene.index.MultiSegmentReader.MultiTermPositions;
31
32 /** An IndexReader which reads multiple indexes, appending their content.
33 *
34 * @version $Id: MultiReader.java 692921 2008-09-07 19:22:40Z mikemccand $
35 */
36 public class MultiReader extends IndexReader {
37 protected IndexReader[] subReaders;
38 private int[] starts; // 1st docno for each segment
39 private boolean[] decrefOnClose; // remember which subreaders to decRef on close
40 private Map normsCache = new HashMap();
41 private int maxDoc = 0;
42 private int numDocs = -1;
43 private boolean hasDeletions = false;
44
45 /**
46 * <p>Construct a MultiReader aggregating the named set of (sub)readers.
47 * Directory locking for delete, undeleteAll, and setNorm operations is
48 * left to the subreaders. </p>
49 * <p>Note that all subreaders are closed if this Multireader is closed.</p>
50 * @param subReaders set of (sub)readers
51 * @throws IOException
52 */
53 public MultiReader(IndexReader[] subReaders) {
54 initialize(subReaders, true);
55 }
56
57 /**
58 * <p>Construct a MultiReader aggregating the named set of (sub)readers.
59 * Directory locking for delete, undeleteAll, and setNorm operations is
60 * left to the subreaders. </p>
61 * @param closeSubReaders indicates whether the subreaders should be closed
62 * when this MultiReader is closed
63 * @param subReaders set of (sub)readers
64 * @throws IOException
65 */
66 public MultiReader(IndexReader[] subReaders, boolean closeSubReaders) {
67 initialize(subReaders, closeSubReaders);
68 }
69
70 private void initialize(IndexReader[] subReaders, boolean closeSubReaders) {
71 this.subReaders = (IndexReader[]) subReaders.clone();
72 starts = new int[subReaders.length + 1]; // build starts array
73 decrefOnClose = new boolean[subReaders.length];
74 for (int i = 0; i < subReaders.length; i++) {
75 starts[i] = maxDoc;
76 maxDoc += subReaders[i].maxDoc(); // compute maxDocs
77
78 if (!closeSubReaders) {
79 subReaders[i].incRef();
80 decrefOnClose[i] = true;
81 } else {
82 decrefOnClose[i] = false;
83 }
84
85 if (subReaders[i].hasDeletions())
86 hasDeletions = true;
87 }
88 starts[subReaders.length] = maxDoc;
89 }
90
91 /**
92 * Tries to reopen the subreaders.
93 * <br>
94 * If one or more subreaders could be re-opened (i. e. subReader.reopen()
95 * returned a new instance != subReader), then a new MultiReader instance
96 * is returned, otherwise this instance is returned.
97 * <p>
98 * A re-opened instance might share one or more subreaders with the old
99 * instance. Index modification operations result in undefined behavior
100 * when performed before the old instance is closed.
101 * (see {@link IndexReader#reopen()}).
102 * <p>
103 * If subreaders are shared, then the reference count of those
104 * readers is increased to ensure that the subreaders remain open
105 * until the last referring reader is closed.
106 *
107 * @throws CorruptIndexException if the index is corrupt
108 * @throws IOException if there is a low-level IO error
109 */
110 public IndexReader reopen() throws CorruptIndexException, IOException {
111 ensureOpen();
112
113 boolean reopened = false;
114 IndexReader[] newSubReaders = new IndexReader[subReaders.length];
115 boolean[] newDecrefOnClose = new boolean[subReaders.length];
116
117 boolean success = false;
118 try {
119 for (int i = 0; i < subReaders.length; i++) {
120 newSubReaders[i] = subReaders[i].reopen();
121 // if at least one of the subreaders was updated we remember that
122 // and return a new MultiReader
123 if (newSubReaders[i] != subReaders[i]) {
124 reopened = true;
125 // this is a new subreader instance, so on close() we don't
126 // decRef but close it
127 newDecrefOnClose[i] = false;
128 }
129 }
130
131 if (reopened) {
132 for (int i = 0; i < subReaders.length; i++) {
133 if (newSubReaders[i] == subReaders[i]) {
134 newSubReaders[i].incRef();
135 newDecrefOnClose[i] = true;
136 }
137 }
138
139 MultiReader mr = new MultiReader(newSubReaders);
140 mr.decrefOnClose = newDecrefOnClose;
141 success = true;
142 return mr;
143 } else {
144 success = true;
145 return this;
146 }
147 } finally {
148 if (!success && reopened) {
149 for (int i = 0; i < newSubReaders.length; i++) {
150 if (newSubReaders[i] != null) {
151 try {
152 if (newDecrefOnClose[i]) {
153 newSubReaders[i].decRef();
154 } else {
155 newSubReaders[i].close();
156 }
157 } catch (IOException ignore) {
158 // keep going - we want to clean up as much as possible
159 }
160 }
161 }
162 }
163 }
164 }
165
166 public TermFreqVector[] getTermFreqVectors(int n) throws IOException {
167 ensureOpen();
168 int i = readerIndex(n); // find segment num
169 return subReaders[i].getTermFreqVectors(n - starts[i]); // dispatch to segment
170 }
171
172 public TermFreqVector getTermFreqVector(int n, String field)
173 throws IOException {
174 ensureOpen();
175 int i = readerIndex(n); // find segment num
176 return subReaders[i].getTermFreqVector(n - starts[i], field);
177 }
178
179
180 public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
181 ensureOpen();
182 int i = readerIndex(docNumber); // find segment num
183 subReaders[i].getTermFreqVector(docNumber - starts[i], field, mapper);
184 }
185
186 public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
187 ensureOpen();
188 int i = readerIndex(docNumber); // find segment num
189 subReaders[i].getTermFreqVector(docNumber - starts[i], mapper);
190 }
191
192 public boolean isOptimized() {
193 return false;
194 }
195
196 public synchronized int numDocs() {
197 // Don't call ensureOpen() here (it could affect performance)
198 if (numDocs == -1) { // check cache
199 int n = 0; // cache miss--recompute
200 for (int i = 0; i < subReaders.length; i++)
201 n += subReaders[i].numDocs(); // sum from readers
202 numDocs = n;
203 }
204 return numDocs;
205 }
206
207 public int maxDoc() {
208 // Don't call ensureOpen() here (it could affect performance)
209 return maxDoc;
210 }
211
212 // inherit javadoc
213 public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
214 ensureOpen();
215 int i = readerIndex(n); // find segment num
216 return subReaders[i].document(n - starts[i], fieldSelector); // dispatch to segment reader
217 }
218
219 public boolean isDeleted(int n) {
220 // Don't call ensureOpen() here (it could affect performance)
221 int i = readerIndex(n); // find segment num
222 return subReaders[i].isDeleted(n - starts[i]); // dispatch to segment reader
223 }
224
225 public boolean hasDeletions() {
226 // Don't call ensureOpen() here (it could affect performance)
227 return hasDeletions;
228 }
229
230 protected void doDelete(int n) throws CorruptIndexException, IOException {
231 numDocs = -1; // invalidate cache
232 int i = readerIndex(n); // find segment num
233 subReaders[i].deleteDocument(n - starts[i]); // dispatch to segment reader
234 hasDeletions = true;
235 }
236
237 protected void doUndeleteAll() throws CorruptIndexException, IOException {
238 for (int i = 0; i < subReaders.length; i++)
239 subReaders[i].undeleteAll();
240
241 hasDeletions = false;
242 numDocs = -1; // invalidate cache
243 }
244
245 private int readerIndex(int n) { // find reader for doc n:
246 return MultiSegmentReader.readerIndex(n, this.starts, this.subReaders.length);
247 }
248
249 public boolean hasNorms(String field) throws IOException {
250 ensureOpen();
251 for (int i = 0; i < subReaders.length; i++) {
252 if (subReaders[i].hasNorms(field)) return true;
253 }
254 return false;
255 }
256
257 private byte[] ones;
258 private byte[] fakeNorms() {
259 if (ones==null) ones=SegmentReader.createFakeNorms(maxDoc());
260 return ones;
261 }
262
263 public synchronized byte[] norms(String field) throws IOException {
264 ensureOpen();
265 byte[] bytes = (byte[])normsCache.get(field);
266 if (bytes != null)
267 return bytes; // cache hit
268 if (!hasNorms(field))
269 return fakeNorms();
270
271 bytes = new byte[maxDoc()];
272 for (int i = 0; i < subReaders.length; i++)
273 subReaders[i].norms(field, bytes, starts[i]);
274 normsCache.put(field, bytes); // update cache
275 return bytes;
276 }
277
278 public synchronized void norms(String field, byte[] result, int offset)
279 throws IOException {
280 ensureOpen();
281 byte[] bytes = (byte[])normsCache.get(field);
282 if (bytes==null && !hasNorms(field)) bytes=fakeNorms();
283 if (bytes != null) // cache hit
284 System.arraycopy(bytes, 0, result, offset, maxDoc());
285
286 for (int i = 0; i < subReaders.length; i++) // read from segments
287 subReaders[i].norms(field, result, offset + starts[i]);
288 }
289
290 protected void doSetNorm(int n, String field, byte value)
291 throws CorruptIndexException, IOException {
292 synchronized (normsCache) {
293 normsCache.remove(field); // clear cache
294 }
295 int i = readerIndex(n); // find segment num
296 subReaders[i].setNorm(n-starts[i], field, value); // dispatch
297 }
298
299 public TermEnum terms() throws IOException {
300 ensureOpen();
301 return new MultiTermEnum(subReaders, starts, null);
302 }
303
304 public TermEnum terms(Term term) throws IOException {
305 ensureOpen();
306 return new MultiTermEnum(subReaders, starts, term);
307 }
308
309 public int docFreq(Term t) throws IOException {
310 ensureOpen();
311 int total = 0; // sum freqs in segments
312 for (int i = 0; i < subReaders.length; i++)
313 total += subReaders[i].docFreq(t);
314 return total;
315 }
316
317 public TermDocs termDocs() throws IOException {
318 ensureOpen();
319 return new MultiTermDocs(subReaders, starts);
320 }
321
322 public TermPositions termPositions() throws IOException {
323 ensureOpen();
324 return new MultiTermPositions(subReaders, starts);
325 }
326
327 protected void doCommit() throws IOException {
328 for (int i = 0; i < subReaders.length; i++)
329 subReaders[i].commit();
330 }
331
332 protected synchronized void doClose() throws IOException {
333 for (int i = 0; i < subReaders.length; i++) {
334 if (decrefOnClose[i]) {
335 subReaders[i].decRef();
336 } else {
337 subReaders[i].close();
338 }
339 }
340 }
341
342 public Collection getFieldNames (IndexReader.FieldOption fieldNames) {
343 ensureOpen();
344 return MultiSegmentReader.getFieldNames(fieldNames, this.subReaders);
345 }
346
347 /**
348 * Checks recursively if all subreaders are up to date.
349 */
350 public boolean isCurrent() throws CorruptIndexException, IOException {
351 for (int i = 0; i < subReaders.length; i++) {
352 if (!subReaders[i].isCurrent()) {
353 return false;
354 }
355 }
356
357 // all subreaders are up to date
358 return true;
359 }
360
361 /** Not implemented.
362 * @throws UnsupportedOperationException
363 */
364 public long getVersion() {
365 throw new UnsupportedOperationException("MultiReader does not support this method.");
366 }
367
368 // for testing
369 IndexReader[] getSubReaders() {
370 return subReaders;
371 }
372 }