1 package org.apache.lucene.index;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import java.io.IOException;
21 import java.util.Collection;
22 import java.util.HashMap;
23 import java.util.HashSet;
24 import java.util.Iterator;
25 import java.util.Map;
26 import java.util.Set;
27
28 import org.apache.lucene.document.Document;
29 import org.apache.lucene.document.FieldSelector;
30 import org.apache.lucene.store.Directory;
31
32 /**
33 * An IndexReader which reads indexes with multiple segments.
34 */
35 class MultiSegmentReader extends DirectoryIndexReader {
36 protected SegmentReader[] subReaders;
37 private int[] starts; // 1st docno for each segment
38 private Map normsCache = new HashMap();
39 private int maxDoc = 0;
40 private int numDocs = -1;
41 private boolean hasDeletions = false;
42
43 /** Construct reading the named set of readers. */
44 MultiSegmentReader(Directory directory, SegmentInfos sis, boolean closeDirectory, boolean readOnly) throws IOException {
45 super(directory, sis, closeDirectory, readOnly);
46
47 // To reduce the chance of hitting FileNotFound
48 // (and having to retry), we open segments in
49 // reverse because IndexWriter merges & deletes
50 // the newest segments first.
51
52 SegmentReader[] readers = new SegmentReader[sis.size()];
53 for (int i = sis.size()-1; i >= 0; i--) {
54 try {
55 readers[i] = SegmentReader.get(readOnly, sis.info(i));
56 } catch (IOException e) {
57 // Close all readers we had opened:
58 for(i++;i<sis.size();i++) {
59 try {
60 readers[i].close();
61 } catch (IOException ignore) {
62 // keep going - we want to clean up as much as possible
63 }
64 }
65 throw e;
66 }
67 }
68
69 initialize(readers);
70 }
71
72 /** This contructor is only used for {@link #reopen()} */
73 MultiSegmentReader(Directory directory, SegmentInfos infos, boolean closeDirectory, SegmentReader[] oldReaders, int[] oldStarts, Map oldNormsCache, boolean readOnly) throws IOException {
74 super(directory, infos, closeDirectory, readOnly);
75
76 // we put the old SegmentReaders in a map, that allows us
77 // to lookup a reader using its segment name
78 Map segmentReaders = new HashMap();
79
80 if (oldReaders != null) {
81 // create a Map SegmentName->SegmentReader
82 for (int i = 0; i < oldReaders.length; i++) {
83 segmentReaders.put(oldReaders[i].getSegmentName(), new Integer(i));
84 }
85 }
86
87 SegmentReader[] newReaders = new SegmentReader[infos.size()];
88
89 // remember which readers are shared between the old and the re-opened
90 // MultiSegmentReader - we have to incRef those readers
91 boolean[] readerShared = new boolean[infos.size()];
92
93 for (int i = infos.size() - 1; i>=0; i--) {
94 // find SegmentReader for this segment
95 Integer oldReaderIndex = (Integer) segmentReaders.get(infos.info(i).name);
96 if (oldReaderIndex == null) {
97 // this is a new segment, no old SegmentReader can be reused
98 newReaders[i] = null;
99 } else {
100 // there is an old reader for this segment - we'll try to reopen it
101 newReaders[i] = oldReaders[oldReaderIndex.intValue()];
102 }
103
104 boolean success = false;
105 try {
106 SegmentReader newReader;
107 if (newReaders[i] == null || infos.info(i).getUseCompoundFile() != newReaders[i].getSegmentInfo().getUseCompoundFile()) {
108 // this is a new reader; in case we hit an exception we can close it safely
109 newReader = SegmentReader.get(readOnly, infos.info(i));
110 } else {
111 newReader = (SegmentReader) newReaders[i].reopenSegment(infos.info(i));
112 }
113 if (newReader == newReaders[i]) {
114 // this reader will be shared between the old and the new one,
115 // so we must incRef it
116 readerShared[i] = true;
117 newReader.incRef();
118 } else {
119 readerShared[i] = false;
120 newReaders[i] = newReader;
121 }
122 success = true;
123 } finally {
124 if (!success) {
125 for (i++; i < infos.size(); i++) {
126 if (newReaders[i] != null) {
127 try {
128 if (!readerShared[i]) {
129 // this is a new subReader that is not used by the old one,
130 // we can close it
131 newReaders[i].close();
132 } else {
133 // this subReader is also used by the old reader, so instead
134 // closing we must decRef it
135 newReaders[i].decRef();
136 }
137 } catch (IOException ignore) {
138 // keep going - we want to clean up as much as possible
139 }
140 }
141 }
142 }
143 }
144 }
145
146 // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
147 initialize(newReaders);
148
149 // try to copy unchanged norms from the old normsCache to the new one
150 if (oldNormsCache != null) {
151 Iterator it = oldNormsCache.entrySet().iterator();
152 while (it.hasNext()) {
153 Map.Entry entry = (Map.Entry) it.next();
154 String field = (String) entry.getKey();
155 if (!hasNorms(field)) {
156 continue;
157 }
158
159 byte[] oldBytes = (byte[]) entry.getValue();
160
161 byte[] bytes = new byte[maxDoc()];
162
163 for (int i = 0; i < subReaders.length; i++) {
164 Integer oldReaderIndex = ((Integer) segmentReaders.get(subReaders[i].getSegmentName()));
165
166 // this SegmentReader was not re-opened, we can copy all of its norms
167 if (oldReaderIndex != null &&
168 (oldReaders[oldReaderIndex.intValue()] == subReaders[i]
169 || oldReaders[oldReaderIndex.intValue()].norms.get(field) == subReaders[i].norms.get(field))) {
170 // we don't have to synchronize here: either this constructor is called from a SegmentReader,
171 // in which case no old norms cache is present, or it is called from MultiReader.reopen(),
172 // which is synchronized
173 System.arraycopy(oldBytes, oldStarts[oldReaderIndex.intValue()], bytes, starts[i], starts[i+1] - starts[i]);
174 } else {
175 subReaders[i].norms(field, bytes, starts[i]);
176 }
177 }
178
179 normsCache.put(field, bytes); // update cache
180 }
181 }
182 }
183
184 private void initialize(SegmentReader[] subReaders) {
185 this.subReaders = subReaders;
186 starts = new int[subReaders.length + 1]; // build starts array
187 for (int i = 0; i < subReaders.length; i++) {
188 starts[i] = maxDoc;
189 maxDoc += subReaders[i].maxDoc(); // compute maxDocs
190
191 if (subReaders[i].hasDeletions())
192 hasDeletions = true;
193 }
194 starts[subReaders.length] = maxDoc;
195 }
196
197 protected synchronized DirectoryIndexReader doReopen(SegmentInfos infos) throws CorruptIndexException, IOException {
198 if (infos.size() == 1) {
199 // The index has only one segment now, so we can't refresh the MultiSegmentReader.
200 // Return a new [ReadOnly]SegmentReader instead
201 return SegmentReader.get(readOnly, infos, infos.info(0), false);
202 } else if (readOnly) {
203 return new ReadOnlyMultiSegmentReader(directory, infos, closeDirectory, subReaders, starts, normsCache);
204 } else {
205 return new MultiSegmentReader(directory, infos, closeDirectory, subReaders, starts, normsCache, false);
206 }
207 }
208
209 public TermFreqVector[] getTermFreqVectors(int n) throws IOException {
210 ensureOpen();
211 int i = readerIndex(n); // find segment num
212 return subReaders[i].getTermFreqVectors(n - starts[i]); // dispatch to segment
213 }
214
215 public TermFreqVector getTermFreqVector(int n, String field)
216 throws IOException {
217 ensureOpen();
218 int i = readerIndex(n); // find segment num
219 return subReaders[i].getTermFreqVector(n - starts[i], field);
220 }
221
222
223 public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
224 ensureOpen();
225 int i = readerIndex(docNumber); // find segment num
226 subReaders[i].getTermFreqVector(docNumber - starts[i], field, mapper);
227 }
228
229 public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
230 ensureOpen();
231 int i = readerIndex(docNumber); // find segment num
232 subReaders[i].getTermFreqVector(docNumber - starts[i], mapper);
233 }
234
235 public boolean isOptimized() {
236 return false;
237 }
238
239 public synchronized int numDocs() {
240 // Don't call ensureOpen() here (it could affect performance)
241 if (numDocs == -1) { // check cache
242 int n = 0; // cache miss--recompute
243 for (int i = 0; i < subReaders.length; i++)
244 n += subReaders[i].numDocs(); // sum from readers
245 numDocs = n;
246 }
247 return numDocs;
248 }
249
250 public int maxDoc() {
251 // Don't call ensureOpen() here (it could affect performance)
252 return maxDoc;
253 }
254
255 // inherit javadoc
256 public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
257 ensureOpen();
258 int i = readerIndex(n); // find segment num
259 return subReaders[i].document(n - starts[i], fieldSelector); // dispatch to segment reader
260 }
261
262 public boolean isDeleted(int n) {
263 // Don't call ensureOpen() here (it could affect performance)
264 final int i = readerIndex(n); // find segment num
265 return subReaders[i].isDeleted(n - starts[i]); // dispatch to segment reader
266 }
267
268 public boolean hasDeletions() {
269 // Don't call ensureOpen() here (it could affect performance)
270 return hasDeletions;
271 }
272
273 protected void doDelete(int n) throws CorruptIndexException, IOException {
274 numDocs = -1; // invalidate cache
275 int i = readerIndex(n); // find segment num
276 subReaders[i].deleteDocument(n - starts[i]); // dispatch to segment reader
277 hasDeletions = true;
278 }
279
280 protected void doUndeleteAll() throws CorruptIndexException, IOException {
281 for (int i = 0; i < subReaders.length; i++)
282 subReaders[i].undeleteAll();
283
284 hasDeletions = false;
285 numDocs = -1; // invalidate cache
286 }
287
288 private int readerIndex(int n) { // find reader for doc n:
289 return readerIndex(n, this.starts, this.subReaders.length);
290 }
291
292 final static int readerIndex(int n, int[] starts, int numSubReaders) { // find reader for doc n:
293 int lo = 0; // search starts array
294 int hi = numSubReaders - 1; // for first element less
295
296 while (hi >= lo) {
297 int mid = (lo + hi) >>> 1;
298 int midValue = starts[mid];
299 if (n < midValue)
300 hi = mid - 1;
301 else if (n > midValue)
302 lo = mid + 1;
303 else { // found a match
304 while (mid+1 < numSubReaders && starts[mid+1] == midValue) {
305 mid++; // scan to last match
306 }
307 return mid;
308 }
309 }
310 return hi;
311 }
312
313 public boolean hasNorms(String field) throws IOException {
314 ensureOpen();
315 for (int i = 0; i < subReaders.length; i++) {
316 if (subReaders[i].hasNorms(field)) return true;
317 }
318 return false;
319 }
320
321 private byte[] ones;
322 private byte[] fakeNorms() {
323 if (ones==null) ones=SegmentReader.createFakeNorms(maxDoc());
324 return ones;
325 }
326
327 public synchronized byte[] norms(String field) throws IOException {
328 ensureOpen();
329 byte[] bytes = (byte[])normsCache.get(field);
330 if (bytes != null)
331 return bytes; // cache hit
332 if (!hasNorms(field))
333 return fakeNorms();
334
335 bytes = new byte[maxDoc()];
336 for (int i = 0; i < subReaders.length; i++)
337 subReaders[i].norms(field, bytes, starts[i]);
338 normsCache.put(field, bytes); // update cache
339 return bytes;
340 }
341
342 public synchronized void norms(String field, byte[] result, int offset)
343 throws IOException {
344 ensureOpen();
345 byte[] bytes = (byte[])normsCache.get(field);
346 if (bytes==null && !hasNorms(field)) bytes=fakeNorms();
347 if (bytes != null) // cache hit
348 System.arraycopy(bytes, 0, result, offset, maxDoc());
349
350 for (int i = 0; i < subReaders.length; i++) // read from segments
351 subReaders[i].norms(field, result, offset + starts[i]);
352 }
353
354 protected void doSetNorm(int n, String field, byte value)
355 throws CorruptIndexException, IOException {
356 synchronized (normsCache) {
357 normsCache.remove(field); // clear cache
358 }
359 int i = readerIndex(n); // find segment num
360 subReaders[i].setNorm(n-starts[i], field, value); // dispatch
361 }
362
363 public TermEnum terms() throws IOException {
364 ensureOpen();
365 return new MultiTermEnum(subReaders, starts, null);
366 }
367
368 public TermEnum terms(Term term) throws IOException {
369 ensureOpen();
370 return new MultiTermEnum(subReaders, starts, term);
371 }
372
373 public int docFreq(Term t) throws IOException {
374 ensureOpen();
375 int total = 0; // sum freqs in segments
376 for (int i = 0; i < subReaders.length; i++)
377 total += subReaders[i].docFreq(t);
378 return total;
379 }
380
381 public TermDocs termDocs() throws IOException {
382 ensureOpen();
383 return new MultiTermDocs(subReaders, starts);
384 }
385
386 public TermPositions termPositions() throws IOException {
387 ensureOpen();
388 return new MultiTermPositions(subReaders, starts);
389 }
390
391 protected void commitChanges() throws IOException {
392 for (int i = 0; i < subReaders.length; i++)
393 subReaders[i].commit();
394 }
395
396 void startCommit() {
397 super.startCommit();
398 for (int i = 0; i < subReaders.length; i++) {
399 subReaders[i].startCommit();
400 }
401 }
402
403 void rollbackCommit() {
404 super.rollbackCommit();
405 for (int i = 0; i < subReaders.length; i++) {
406 subReaders[i].rollbackCommit();
407 }
408 }
409
410 protected synchronized void doClose() throws IOException {
411 for (int i = 0; i < subReaders.length; i++)
412 subReaders[i].decRef();
413
414 // maybe close directory
415 super.doClose();
416 }
417
418 public Collection getFieldNames (IndexReader.FieldOption fieldNames) {
419 ensureOpen();
420 return getFieldNames(fieldNames, this.subReaders);
421 }
422
423 static Collection getFieldNames (IndexReader.FieldOption fieldNames, IndexReader[] subReaders) {
424 // maintain a unique set of field names
425 Set fieldSet = new HashSet();
426 for (int i = 0; i < subReaders.length; i++) {
427 IndexReader reader = subReaders[i];
428 Collection names = reader.getFieldNames(fieldNames);
429 fieldSet.addAll(names);
430 }
431 return fieldSet;
432 }
433
434 // for testing
435 SegmentReader[] getSubReaders() {
436 return subReaders;
437 }
438
439 public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException {
440 for (int i = 0; i < subReaders.length; i++)
441 subReaders[i].setTermInfosIndexDivisor(indexDivisor);
442 }
443
444 public int getTermInfosIndexDivisor() throws IllegalStateException {
445 if (subReaders.length > 0)
446 return subReaders[0].getTermInfosIndexDivisor();
447 else
448 throw new IllegalStateException("no readers");
449 }
450
451 static class MultiTermEnum extends TermEnum {
452 private SegmentMergeQueue queue;
453
454 private Term term;
455 private int docFreq;
456
457 public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
458 throws IOException {
459 queue = new SegmentMergeQueue(readers.length);
460 for (int i = 0; i < readers.length; i++) {
461 IndexReader reader = readers[i];
462 TermEnum termEnum;
463
464 if (t != null) {
465 termEnum = reader.terms(t);
466 } else
467 termEnum = reader.terms();
468
469 SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
470 if (t == null ? smi.next() : termEnum.term() != null)
471 queue.put(smi); // initialize queue
472 else
473 smi.close();
474 }
475
476 if (t != null && queue.size() > 0) {
477 next();
478 }
479 }
480
481 public boolean next() throws IOException {
482 SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
483 if (top == null) {
484 term = null;
485 return false;
486 }
487
488 term = top.term;
489 docFreq = 0;
490
491 while (top != null && term.compareTo(top.term) == 0) {
492 queue.pop();
493 docFreq += top.termEnum.docFreq(); // increment freq
494 if (top.next())
495 queue.put(top); // restore queue
496 else
497 top.close(); // done with a segment
498 top = (SegmentMergeInfo)queue.top();
499 }
500 return true;
501 }
502
503 public Term term() {
504 return term;
505 }
506
507 public int docFreq() {
508 return docFreq;
509 }
510
511 public void close() throws IOException {
512 queue.close();
513 }
514 }
515
516 static class MultiTermDocs implements TermDocs {
517 protected IndexReader[] readers;
518 protected int[] starts;
519 protected Term term;
520
521 protected int base = 0;
522 protected int pointer = 0;
523
524 private TermDocs[] readerTermDocs;
525 protected TermDocs current; // == readerTermDocs[pointer]
526
527 public MultiTermDocs(IndexReader[] r, int[] s) {
528 readers = r;
529 starts = s;
530
531 readerTermDocs = new TermDocs[r.length];
532 }
533
534 public int doc() {
535 return base + current.doc();
536 }
537 public int freq() {
538 return current.freq();
539 }
540
541 public void seek(Term term) {
542 this.term = term;
543 this.base = 0;
544 this.pointer = 0;
545 this.current = null;
546 }
547
548 public void seek(TermEnum termEnum) throws IOException {
549 seek(termEnum.term());
550 }
551
552 public boolean next() throws IOException {
553 for(;;) {
554 if (current!=null && current.next()) {
555 return true;
556 }
557 else if (pointer < readers.length) {
558 base = starts[pointer];
559 current = termDocs(pointer++);
560 } else {
561 return false;
562 }
563 }
564 }
565
566 /** Optimized implementation. */
567 public int read(final int[] docs, final int[] freqs) throws IOException {
568 while (true) {
569 while (current == null) {
570 if (pointer < readers.length) { // try next segment
571 base = starts[pointer];
572 current = termDocs(pointer++);
573 } else {
574 return 0;
575 }
576 }
577 int end = current.read(docs, freqs);
578 if (end == 0) { // none left in segment
579 current = null;
580 } else { // got some
581 final int b = base; // adjust doc numbers
582 for (int i = 0; i < end; i++)
583 docs[i] += b;
584 return end;
585 }
586 }
587 }
588
589 /* A Possible future optimization could skip entire segments */
590 public boolean skipTo(int target) throws IOException {
591 for(;;) {
592 if (current != null && current.skipTo(target-base)) {
593 return true;
594 } else if (pointer < readers.length) {
595 base = starts[pointer];
596 current = termDocs(pointer++);
597 } else
598 return false;
599 }
600 }
601
602 private TermDocs termDocs(int i) throws IOException {
603 if (term == null)
604 return null;
605 TermDocs result = readerTermDocs[i];
606 if (result == null)
607 result = readerTermDocs[i] = termDocs(readers[i]);
608 result.seek(term);
609 return result;
610 }
611
612 protected TermDocs termDocs(IndexReader reader)
613 throws IOException {
614 return reader.termDocs();
615 }
616
617 public void close() throws IOException {
618 for (int i = 0; i < readerTermDocs.length; i++) {
619 if (readerTermDocs[i] != null)
620 readerTermDocs[i].close();
621 }
622 }
623 }
624
625 static class MultiTermPositions extends MultiTermDocs implements TermPositions {
626 public MultiTermPositions(IndexReader[] r, int[] s) {
627 super(r,s);
628 }
629
630 protected TermDocs termDocs(IndexReader reader) throws IOException {
631 return (TermDocs)reader.termPositions();
632 }
633
634 public int nextPosition() throws IOException {
635 return ((TermPositions)current).nextPosition();
636 }
637
638 public int getPayloadLength() {
639 return ((TermPositions)current).getPayloadLength();
640 }
641
642 public byte[] getPayload(byte[] data, int offset) throws IOException {
643 return ((TermPositions)current).getPayload(data, offset);
644 }
645
646
647 // TODO: Remove warning after API has been finalized
648 public boolean isPayloadAvailable() {
649 return ((TermPositions) current).isPayloadAvailable();
650 }
651 }
652 }