1 package org.apache.lucene.index;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.HashSet;
26
27 import java.util.List;
28 import java.util.Map;
29 import java.util.Set;
30
31 import org.apache.lucene.document.Document;
32 import org.apache.lucene.document.FieldSelector;
33 import org.apache.lucene.search.DefaultSimilarity;
34 import org.apache.lucene.store.BufferedIndexInput;
35 import org.apache.lucene.store.Directory;
36 import org.apache.lucene.store.IndexInput;
37 import org.apache.lucene.store.IndexOutput;
38 import org.apache.lucene.util.BitVector;
39 import org.apache.lucene.util.CloseableThreadLocal;
40
/**
 * <p><b>NOTE:</b> This API is new and still experimental
 * (subject to change suddenly in the next release)</p>
 *
 * @version $Id
 */
46 public class SegmentReader extends IndexReader implements Cloneable {
// Whether this reader was opened read-only; assigned by get() and reopenSegment().
protected boolean readOnly;

// Metadata describing the segment this reader is bound to.
private SegmentInfo si;
// Buffer size handed to the directory when index inputs are opened.
private int readBufferSize;

// Per-thread FieldsReader; each thread's initial value is a clone of the
// original FieldsReader held by core (see FieldsReaderLocal).
CloseableThreadLocal<FieldsReader> fieldsReaderLocal = new FieldsReaderLocal();
// Per-thread TermVectorsReader, filled in by getTermVectorsReader().
CloseableThreadLocal<TermVectorsReader> termVectorsLocal = new CloseableThreadLocal<TermVectorsReader>();

// Deleted-document bits; null while this reader has no deletions.
BitVector deletedDocs = null;
// Counts the readers that share deletedDocs; doDelete() makes a private
// copy of the BitVector first whenever this count is above one.
Ref deletedDocsRef = null;
// Set by doDelete(), cleared by doCommit() and doUndeleteAll().
private boolean deletedDocsDirty = false;
// Set by doSetNorm(), cleared by doCommit().
private boolean normsDirty = false;
// Number of deletions made through this reader that doCommit() has not
// yet added to the SegmentInfo's delete count.
private int pendingDeleteCount;

// NOTE(review): the rollback* fields are not referenced in the part of the
// file reviewed here; presumably they snapshot the matching state so a
// failed commit can be undone -- confirm against the rest of the class.
private boolean rollbackHasChanges = false;
private boolean rollbackDeletedDocsDirty = false;
private boolean rollbackNormsDirty = false;
private int rollbackPendingDeleteCount;

// optionally used for the .nrm file shared by multiple norms
private IndexInput singleNormStream;
// Counts the Norm instances reading from singleNormStream; the stream is
// closed once this count reaches zero (see Norm.closeInput()).
private Ref singleNormRef;

// Readers and streams shared with clones / reopened readers of this segment.
CoreReaders core;
71
// Holds core readers that are shared (unchanged) when
// SegmentReader is cloned or reopened
static final class CoreReaders {

  // Counts how many readers share the core objects
  // (freqStream, proxStream, tis, etc.) of this reader;
  // when ref drops to 0, these core objects may be
  // closed.  A given instance of SegmentReader may be
  // closed, even though it shares core objects with other
  // SegmentReaders:
  private final Ref ref = new Ref();

  // Name of the segment (si.name at construction time).
  final String segment;
  final FieldInfos fieldInfos;
  final IndexInput freqStream;
  // null when fieldInfos.hasProx() is false
  final IndexInput proxStream;
  // Terms reader opened with termsIndexDivisor == -1; null otherwise.
  final TermInfosReader tisNoIndex;

  // Directory the segment lives in.
  final Directory dir;
  // Directory the core files are read from: the compound file reader when
  // the segment used a compound file at construction time, else dir.
  final Directory cfsDir;
  final int readBufferSize;
  final int termsIndexDivisor;

  // Terms reader opened with a divisor other than -1; set either by the
  // constructor or later by loadTermsIndex().
  TermInfosReader tis;
  // Originals that per-thread readers are cloned from; set by openDocStores().
  FieldsReader fieldsReaderOrig;
  TermVectorsReader termVectorsReaderOrig;
  // Reader over the segment's compound file, if one has been opened.
  CompoundFileReader cfsReader;
  // Reader over the shared doc store compound file, if one has been opened.
  CompoundFileReader storeCFSReader;

  /**
   * Opens the field infos, the terms reader and the freq/prox streams of
   * the given segment. If anything fails part-way, {@link #decRef()} is
   * called so that whatever was already opened gets closed.
   *
   * @param dir directory holding the segment
   * @param si segment to open
   * @param readBufferSize buffer size used when opening inputs
   * @param termsIndexDivisor divisor passed to the terms reader; when it is
   *        -1 the reader is kept in {@link #tisNoIndex} rather than {@link #tis}
   * @throws IOException if opening any of the files fails
   */
  CoreReaders(Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor) throws IOException {
    segment = si.name;
    this.readBufferSize = readBufferSize;
    this.dir = dir;

    boolean success = false;

    try {
      Directory dir0 = dir;
      if (si.getUseCompoundFile()) {
        cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
        dir0 = cfsReader;
      }
      cfsDir = dir0;

      fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);

      this.termsIndexDivisor = termsIndexDivisor;
      TermInfosReader reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
      if (termsIndexDivisor == -1) {
        tisNoIndex = reader;
      } else {
        tis = reader;
        tisNoIndex = null;
      }

      // make sure that all index files have been read or are kept open
      // so that if an index update removes them we'll still have them
      freqStream = cfsDir.openInput(segment + "." + IndexFileNames.FREQ_EXTENSION, readBufferSize);

      if (fieldInfos.hasProx()) {
        proxStream = cfsDir.openInput(segment + "." + IndexFileNames.PROX_EXTENSION, readBufferSize);
      } else {
        proxStream = null;
      }
      success = true;
    } finally {
      if (!success) {
        // Drops the initial reference, which closes everything opened so far
        decRef();
      }
    }
  }

  /** Returns the original term vectors reader, or null if none was opened. */
  synchronized TermVectorsReader getTermVectorsReaderOrig() {
    return termVectorsReaderOrig;
  }

  /** Returns the original fields reader, or null if doc stores are not open. */
  synchronized FieldsReader getFieldsReaderOrig() {
    return fieldsReaderOrig;
  }

  /** Registers one more reader sharing these core objects. */
  synchronized void incRef() {
    ref.incRef();
  }

  /** Returns the compound file reader, or null if none has been opened. */
  synchronized Directory getCFSReader() {
    return cfsReader;
  }

  /** Returns the terms reader with a loaded index if there is one, else the index-less reader. */
  synchronized TermInfosReader getTermsReader() {
    if (tis != null) {
      return tis;
    } else {
      return tisNoIndex;
    }
  }

  /** Returns true once {@link #tis} has been set. */
  synchronized boolean termsIndexIsLoaded() {
    return tis != null;
  }

  // NOTE: only called from IndexWriter when a near
  // real-time reader is opened, or applyDeletes is run,
  // sharing a segment that's still being merged.  This
  // method is not fully thread safe, and relies on the
  // synchronization in IndexWriter
  synchronized void loadTermsIndex(SegmentInfo si, int termsIndexDivisor) throws IOException {
    if (tis == null) {
      Directory dir0;
      if (si.getUseCompoundFile()) {
        // In some cases, we were originally opened when CFS
        // was not used, but then we are asked to open the
        // terms reader with index, the segment has switched
        // to CFS
        if (cfsReader == null) {
          cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
        }
        dir0 = cfsReader;
      } else {
        dir0 = dir;
      }

      tis = new TermInfosReader(dir0, segment, fieldInfos, readBufferSize, termsIndexDivisor);
    }
  }

  /**
   * Drops one reference; when the count reaches zero every reader and
   * stream held here is closed. The closes run in sequence, so an
   * exception from one of them skips the ones after it.
   *
   * @throws IOException if closing one of the resources fails
   */
  synchronized void decRef() throws IOException {

    if (ref.decRef() == 0) {

      // close everything, nothing is shared anymore with other readers
      if (tis != null) {
        tis.close();
        // null so if an app hangs on to us we still free most ram
        tis = null;
      }

      if (tisNoIndex != null) {
        tisNoIndex.close();
      }

      if (freqStream != null) {
        freqStream.close();
      }

      if (proxStream != null) {
        proxStream.close();
      }

      if (termVectorsReaderOrig != null) {
        termVectorsReaderOrig.close();
      }

      if (fieldsReaderOrig != null) {
        fieldsReaderOrig.close();
      }

      if (cfsReader != null) {
        cfsReader.close();
      }

      if (storeCFSReader != null) {
        storeCFSReader.close();
      }
    }
  }

  /**
   * Opens the stored fields reader and, when the segment has term vectors,
   * the term vectors reader. Does nothing if the fields reader is already
   * open.
   *
   * @param si segment info; its name must equal {@link #segment}
   * @throws CorruptIndexException if the segment owns its doc store and the
   *         fields reader's size disagrees with si.docCount
   * @throws IOException if opening any of the files fails
   */
  synchronized void openDocStores(SegmentInfo si) throws IOException {

    assert si.name.equals(segment);

    if (fieldsReaderOrig == null) {
      final Directory storeDir;
      if (si.getDocStoreOffset() != -1) {
        // Doc store offset other than -1: stored fields live in the doc
        // store segment named by si.getDocStoreSegment()
        if (si.getDocStoreIsCompoundFile()) {
          assert storeCFSReader == null;
          storeCFSReader = new CompoundFileReader(dir,
                                                  si.getDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION,
                                                  readBufferSize);
          storeDir = storeCFSReader;
          assert storeDir != null;
        } else {
          storeDir = dir;
          assert storeDir != null;
        }
      } else if (si.getUseCompoundFile()) {
        // In some cases, we were originally opened when CFS
        // was not used, but then we are asked to open doc
        // stores after the segment has switched to CFS
        if (cfsReader == null) {
          cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
        }
        storeDir = cfsReader;
        assert storeDir != null;
      } else {
        storeDir = dir;
        assert storeDir != null;
      }

      final String storesSegment;
      if (si.getDocStoreOffset() != -1) {
        storesSegment = si.getDocStoreSegment();
      } else {
        storesSegment = segment;
      }

      fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize,
                                          si.getDocStoreOffset(), si.docCount);

      // Verify two sources of "maxDoc" agree:
      if (si.getDocStoreOffset() == -1 && fieldsReaderOrig.size() != si.docCount) {
        throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.size() + " but segmentInfo shows " + si.docCount);
      }

      if (fieldInfos.hasVectors()) { // open term vector files only as needed
        termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.getDocStoreOffset(), si.docCount);
      }
    }
  }
}
291
292 /**
293 * Sets the initial value
294 */
295 private class FieldsReaderLocal extends CloseableThreadLocal<FieldsReader> {
296 @Override
297 protected FieldsReader initialValue() {
298 return (FieldsReader) core.getFieldsReaderOrig().clone();
299 }
300 }
301
302 static class Ref {
303 private int refCount = 1;
304
305 @Override
306 public String toString() {
307 return "refcount: "+refCount;
308 }
309
310 public synchronized int refCount() {
311 return refCount;
312 }
313
314 public synchronized int incRef() {
315 assert refCount > 0;
316 refCount++;
317 return refCount;
318 }
319
320 public synchronized int decRef() {
321 assert refCount > 0;
322 refCount--;
323 return refCount;
324 }
325 }
326
/**
 * Holds the norms of one field for one reader.
 *
 * Byte[] referencing is used because a new norm object needs
 * to be created for each clone, and the byte array is all
 * that is needed for sharing between cloned readers.  The
 * current norm referencing is for sharing between readers
 * whereas the byte[] referencing is for copy on write which
 * is independent of reader references (i.e. incRef, decRef).
 */

final class Norm implements Cloneable {
  // Number of references to this Norm instance (see incRef/decRef).
  private int refCount = 1;

  // If this instance is a clone, the originalNorm
  // references the Norm that has a real open IndexInput:
  private Norm origNorm;

  // Input the bytes are read from; set to null by closeInput() and on clones.
  private IndexInput in;
  // Position in "in" where this field's norms start.
  private long normSeek;

  // null until bytes is set
  private Ref bytesRef;
  // Cached norm bytes; null until bytes() has loaded them.
  private byte[] bytes;
  // Set by copyOnWrite(), cleared by reWrite().
  private boolean dirty;
  // Field number, used for the norm generation / file name in reWrite().
  private int number;
  private boolean rollbackDirty;

  /**
   * @param in input to read the norms from
   * @param number field number
   * @param normSeek position in {@code in} where the norms start
   */
  public Norm(IndexInput in, int number, long normSeek) {
    this.in = in;
    this.number = number;
    this.normSeek = normSeek;
  }

  /** Adds one reference to this Norm. */
  public synchronized void incRef() {
    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
    refCount++;
  }

  /**
   * Releases the input: closes it when it is private to this Norm, or
   * drops one reference on the shared single norm stream and closes that
   * stream when the last reference is gone.
   */
  private void closeInput() throws IOException {
    if (in != null) {
      if (in != singleNormStream) {
        // It's private to us -- just close it
        in.close();
      } else {
        // We are sharing this with others -- decRef and
        // maybe close the shared norm stream
        if (singleNormRef.decRef() == 0) {
          singleNormStream.close();
          singleNormStream = null;
        }
      }

      in = null;
    }
  }

  /**
   * Drops one reference. At zero, the reference on origNorm is released
   * (or, for an original, the input is closed) and the reference on the
   * cached bytes, if any, is released.
   */
  public synchronized void decRef() throws IOException {
    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);

    if (--refCount == 0) {
      if (origNorm != null) {
        origNorm.decRef();
        origNorm = null;
      } else {
        closeInput();
      }

      if (bytes != null) {
        assert bytesRef != null;
        bytesRef.decRef();
        bytes = null;
        bytesRef = null;
      } else {
        assert bytesRef == null;
      }
    }
  }

  // Load bytes but do not cache them if they were not
  // already cached
  public synchronized void bytes(byte[] bytesOut, int offset, int len) throws IOException {
    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
    if (bytes != null) {
      // Already cached -- copy from cache:
      assert len <= maxDoc();
      System.arraycopy(bytes, 0, bytesOut, offset, len);
    } else {
      // Not cached
      if (origNorm != null) {
        // Ask origNorm to load
        origNorm.bytes(bytesOut, offset, len);
      } else {
        // We are orig -- read ourselves from disk:
        synchronized(in) {
          in.seek(normSeek);
          in.readBytes(bytesOut, offset, len, false);
        }
      }
    }
  }

  // Load & cache full bytes array.  Returns bytes.
  public synchronized byte[] bytes() throws IOException {
    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
    if (bytes == null) {                     // value not yet read
      assert bytesRef == null;
      if (origNorm != null) {
        // Ask origNorm to load so that for a series of
        // reopened readers we share a single read-only
        // byte[]
        bytes = origNorm.bytes();
        bytesRef = origNorm.bytesRef;
        bytesRef.incRef();

        // Once we've loaded the bytes we no longer need
        // origNorm:
        origNorm.decRef();
        origNorm = null;

      } else {
        // We are the origNorm, so load the bytes for real
        // ourself:
        final int count = maxDoc();
        bytes = new byte[count];

        // Since we are orig, in must not be null
        assert in != null;

        // Read from disk.
        synchronized(in) {
          in.seek(normSeek);
          in.readBytes(bytes, 0, count, false);
        }

        bytesRef = new Ref();
        // Everything is in memory now, so the input is no longer needed
        closeInput();
      }
    }

    return bytes;
  }

  // Only for testing
  Ref bytesRef() {
    return bytesRef;
  }

  // Called if we intend to change a norm value.  We make a
  // private copy of bytes if it's shared with others:
  public synchronized byte[] copyOnWrite() throws IOException {
    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);
    // Make sure the bytes are loaded and cached first
    bytes();
    assert bytes != null;
    assert bytesRef != null;
    if (bytesRef.refCount() > 1) {
      // I cannot be the origNorm for another norm
      // instance if I'm being changed.  Ie, only the
      // "head Norm" can be changed:
      assert refCount == 1;
      final Ref oldRef = bytesRef;
      bytes = cloneNormBytes(bytes);
      bytesRef = new Ref();
      oldRef.decRef();
    }
    dirty = true;
    return bytes;
  }

  // Returns a copy of this Norm instance that shares
  // IndexInput & bytes with the original one
  @Override
  public synchronized Object clone() {
    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0);

    Norm clone;
    try {
      clone = (Norm) super.clone();
    } catch (CloneNotSupportedException cnse) {
      // Cannot happen
      throw new RuntimeException("unexpected CloneNotSupportedException", cnse);
    }
    clone.refCount = 1;

    if (bytes != null) {
      assert bytesRef != null;
      assert origNorm == null;

      // Clone holds a reference to my bytes:
      clone.bytesRef.incRef();
    } else {
      assert bytesRef == null;
      if (origNorm == null) {
        // I become the origNorm for the clone:
        clone.origNorm = this;
      }
      // Otherwise super.clone() already copied my origNorm into the clone
      clone.origNorm.incRef();
    }

    // Only the origNorm will actually readBytes from in:
    clone.in = null;

    return clone;
  }

  // Flush all pending changes to the next generation
  // separate norms file.
  public void reWrite(SegmentInfo si) throws IOException {
    assert refCount > 0 && (origNorm == null || origNorm.refCount > 0): "refCount=" + refCount + " origNorm=" + origNorm;

    // NOTE: norms are re-written in regular directory, not cfs
    si.advanceNormGen(this.number);
    IndexOutput out = directory().createOutput(si.getNormFileName(this.number));
    try {
      out.writeBytes(bytes, maxDoc());
    } finally {
      out.close();
    }
    this.dirty = false;
  }
}
546
// Norms of this reader keyed by field name; filled in by openNorms() and,
// for clones / reopened readers, by reopenSegment().
Map<String,Norm> norms = new HashMap<String,Norm>();
548
549 /**
550 * @throws CorruptIndexException if the index is corrupt
551 * @throws IOException if there is a low-level IO error
552 */
553 public static SegmentReader get(boolean readOnly, SegmentInfo si, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
554 return get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor);
555 }
556
557 /**
558 * @throws CorruptIndexException if the index is corrupt
559 * @throws IOException if there is a low-level IO error
560 */
561 public static SegmentReader get(boolean readOnly,
562 Directory dir,
563 SegmentInfo si,
564 int readBufferSize,
565 boolean doOpenStores,
566 int termInfosIndexDivisor)
567 throws CorruptIndexException, IOException {
568 SegmentReader instance = readOnly ? new ReadOnlySegmentReader() : new SegmentReader();
569 instance.readOnly = readOnly;
570 instance.si = si;
571 instance.readBufferSize = readBufferSize;
572
573 boolean success = false;
574
575 try {
576 instance.core = new CoreReaders(dir, si, readBufferSize, termInfosIndexDivisor);
577 if (doOpenStores) {
578 instance.core.openDocStores(si);
579 }
580 instance.loadDeletedDocs();
581 instance.openNorms(instance.core.cfsDir, readBufferSize);
582 success = true;
583 } finally {
584
585 // With lock-less commits, it's entirely possible (and
586 // fine) to hit a FileNotFound exception above. In
587 // this case, we want to explicitly close any subset
588 // of things that were opened so that we don't have to
589 // wait for a GC to do so.
590 if (!success) {
591 instance.doClose();
592 }
593 }
594 return instance;
595 }
596
597 void openDocStores() throws IOException {
598 core.openDocStores(si);
599 }
600
601 private void loadDeletedDocs() throws IOException {
602 // NOTE: the bitvector is stored using the regular directory, not cfs
603 if (hasDeletions(si)) {
604 deletedDocs = new BitVector(directory(), si.getDelFileName());
605 deletedDocsRef = new Ref();
606
607 assert si.getDelCount() == deletedDocs.count() :
608 "delete count mismatch: info=" + si.getDelCount() + " vs BitVector=" + deletedDocs.count();
609
610 // Verify # deletes does not exceed maxDoc for this
611 // segment:
612 assert si.getDelCount() <= maxDoc() :
613 "delete count mismatch: " + deletedDocs.count() + ") exceeds max doc (" + maxDoc() + ") for segment " + si.name;
614
615 } else
616 assert si.getDelCount() == 0;
617 }
618
619 /**
620 * Clones the norm bytes. May be overridden by subclasses. New and experimental.
621 * @param bytes Byte array to clone
622 * @return New BitVector
623 */
624 protected byte[] cloneNormBytes(byte[] bytes) {
625 byte[] cloneBytes = new byte[bytes.length];
626 System.arraycopy(bytes, 0, cloneBytes, 0, bytes.length);
627 return cloneBytes;
628 }
629
630 /**
631 * Clones the deleteDocs BitVector. May be overridden by subclasses. New and experimental.
632 * @param bv BitVector to clone
633 * @return New BitVector
634 */
635 protected BitVector cloneDeletedDocs(BitVector bv) {
636 return (BitVector)bv.clone();
637 }
638
639 @Override
640 public final synchronized Object clone() {
641 try {
642 return clone(readOnly); // Preserve current readOnly
643 } catch (Exception ex) {
644 throw new RuntimeException(ex);
645 }
646 }
647
648 @Override
649 public final synchronized IndexReader clone(boolean openReadOnly) throws CorruptIndexException, IOException {
650 return reopenSegment(si, true, openReadOnly);
651 }
652
/**
 * Creates a reader for the given segment info that shares this reader's
 * core readers and, where they are unchanged, its deleted docs and norms.
 * Returns this reader itself when nothing changed, no clone was requested,
 * and both this reader and the requested one are read-only.
 *
 * @param si segment info the new reader is bound to
 * @param doClone true when called for clone(); the info must then carry
 *        no norm or deletion changes
 * @param openReadOnly whether the new reader is read-only
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if there is a low-level IO error
 */
synchronized SegmentReader reopenSegment(SegmentInfo si, boolean doClone, boolean openReadOnly) throws CorruptIndexException, IOException {
  // Deletions are current when both infos agree on having deletions and,
  // if they have them, name the same deletes file
  boolean deletionsUpToDate = (this.si.hasDeletions() == si.hasDeletions())
                                && (!si.hasDeletions() || this.si.getDelFileName().equals(si.getDelFileName()));
  boolean normsUpToDate = true;

  // A field's norms changed when its norm file name differs between the infos
  boolean[] fieldNormsChanged = new boolean[core.fieldInfos.size()];
  final int fieldCount = core.fieldInfos.size();
  for (int i = 0; i < fieldCount; i++) {
    if (!this.si.getNormFileName(i).equals(si.getNormFileName(i))) {
      normsUpToDate = false;
      fieldNormsChanged[i] = true;
    }
  }

  // if we're cloning we need to run through the reopenSegment logic
  // also if both old and new readers aren't readonly, we clone to avoid sharing modifications
  if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly) {
    return this;
  }

  // When cloning, the incoming SegmentInfos should not
  // have any changes in it:
  assert !doClone || (normsUpToDate && deletionsUpToDate);

  // clone reader
  SegmentReader clone = openReadOnly ? new ReadOnlySegmentReader() : new SegmentReader();

  boolean success = false;
  try {
    // The new reader shares our core readers, so take a reference on them
    core.incRef();
    clone.core = core;
    clone.readOnly = openReadOnly;
    clone.si = si;
    clone.readBufferSize = readBufferSize;

    if (!openReadOnly && hasChanges) {
      // My pending changes transfer to the new reader
      clone.pendingDeleteCount = pendingDeleteCount;
      clone.deletedDocsDirty = deletedDocsDirty;
      clone.normsDirty = normsDirty;
      clone.hasChanges = hasChanges;
      hasChanges = false;
    }

    if (doClone) {
      if (deletedDocs != null) {
        // Share the BitVector; doDelete() copies it before the first write
        deletedDocsRef.incRef();
        clone.deletedDocs = deletedDocs;
        clone.deletedDocsRef = deletedDocsRef;
      }
    } else {
      if (!deletionsUpToDate) {
        // load deleted docs
        assert clone.deletedDocs == null;
        clone.loadDeletedDocs();
      } else if (deletedDocs != null) {
        deletedDocsRef.incRef();
        clone.deletedDocs = deletedDocs;
        clone.deletedDocsRef = deletedDocsRef;
      }
    }

    clone.norms = new HashMap<String,Norm>();

    // Clone norms
    for (int i = 0; i < fieldNormsChanged.length; i++) {

      // Clone unchanged norms to the cloned reader
      if (doClone || !fieldNormsChanged[i]) {
        final String curField = core.fieldInfos.fieldInfo(i).name;
        Norm norm = this.norms.get(curField);
        if (norm != null)
          clone.norms.put(curField, (Norm) norm.clone());
      }
    }

    // If we are not cloning, then this will open anew
    // any norms that have changed:
    clone.openNorms(si.getUseCompoundFile() ? core.getCFSReader() : directory(), readBufferSize);

    success = true;
  } finally {
    if (!success) {
      // An exception occurred during reopen, we have to decRef the norms
      // that we incRef'ed already and close singleNormsStream and FieldsReader
      clone.decRef();
    }
  }

  return clone;
}
744
745 @Override
746 protected void doCommit(Map<String,String> commitUserData) throws IOException {
747 if (hasChanges) {
748 if (deletedDocsDirty) { // re-write deleted
749 si.advanceDelGen();
750
751 // We can write directly to the actual name (vs to a
752 // .tmp & renaming it) because the file is not live
753 // until segments file is written:
754 deletedDocs.write(directory(), si.getDelFileName());
755
756 si.setDelCount(si.getDelCount()+pendingDeleteCount);
757 pendingDeleteCount = 0;
758 assert deletedDocs.count() == si.getDelCount(): "delete count mismatch during commit: info=" + si.getDelCount() + " vs BitVector=" + deletedDocs.count();
759 } else {
760 assert pendingDeleteCount == 0;
761 }
762
763 if (normsDirty) { // re-write norms
764 si.setNumFields(core.fieldInfos.size());
765 for (final Norm norm : norms.values()) {
766 if (norm.dirty) {
767 norm.reWrite(si);
768 }
769 }
770 }
771 deletedDocsDirty = false;
772 normsDirty = false;
773 hasChanges = false;
774 }
775 }
776
777 FieldsReader getFieldsReader() {
778 return fieldsReaderLocal.get();
779 }
780
781 @Override
782 protected void doClose() throws IOException {
783 termVectorsLocal.close();
784 fieldsReaderLocal.close();
785
786 if (deletedDocs != null) {
787 deletedDocsRef.decRef();
788 // null so if an app hangs on to us we still free most ram
789 deletedDocs = null;
790 }
791
792 for (final Norm norm : norms.values()) {
793 norm.decRef();
794 }
795 if (core != null) {
796 core.decRef();
797 }
798 }
799
800 static boolean hasDeletions(SegmentInfo si) throws IOException {
801 // Don't call ensureOpen() here (it could affect performance)
802 return si.hasDeletions();
803 }
804
805 @Override
806 public boolean hasDeletions() {
807 // Don't call ensureOpen() here (it could affect performance)
808 return deletedDocs != null;
809 }
810
811 static boolean usesCompoundFile(SegmentInfo si) throws IOException {
812 return si.getUseCompoundFile();
813 }
814
815 static boolean hasSeparateNorms(SegmentInfo si) throws IOException {
816 return si.hasSeparateNorms();
817 }
818
819 @Override
820 protected void doDelete(int docNum) {
821 if (deletedDocs == null) {
822 deletedDocs = new BitVector(maxDoc());
823 deletedDocsRef = new Ref();
824 }
825 // there is more than 1 SegmentReader with a reference to this
826 // deletedDocs BitVector so decRef the current deletedDocsRef,
827 // clone the BitVector, create a new deletedDocsRef
828 if (deletedDocsRef.refCount() > 1) {
829 Ref oldRef = deletedDocsRef;
830 deletedDocs = cloneDeletedDocs(deletedDocs);
831 deletedDocsRef = new Ref();
832 oldRef.decRef();
833 }
834 deletedDocsDirty = true;
835 if (!deletedDocs.getAndSet(docNum))
836 pendingDeleteCount++;
837 }
838
839 @Override
840 protected void doUndeleteAll() {
841 deletedDocsDirty = false;
842 if (deletedDocs != null) {
843 assert deletedDocsRef != null;
844 deletedDocsRef.decRef();
845 deletedDocs = null;
846 deletedDocsRef = null;
847 pendingDeleteCount = 0;
848 si.clearDelGen();
849 si.setDelCount(0);
850 } else {
851 assert deletedDocsRef == null;
852 assert pendingDeleteCount == 0;
853 }
854 }
855
856 List<String> files() throws IOException {
857 return new ArrayList<String>(si.files());
858 }
859
860 @Override
861 public TermEnum terms() {
862 ensureOpen();
863 return core.getTermsReader().terms();
864 }
865
866 @Override
867 public TermEnum terms(Term t) throws IOException {
868 ensureOpen();
869 return core.getTermsReader().terms(t);
870 }
871
872 FieldInfos fieldInfos() {
873 return core.fieldInfos;
874 }
875
876 @Override
877 public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
878 ensureOpen();
879 return getFieldsReader().doc(n, fieldSelector);
880 }
881
882 @Override
883 public synchronized boolean isDeleted(int n) {
884 return (deletedDocs != null && deletedDocs.get(n));
885 }
886
887 @Override
888 public TermDocs termDocs(Term term) throws IOException {
889 if (term == null) {
890 return new AllTermDocs(this);
891 } else {
892 return super.termDocs(term);
893 }
894 }
895
896 @Override
897 public TermDocs termDocs() throws IOException {
898 ensureOpen();
899 return new SegmentTermDocs(this);
900 }
901
902 @Override
903 public TermPositions termPositions() throws IOException {
904 ensureOpen();
905 return new SegmentTermPositions(this);
906 }
907
908 @Override
909 public int docFreq(Term t) throws IOException {
910 ensureOpen();
911 TermInfo ti = core.getTermsReader().get(t);
912 if (ti != null)
913 return ti.docFreq;
914 else
915 return 0;
916 }
917
918 @Override
919 public int numDocs() {
920 // Don't call ensureOpen() here (it could affect performance)
921 int n = maxDoc();
922 if (deletedDocs != null)
923 n -= deletedDocs.count();
924 return n;
925 }
926
927 @Override
928 public int maxDoc() {
929 // Don't call ensureOpen() here (it could affect performance)
930 return si.docCount;
931 }
932
933 /**
934 * @see IndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)
935 */
936 @Override
937 public Collection<String> getFieldNames(IndexReader.FieldOption fieldOption) {
938 ensureOpen();
939
940 Set<String> fieldSet = new HashSet<String>();
941 for (int i = 0; i < core.fieldInfos.size(); i++) {
942 FieldInfo fi = core.fieldInfos.fieldInfo(i);
943 if (fieldOption == IndexReader.FieldOption.ALL) {
944 fieldSet.add(fi.name);
945 }
946 else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
947 fieldSet.add(fi.name);
948 }
949 else if (fi.omitTermFreqAndPositions && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS) {
950 fieldSet.add(fi.name);
951 }
952 else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
953 fieldSet.add(fi.name);
954 }
955 else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) {
956 fieldSet.add(fi.name);
957 }
958 else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
959 fieldSet.add(fi.name);
960 }
961 else if (fi.storeTermVector == true &&
962 fi.storePositionWithTermVector == false &&
963 fi.storeOffsetWithTermVector == false &&
964 fieldOption == IndexReader.FieldOption.TERMVECTOR) {
965 fieldSet.add(fi.name);
966 }
967 else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
968 fieldSet.add(fi.name);
969 }
970 else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
971 fieldSet.add(fi.name);
972 }
973 else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
974 fieldSet.add(fi.name);
975 }
976 else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) &&
977 fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
978 fieldSet.add(fi.name);
979 }
980 }
981 return fieldSet;
982 }
983
984
985 @Override
986 public synchronized boolean hasNorms(String field) {
987 ensureOpen();
988 return norms.containsKey(field);
989 }
990
991 // can return null if norms aren't stored
992 protected synchronized byte[] getNorms(String field) throws IOException {
993 Norm norm = norms.get(field);
994 if (norm == null) return null; // not indexed, or norms not stored
995 return norm.bytes();
996 }
997
998 // returns fake norms if norms aren't available
999 @Override
1000 public synchronized byte[] norms(String field) throws IOException {
1001 ensureOpen();
1002 byte[] bytes = getNorms(field);
1003 return bytes;
1004 }
1005
1006 @Override
1007 protected void doSetNorm(int doc, String field, byte value)
1008 throws IOException {
1009 Norm norm = norms.get(field);
1010 if (norm == null) // not an indexed field
1011 return;
1012
1013 normsDirty = true;
1014 norm.copyOnWrite()[doc] = value; // set the value
1015 }
1016
1017 /** Read norms into a pre-allocated array. */
1018 @Override
1019 public synchronized void norms(String field, byte[] bytes, int offset)
1020 throws IOException {
1021
1022 ensureOpen();
1023 Norm norm = norms.get(field);
1024 if (norm == null) {
1025 Arrays.fill(bytes, offset, bytes.length, DefaultSimilarity.encodeNorm(1.0f));
1026 return;
1027 }
1028
1029 norm.bytes(bytes, offset, maxDoc());
1030 }
1031
1032
1033 private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
1034 long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
1035 int maxDoc = maxDoc();
1036 for (int i = 0; i < core.fieldInfos.size(); i++) {
1037 FieldInfo fi = core.fieldInfos.fieldInfo(i);
1038 if (norms.containsKey(fi.name)) {
1039 // in case this SegmentReader is being re-opened, we might be able to
1040 // reuse some norm instances and skip loading them here
1041 continue;
1042 }
1043 if (fi.isIndexed && !fi.omitNorms) {
1044 Directory d = directory();
1045 String fileName = si.getNormFileName(fi.number);
1046 if (!si.hasSeparateNorms(fi.number)) {
1047 d = cfsDir;
1048 }
1049
1050 // singleNormFile means multiple norms share this file
1051 boolean singleNormFile = fileName.endsWith("." + IndexFileNames.NORMS_EXTENSION);
1052 IndexInput normInput = null;
1053 long normSeek;
1054
1055 if (singleNormFile) {
1056 normSeek = nextNormSeek;
1057 if (singleNormStream == null) {
1058 singleNormStream = d.openInput(fileName, readBufferSize);
1059 singleNormRef = new Ref();
1060 } else {
1061 singleNormRef.incRef();
1062 }
1063 // All norms in the .nrm file can share a single IndexInput since
1064 // they are only used in a synchronized context.
1065 // If this were to change in the future, a clone could be done here.
1066 normInput = singleNormStream;
1067 } else {
1068 normSeek = 0;
1069 normInput = d.openInput(fileName);
1070 }
1071
1072 norms.put(fi.name, new Norm(normInput, fi.number, normSeek));
1073 nextNormSeek += maxDoc; // increment also if some norms are separate
1074 }
1075 }
1076 }
1077
1078 boolean termsIndexLoaded() {
1079 return core.termsIndexIsLoaded();
1080 }
1081
1082 // NOTE: only called from IndexWriter when a near
1083 // real-time reader is opened, or applyDeletes is run,
1084 // sharing a segment that's still being merged. This
1085 // method is not thread safe, and relies on the
1086 // synchronization in IndexWriter
1087 void loadTermsIndex(int termsIndexDivisor) throws IOException {
1088 core.loadTermsIndex(si, termsIndexDivisor);
1089 }
1090
1091 // for testing only
1092 boolean normsClosed() {
1093 if (singleNormStream != null) {
1094 return false;
1095 }
1096 for (final Norm norm : norms.values()) {
1097 if (norm.refCount > 0) {
1098 return false;
1099 }
1100 }
1101 return true;
1102 }
1103
1104 // for testing only
1105 boolean normsClosed(String field) {
1106 return norms.get(field).refCount == 0;
1107 }
1108
1109 /**
1110 * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
1111 * @return TermVectorsReader
1112 */
1113 TermVectorsReader getTermVectorsReader() {
1114 TermVectorsReader tvReader = termVectorsLocal.get();
1115 if (tvReader == null) {
1116 TermVectorsReader orig = core.getTermVectorsReaderOrig();
1117 if (orig == null) {
1118 return null;
1119 } else {
1120 try {
1121 tvReader = (TermVectorsReader) orig.clone();
1122 } catch (CloneNotSupportedException cnse) {
1123 return null;
1124 }
1125 }
1126 termVectorsLocal.set(tvReader);
1127 }
1128 return tvReader;
1129 }
1130
1131 TermVectorsReader getTermVectorsReaderOrig() {
1132 return core.getTermVectorsReaderOrig();
1133 }
1134
1135 /** Return a term frequency vector for the specified document and field. The
1136 * vector returned contains term numbers and frequencies for all terms in
1137 * the specified field of this document, if the field had storeTermVector
1138 * flag set. If the flag was not set, the method returns null.
1139 * @throws IOException
1140 */
1141 @Override
1142 public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
1143 // Check if this field is invalid or has no stored term vector
1144 ensureOpen();
1145 FieldInfo fi = core.fieldInfos.fieldInfo(field);
1146 if (fi == null || !fi.storeTermVector)
1147 return null;
1148
1149 TermVectorsReader termVectorsReader = getTermVectorsReader();
1150 if (termVectorsReader == null)
1151 return null;
1152
1153 return termVectorsReader.get(docNumber, field);
1154 }
1155
1156
1157 @Override
1158 public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
1159 ensureOpen();
1160 FieldInfo fi = core.fieldInfos.fieldInfo(field);
1161 if (fi == null || !fi.storeTermVector)
1162 return;
1163
1164 TermVectorsReader termVectorsReader = getTermVectorsReader();
1165 if (termVectorsReader == null) {
1166 return;
1167 }
1168
1169
1170 termVectorsReader.get(docNumber, field, mapper);
1171 }
1172
1173
1174 @Override
1175 public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
1176 ensureOpen();
1177
1178 TermVectorsReader termVectorsReader = getTermVectorsReader();
1179 if (termVectorsReader == null)
1180 return;
1181
1182 termVectorsReader.get(docNumber, mapper);
1183 }
1184
1185 /** Return an array of term frequency vectors for the specified document.
1186 * The array contains a vector for each vectorized field in the document.
1187 * Each vector vector contains term numbers and frequencies for all terms
1188 * in a given vectorized field.
1189 * If no such fields existed, the method returns null.
1190 * @throws IOException
1191 */
1192 @Override
1193 public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
1194 ensureOpen();
1195
1196 TermVectorsReader termVectorsReader = getTermVectorsReader();
1197 if (termVectorsReader == null)
1198 return null;
1199
1200 return termVectorsReader.get(docNumber);
1201 }
1202
1203 /**
1204 * Return the name of the segment this reader is reading.
1205 */
1206 public String getSegmentName() {
1207 return core.segment;
1208 }
1209
1210 /**
1211 * Return the SegmentInfo of the segment this reader is reading.
1212 */
1213 SegmentInfo getSegmentInfo() {
1214 return si;
1215 }
1216
1217 void setSegmentInfo(SegmentInfo info) {
1218 si = info;
1219 }
1220
1221 void startCommit() {
1222 rollbackHasChanges = hasChanges;
1223 rollbackDeletedDocsDirty = deletedDocsDirty;
1224 rollbackNormsDirty = normsDirty;
1225 rollbackPendingDeleteCount = pendingDeleteCount;
1226 for (Norm norm : norms.values()) {
1227 norm.rollbackDirty = norm.dirty;
1228 }
1229 }
1230
1231 void rollbackCommit() {
1232 hasChanges = rollbackHasChanges;
1233 deletedDocsDirty = rollbackDeletedDocsDirty;
1234 normsDirty = rollbackNormsDirty;
1235 pendingDeleteCount = rollbackPendingDeleteCount;
1236 for (Norm norm : norms.values()) {
1237 norm.dirty = norm.rollbackDirty;
1238 }
1239 }
1240
1241 /** Returns the directory this index resides in. */
1242 @Override
1243 public Directory directory() {
1244 // Don't ensureOpen here -- in certain cases, when a
1245 // cloned/reopened reader needs to commit, it may call
1246 // this method on the closed original reader
1247 return core.dir;
1248 }
1249
1250 // This is necessary so that cloned SegmentReaders (which
1251 // share the underlying postings data) will map to the
1252 // same entry in the FieldCache. See LUCENE-1579.
1253 @Override
1254 public final Object getFieldCacheKey() {
1255 return core.freqStream;
1256 }
1257
1258 @Override
1259 public long getUniqueTermCount() {
1260 return core.getTermsReader().size();
1261 }
1262
1263 /**
1264 * Lotsa tests did hacks like:<br/>
1265 * SegmentReader reader = (SegmentReader) IndexReader.open(dir);<br/>
1266 * They broke. This method serves as a hack to keep hacks working
1267 * We do it with R/W access for the tests (BW compatibility)
1268 * @deprecated Remove this when tests are fixed!
1269 */
1270 static SegmentReader getOnlySegmentReader(Directory dir) throws IOException {
1271 return getOnlySegmentReader(IndexReader.open(dir,false));
1272 }
1273
1274 static SegmentReader getOnlySegmentReader(IndexReader reader) {
1275 if (reader instanceof SegmentReader)
1276 return (SegmentReader) reader;
1277
1278 if (reader instanceof DirectoryReader) {
1279 IndexReader[] subReaders = reader.getSequentialSubReaders();
1280 if (subReaders.length != 1)
1281 throw new IllegalArgumentException(reader + " has " + subReaders.length + " segments instead of exactly one");
1282
1283 return (SegmentReader) subReaders[0];
1284 }
1285
1286 throw new IllegalArgumentException(reader + " is not a SegmentReader or a single-segment DirectoryReader");
1287 }
1288
1289 @Override
1290 public int getTermInfosIndexDivisor() {
1291 return core.termsIndexDivisor;
1292 }
1293 }