1 package org.apache.lucene.index;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.Iterator;
27 import java.util.List;
28 import java.util.Map;
29 import java.util.Set;
30
31 import org.apache.lucene.document.Document;
32 import org.apache.lucene.document.FieldSelector;
33 import org.apache.lucene.search.DefaultSimilarity;
34 import org.apache.lucene.store.BufferedIndexInput;
35 import org.apache.lucene.store.Directory;
36 import org.apache.lucene.store.IndexInput;
37 import org.apache.lucene.store.IndexOutput;
38 import org.apache.lucene.util.BitVector;
39 import org.apache.lucene.util.CloseableThreadLocal;
40
41 /**
42 * @version $Id: SegmentReader.java 745797 2009-02-19 09:54:43Z mikemccand $
43 */
44 class SegmentReader extends DirectoryIndexReader {
// Name of the segment being read; assigned from si.name in initialize().
private String segment;
// Metadata of the segment being read.
private SegmentInfo si;
// Buffer size passed to the IndexInputs and readers opened by this reader.
private int readBufferSize;

FieldInfos fieldInfos;
// Stored-fields reader; only created when the doc stores are opened.
private FieldsReader fieldsReader;

TermInfosReader tis;
// Original term vectors reader; per-thread clones of it are kept in termVectorsLocal.
TermVectorsReader termVectorsReaderOrig = null;
CloseableThreadLocal termVectorsLocal = new CloseableThreadLocal();

// Deleted-documents bit set; null means this reader sees no deletions.
BitVector deletedDocs = null;
// Pending, not yet committed changes.
private boolean deletedDocsDirty = false;
private boolean normsDirty = false;
private boolean undeleteAll = false;
// Number of documents newly marked deleted since the last commit.
private int pendingDeleteCount;

// Snapshots of the pending-change state, taken in startCommit() and
// restored in rollbackCommit().
private boolean rollbackDeletedDocsDirty = false;
private boolean rollbackNormsDirty = false;
private boolean rollbackUndeleteAll = false;
private int rollbackPendingDeleteCount;
// Consulted on reopen to decide which reader implementation to create.
private boolean readOnly;

IndexInput freqStream;
IndexInput proxStream;

// optionally used for the .nrm file shared by multiple norms
private IndexInput singleNormStream;

// Compound File Reader when based on a compound file segment
CompoundFileReader cfsReader = null;
CompoundFileReader storeCFSReader = null;

// indicates the SegmentReader with which the resources are being shared,
// in case this is a re-opened reader
private SegmentReader referencedSegmentReader = null;
81
82 private class Norm {
83 volatile int refCount;
84 boolean useSingleNormStream;
85
86 public synchronized void incRef() {
87 assert refCount > 0;
88 refCount++;
89 }
90
91 public synchronized void decRef() throws IOException {
92 assert refCount > 0;
93 if (refCount == 1) {
94 close();
95 }
96 refCount--;
97
98 }
99
100 public Norm(IndexInput in, boolean useSingleNormStream, int number, long normSeek)
101 {
102 refCount = 1;
103 this.in = in;
104 this.number = number;
105 this.normSeek = normSeek;
106 this.useSingleNormStream = useSingleNormStream;
107 }
108
109 private IndexInput in;
110 private byte[] bytes;
111 private boolean dirty;
112 private int number;
113 private long normSeek;
114 private boolean rollbackDirty;
115
116 private void reWrite(SegmentInfo si) throws IOException {
117 // NOTE: norms are re-written in regular directory, not cfs
118 si.advanceNormGen(this.number);
119 IndexOutput out = directory().createOutput(si.getNormFileName(this.number));
120 try {
121 out.writeBytes(bytes, maxDoc());
122 } finally {
123 out.close();
124 }
125 this.dirty = false;
126 }
127
128 /** Closes the underlying IndexInput for this norm.
129 * It is still valid to access all other norm properties after close is called.
130 * @throws IOException
131 */
132 private synchronized void close() throws IOException {
133 if (in != null && !useSingleNormStream) {
134 in.close();
135 }
136 in = null;
137 }
138 }
139
140 /**
141 * Increments the RC of this reader, as well as
142 * of all norms this reader is using
143 */
144 public synchronized void incRef() {
145 super.incRef();
146 Iterator it = norms.values().iterator();
147 while (it.hasNext()) {
148 Norm norm = (Norm) it.next();
149 norm.incRef();
150 }
151 }
152
153 /**
154 * only increments the RC of this reader, not tof
155 * he norms. This is important whenever a reopen()
156 * creates a new SegmentReader that doesn't share
157 * the norms with this one
158 */
159 private synchronized void incRefReaderNotNorms() {
160 super.incRef();
161 }
162
163 public synchronized void decRef() throws IOException {
164 super.decRef();
165 Iterator it = norms.values().iterator();
166 while (it.hasNext()) {
167 Norm norm = (Norm) it.next();
168 norm.decRef();
169 }
170 }
171
172 private synchronized void decRefReaderNotNorms() throws IOException {
173 super.decRef();
174 }
175
// Maps a field name (String) to the Norm instance holding that field's norms.
Map norms = new HashMap();
177
178 /** The class which implements SegmentReader. */
179 private static Class IMPL;
180 static {
181 try {
182 String name =
183 System.getProperty("org.apache.lucene.SegmentReader.class",
184 SegmentReader.class.getName());
185 IMPL = Class.forName(name);
186 } catch (ClassNotFoundException e) {
187 throw new RuntimeException("cannot load SegmentReader class: " + e, e);
188 } catch (SecurityException se) {
189 try {
190 IMPL = Class.forName(SegmentReader.class.getName());
191 } catch (ClassNotFoundException e) {
192 throw new RuntimeException("cannot load default SegmentReader class: " + e, e);
193 }
194 }
195 }
196
197 private static Class READONLY_IMPL;
198 static {
199 try {
200 String name =
201 System.getProperty("org.apache.lucene.ReadOnlySegmentReader.class",
202 ReadOnlySegmentReader.class.getName());
203 READONLY_IMPL = Class.forName(name);
204 } catch (ClassNotFoundException e) {
205 throw new RuntimeException("cannot load ReadOnlySegmentReader class: " + e, e);
206 } catch (SecurityException se) {
207 try {
208 READONLY_IMPL = Class.forName(ReadOnlySegmentReader.class.getName());
209 } catch (ClassNotFoundException e) {
210 throw new RuntimeException("cannot load default ReadOnlySegmentReader class: " + e, e);
211 }
212 }
213 }
214
215 /**
216 * @throws CorruptIndexException if the index is corrupt
217 * @throws IOException if there is a low-level IO error
218 */
219 public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException {
220 return get(READ_ONLY_DEFAULT, si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, true);
221 }
222
223 /**
224 * @throws CorruptIndexException if the index is corrupt
225 * @throws IOException if there is a low-level IO error
226 */
227 public static SegmentReader get(boolean readOnly, SegmentInfo si) throws CorruptIndexException, IOException {
228 return get(readOnly, si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, true);
229 }
230
231 /**
232 * @throws CorruptIndexException if the index is corrupt
233 * @throws IOException if there is a low-level IO error
234 */
235 static SegmentReader get(SegmentInfo si, boolean doOpenStores) throws CorruptIndexException, IOException {
236 return get(READ_ONLY_DEFAULT, si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, doOpenStores);
237 }
238
239 /**
240 * @throws CorruptIndexException if the index is corrupt
241 * @throws IOException if there is a low-level IO error
242 */
243 public static SegmentReader get(SegmentInfo si, int readBufferSize) throws CorruptIndexException, IOException {
244 return get(READ_ONLY_DEFAULT, si.dir, si, null, false, false, readBufferSize, true);
245 }
246
247 /**
248 * @throws CorruptIndexException if the index is corrupt
249 * @throws IOException if there is a low-level IO error
250 */
251 static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException {
252 return get(READ_ONLY_DEFAULT, si.dir, si, null, false, false, readBufferSize, doOpenStores);
253 }
254
255 /**
256 * @throws CorruptIndexException if the index is corrupt
257 * @throws IOException if there is a low-level IO error
258 */
259 static SegmentReader get(boolean readOnly, SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException {
260 return get(readOnly, si.dir, si, null, false, false, readBufferSize, doOpenStores);
261 }
262
263 /**
264 * @throws CorruptIndexException if the index is corrupt
265 * @throws IOException if there is a low-level IO error
266 */
267 public static SegmentReader get(boolean readOnly, SegmentInfos sis, SegmentInfo si,
268 boolean closeDir) throws CorruptIndexException, IOException {
269 return get(readOnly, si.dir, si, sis, closeDir, true, BufferedIndexInput.BUFFER_SIZE, true);
270 }
271
272 /**
273 * @throws CorruptIndexException if the index is corrupt
274 * @throws IOException if there is a low-level IO error
275 */
276 public static SegmentReader get(Directory dir, SegmentInfo si,
277 SegmentInfos sis,
278 boolean closeDir, boolean ownDir,
279 int readBufferSize)
280 throws CorruptIndexException, IOException {
281 return get(READ_ONLY_DEFAULT, dir, si, sis, closeDir, ownDir, readBufferSize, true);
282 }
283
284 /**
285 * @throws CorruptIndexException if the index is corrupt
286 * @throws IOException if there is a low-level IO error
287 */
288 public static SegmentReader get(boolean readOnly,
289 Directory dir,
290 SegmentInfo si,
291 SegmentInfos sis,
292 boolean closeDir, boolean ownDir,
293 int readBufferSize,
294 boolean doOpenStores)
295 throws CorruptIndexException, IOException {
296 SegmentReader instance;
297 try {
298 if (readOnly)
299 instance = (SegmentReader)READONLY_IMPL.newInstance();
300 else
301 instance = (SegmentReader)IMPL.newInstance();
302 } catch (Exception e) {
303 throw new RuntimeException("cannot load SegmentReader class: " + e, e);
304 }
305 instance.init(dir, sis, closeDir, readOnly);
306 instance.initialize(si, readBufferSize, doOpenStores);
307 return instance;
308 }
309
310 private void initialize(SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException {
311 segment = si.name;
312 this.si = si;
313 this.readBufferSize = readBufferSize;
314
315 boolean success = false;
316
317 try {
318 // Use compound file directory for some files, if it exists
319 Directory cfsDir = directory();
320 if (si.getUseCompoundFile()) {
321 cfsReader = new CompoundFileReader(directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
322 cfsDir = cfsReader;
323 }
324
325 final Directory storeDir;
326
327 if (doOpenStores) {
328 if (si.getDocStoreOffset() != -1) {
329 if (si.getDocStoreIsCompoundFile()) {
330 storeCFSReader = new CompoundFileReader(directory(), si.getDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
331 storeDir = storeCFSReader;
332 } else {
333 storeDir = directory();
334 }
335 } else {
336 storeDir = cfsDir;
337 }
338 } else
339 storeDir = null;
340
341 fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
342
343 boolean anyProx = false;
344 final int numFields = fieldInfos.size();
345 for(int i=0;!anyProx && i<numFields;i++)
346 if (!fieldInfos.fieldInfo(i).omitTf)
347 anyProx = true;
348
349 final String fieldsSegment;
350
351 if (si.getDocStoreOffset() != -1)
352 fieldsSegment = si.getDocStoreSegment();
353 else
354 fieldsSegment = segment;
355
356 if (doOpenStores) {
357 fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize,
358 si.getDocStoreOffset(), si.docCount);
359
360 // Verify two sources of "maxDoc" agree:
361 if (si.getDocStoreOffset() == -1 && fieldsReader.size() != si.docCount) {
362 throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.size() + " but segmentInfo shows " + si.docCount);
363 }
364 }
365
366 tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);
367
368 loadDeletedDocs();
369
370 // make sure that all index files have been read or are kept open
371 // so that if an index update removes them we'll still have them
372 freqStream = cfsDir.openInput(segment + ".frq", readBufferSize);
373 if (anyProx)
374 proxStream = cfsDir.openInput(segment + ".prx", readBufferSize);
375 openNorms(cfsDir, readBufferSize);
376
377 if (doOpenStores && fieldInfos.hasVectors()) { // open term vector files only as needed
378 final String vectorsSegment;
379 if (si.getDocStoreOffset() != -1)
380 vectorsSegment = si.getDocStoreSegment();
381 else
382 vectorsSegment = segment;
383 termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.getDocStoreOffset(), si.docCount);
384 }
385 success = true;
386 } finally {
387
388 // With lock-less commits, it's entirely possible (and
389 // fine) to hit a FileNotFound exception above. In
390 // this case, we want to explicitly close any subset
391 // of things that were opened so that we don't have to
392 // wait for a GC to do so.
393 if (!success) {
394 doClose();
395 }
396 }
397 }
398
399 private void loadDeletedDocs() throws IOException {
400 // NOTE: the bitvector is stored using the regular directory, not cfs
401 if (hasDeletions(si)) {
402 deletedDocs = new BitVector(directory(), si.getDelFileName());
403
404 assert si.getDelCount() == deletedDocs.count() :
405 "delete count mismatch: info=" + si.getDelCount() + " vs BitVector=" + deletedDocs.count();
406
407 // Verify # deletes does not exceed maxDoc for this
408 // segment:
409 assert si.getDelCount() <= maxDoc() :
410 "delete count mismatch: " + deletedDocs.count() + ") exceeds max doc (" + maxDoc() + ") for segment " + si.name;
411
412 } else
413 assert si.getDelCount() == 0;
414 }
415
416 protected synchronized DirectoryIndexReader doReopen(SegmentInfos infos) throws CorruptIndexException, IOException {
417 DirectoryIndexReader newReader;
418
419 if (infos.size() == 1) {
420 SegmentInfo si = infos.info(0);
421 if (segment.equals(si.name) && si.getUseCompoundFile() == SegmentReader.this.si.getUseCompoundFile()) {
422 newReader = reopenSegment(si);
423 } else {
424 // segment not referenced anymore, reopen not possible
425 // or segment format changed
426 newReader = SegmentReader.get(readOnly, infos, infos.info(0), false);
427 }
428 } else {
429 if (readOnly)
430 return new ReadOnlyMultiSegmentReader(directory, infos, closeDirectory, new SegmentReader[] {this}, null, null);
431 else
432 return new MultiSegmentReader(directory, infos, closeDirectory, new SegmentReader[] {this}, null, null, false);
433 }
434
435 return newReader;
436 }
437
438 synchronized SegmentReader reopenSegment(SegmentInfo si) throws CorruptIndexException, IOException {
439 boolean deletionsUpToDate = (this.si.hasDeletions() == si.hasDeletions())
440 && (!si.hasDeletions() || this.si.getDelFileName().equals(si.getDelFileName()));
441 boolean normsUpToDate = true;
442
443
444 boolean[] fieldNormsChanged = new boolean[fieldInfos.size()];
445 if (normsUpToDate) {
446 for (int i = 0; i < fieldInfos.size(); i++) {
447 if (!this.si.getNormFileName(i).equals(si.getNormFileName(i))) {
448 normsUpToDate = false;
449 fieldNormsChanged[i] = true;
450 }
451 }
452 }
453
454 if (normsUpToDate && deletionsUpToDate) {
455 return this;
456 }
457
458
459 // clone reader
460 SegmentReader clone;
461 if (readOnly)
462 clone = new ReadOnlySegmentReader();
463 else
464 clone = new SegmentReader();
465
466 boolean success = false;
467 try {
468 clone.readOnly = readOnly;
469 clone.directory = directory;
470 clone.si = si;
471 clone.segment = segment;
472 clone.readBufferSize = readBufferSize;
473 clone.cfsReader = cfsReader;
474 clone.storeCFSReader = storeCFSReader;
475
476 clone.fieldInfos = fieldInfos;
477 clone.tis = tis;
478 clone.freqStream = freqStream;
479 clone.proxStream = proxStream;
480 clone.termVectorsReaderOrig = termVectorsReaderOrig;
481
482
483 // we have to open a new FieldsReader, because it is not thread-safe
484 // and can thus not be shared among multiple SegmentReaders
485 // TODO: Change this in case FieldsReader becomes thread-safe in the future
486 final String fieldsSegment;
487
488 Directory storeDir = directory();
489
490 if (si.getDocStoreOffset() != -1) {
491 fieldsSegment = si.getDocStoreSegment();
492 if (storeCFSReader != null) {
493 storeDir = storeCFSReader;
494 }
495 } else {
496 fieldsSegment = segment;
497 if (cfsReader != null) {
498 storeDir = cfsReader;
499 }
500 }
501
502 if (fieldsReader != null) {
503 clone.fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize,
504 si.getDocStoreOffset(), si.docCount);
505 }
506
507
508 if (!deletionsUpToDate) {
509 // load deleted docs
510 clone.deletedDocs = null;
511 clone.loadDeletedDocs();
512 } else {
513 clone.deletedDocs = this.deletedDocs;
514 }
515
516 clone.norms = new HashMap();
517 if (!normsUpToDate) {
518 // load norms
519 for (int i = 0; i < fieldNormsChanged.length; i++) {
520 // copy unchanged norms to the cloned reader and incRef those norms
521 if (!fieldNormsChanged[i]) {
522 String curField = fieldInfos.fieldInfo(i).name;
523 Norm norm = (Norm) this.norms.get(curField);
524 norm.incRef();
525 clone.norms.put(curField, norm);
526 }
527 }
528
529 clone.openNorms(si.getUseCompoundFile() ? cfsReader : directory(), readBufferSize);
530 } else {
531 Iterator it = norms.keySet().iterator();
532 while (it.hasNext()) {
533 String field = (String) it.next();
534 Norm norm = (Norm) norms.get(field);
535 norm.incRef();
536 clone.norms.put(field, norm);
537 }
538 }
539
540 if (clone.singleNormStream == null) {
541 for (int i = 0; i < fieldInfos.size(); i++) {
542 FieldInfo fi = fieldInfos.fieldInfo(i);
543 if (fi.isIndexed && !fi.omitNorms) {
544 Directory d = si.getUseCompoundFile() ? cfsReader : directory();
545 String fileName = si.getNormFileName(fi.number);
546 if (si.hasSeparateNorms(fi.number)) {
547 continue;
548 }
549
550 if (fileName.endsWith("." + IndexFileNames.NORMS_EXTENSION)) {
551 clone.singleNormStream = d.openInput(fileName, readBufferSize);
552 break;
553 }
554 }
555 }
556 }
557
558 success = true;
559 } finally {
560 if (this.referencedSegmentReader != null) {
561 // this reader shares resources with another SegmentReader,
562 // so we increment the other readers refCount. We don't
563 // increment the refCount of the norms because we did
564 // that already for the shared norms
565 clone.referencedSegmentReader = this.referencedSegmentReader;
566 referencedSegmentReader.incRefReaderNotNorms();
567 } else {
568 // this reader wasn't reopened, so we increment this
569 // readers refCount
570 clone.referencedSegmentReader = this;
571 incRefReaderNotNorms();
572 }
573
574 if (!success) {
575 // An exception occured during reopen, we have to decRef the norms
576 // that we incRef'ed already and close singleNormsStream and FieldsReader
577 clone.decRef();
578 }
579 }
580
581 return clone;
582 }
583
584 protected void commitChanges() throws IOException {
585 if (deletedDocsDirty) { // re-write deleted
586 si.advanceDelGen();
587
588 // We can write directly to the actual name (vs to a
589 // .tmp & renaming it) because the file is not live
590 // until segments file is written:
591 deletedDocs.write(directory(), si.getDelFileName());
592
593 si.setDelCount(si.getDelCount()+pendingDeleteCount);
594 pendingDeleteCount = 0;
595 }
596 if (undeleteAll && si.hasDeletions()) {
597 si.clearDelGen();
598 si.setDelCount(0);
599 }
600 if (normsDirty) { // re-write norms
601 si.setNumFields(fieldInfos.size());
602 Iterator it = norms.values().iterator();
603 while (it.hasNext()) {
604 Norm norm = (Norm) it.next();
605 if (norm.dirty) {
606 norm.reWrite(si);
607 }
608 }
609 }
610 deletedDocsDirty = false;
611 normsDirty = false;
612 undeleteAll = false;
613 }
614
615 FieldsReader getFieldsReader() {
616 return fieldsReader;
617 }
618
619 protected void doClose() throws IOException {
620 boolean hasReferencedReader = (referencedSegmentReader != null);
621
622 termVectorsLocal.close();
623
624 if (hasReferencedReader) {
625 referencedSegmentReader.decRefReaderNotNorms();
626 referencedSegmentReader = null;
627 }
628
629 deletedDocs = null;
630
631 // close the single norms stream
632 if (singleNormStream != null) {
633 // we can close this stream, even if the norms
634 // are shared, because every reader has it's own
635 // singleNormStream
636 singleNormStream.close();
637 singleNormStream = null;
638 }
639
640 // re-opened SegmentReaders have their own instance of FieldsReader
641 if (fieldsReader != null) {
642 fieldsReader.close();
643 }
644
645 if (!hasReferencedReader) {
646 // close everything, nothing is shared anymore with other readers
647 if (tis != null) {
648 tis.close();
649 }
650
651 if (freqStream != null)
652 freqStream.close();
653 if (proxStream != null)
654 proxStream.close();
655
656 if (termVectorsReaderOrig != null)
657 termVectorsReaderOrig.close();
658
659 if (cfsReader != null)
660 cfsReader.close();
661
662 if (storeCFSReader != null)
663 storeCFSReader.close();
664 }
665
666 // In DirectoryIndexReader.reopen, our directory
667 // instance was made private to us (cloned), so we
668 // always call super.doClose to possibly close the
669 // directory:
670 super.doClose();
671 }
672
673 static boolean hasDeletions(SegmentInfo si) throws IOException {
674 // Don't call ensureOpen() here (it could affect performance)
675 return si.hasDeletions();
676 }
677
678 public boolean hasDeletions() {
679 // Don't call ensureOpen() here (it could affect performance)
680 return deletedDocs != null;
681 }
682
683 static boolean usesCompoundFile(SegmentInfo si) throws IOException {
684 return si.getUseCompoundFile();
685 }
686
687 static boolean hasSeparateNorms(SegmentInfo si) throws IOException {
688 return si.hasSeparateNorms();
689 }
690
691 protected void doDelete(int docNum) {
692 if (deletedDocs == null)
693 deletedDocs = new BitVector(maxDoc());
694 deletedDocsDirty = true;
695 undeleteAll = false;
696 if (!deletedDocs.getAndSet(docNum))
697 pendingDeleteCount++;
698 }
699
700 protected void doUndeleteAll() {
701 deletedDocs = null;
702 deletedDocsDirty = false;
703 undeleteAll = true;
704 }
705
706 List files() throws IOException {
707 return new ArrayList(si.files());
708 }
709
710 public TermEnum terms() {
711 ensureOpen();
712 return tis.terms();
713 }
714
715 public TermEnum terms(Term t) throws IOException {
716 ensureOpen();
717 return tis.terms(t);
718 }
719
720 FieldInfos getFieldInfos() {
721 return fieldInfos;
722 }
723
724 /**
725 * @throws CorruptIndexException if the index is corrupt
726 * @throws IOException if there is a low-level IO error
727 */
728 public synchronized Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
729 ensureOpen();
730 if (isDeleted(n))
731 throw new IllegalArgumentException
732 ("attempt to access a deleted document");
733 return fieldsReader.doc(n, fieldSelector);
734 }
735
736 public synchronized boolean isDeleted(int n) {
737 return (deletedDocs != null && deletedDocs.get(n));
738 }
739
740 public TermDocs termDocs() throws IOException {
741 ensureOpen();
742 return new SegmentTermDocs(this);
743 }
744
745 public TermPositions termPositions() throws IOException {
746 ensureOpen();
747 return new SegmentTermPositions(this);
748 }
749
750 public int docFreq(Term t) throws IOException {
751 ensureOpen();
752 TermInfo ti = tis.get(t);
753 if (ti != null)
754 return ti.docFreq;
755 else
756 return 0;
757 }
758
759 public int numDocs() {
760 // Don't call ensureOpen() here (it could affect performance)
761 int n = maxDoc();
762 if (deletedDocs != null)
763 n -= deletedDocs.count();
764 return n;
765 }
766
767 public int maxDoc() {
768 // Don't call ensureOpen() here (it could affect performance)
769 return si.docCount;
770 }
771
772 public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException {
773 tis.setIndexDivisor(indexDivisor);
774 }
775
776 public int getTermInfosIndexDivisor() {
777 return tis.getIndexDivisor();
778 }
779
780 /**
781 * @see IndexReader#getFieldNames(IndexReader.FieldOption fldOption)
782 */
783 public Collection getFieldNames(IndexReader.FieldOption fieldOption) {
784 ensureOpen();
785
786 Set fieldSet = new HashSet();
787 for (int i = 0; i < fieldInfos.size(); i++) {
788 FieldInfo fi = fieldInfos.fieldInfo(i);
789 if (fieldOption == IndexReader.FieldOption.ALL) {
790 fieldSet.add(fi.name);
791 }
792 else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
793 fieldSet.add(fi.name);
794 }
795 else if (fi.omitTf && fieldOption == IndexReader.FieldOption.OMIT_TF) {
796 fieldSet.add(fi.name);
797 }
798 else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
799 fieldSet.add(fi.name);
800 }
801 else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) {
802 fieldSet.add(fi.name);
803 }
804 else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
805 fieldSet.add(fi.name);
806 }
807 else if (fi.storeTermVector == true &&
808 fi.storePositionWithTermVector == false &&
809 fi.storeOffsetWithTermVector == false &&
810 fieldOption == IndexReader.FieldOption.TERMVECTOR) {
811 fieldSet.add(fi.name);
812 }
813 else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
814 fieldSet.add(fi.name);
815 }
816 else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
817 fieldSet.add(fi.name);
818 }
819 else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
820 fieldSet.add(fi.name);
821 }
822 else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) &&
823 fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
824 fieldSet.add(fi.name);
825 }
826 }
827 return fieldSet;
828 }
829
830
831 public synchronized boolean hasNorms(String field) {
832 ensureOpen();
833 return norms.containsKey(field);
834 }
835
836 static byte[] createFakeNorms(int size) {
837 byte[] ones = new byte[size];
838 Arrays.fill(ones, DefaultSimilarity.encodeNorm(1.0f));
839 return ones;
840 }
841
842 private byte[] ones;
843 private byte[] fakeNorms() {
844 if (ones==null) ones=createFakeNorms(maxDoc());
845 return ones;
846 }
847
848 // can return null if norms aren't stored
849 protected synchronized byte[] getNorms(String field) throws IOException {
850 Norm norm = (Norm) norms.get(field);
851 if (norm == null) return null; // not indexed, or norms not stored
852 synchronized(norm) {
853 if (norm.bytes == null) { // value not yet read
854 byte[] bytes = new byte[maxDoc()];
855 norms(field, bytes, 0);
856 norm.bytes = bytes; // cache it
857 // it's OK to close the underlying IndexInput as we have cached the
858 // norms and will never read them again.
859 norm.close();
860 }
861 return norm.bytes;
862 }
863 }
864
865 // returns fake norms if norms aren't available
866 public synchronized byte[] norms(String field) throws IOException {
867 ensureOpen();
868 byte[] bytes = getNorms(field);
869 if (bytes==null) bytes=fakeNorms();
870 return bytes;
871 }
872
873 protected void doSetNorm(int doc, String field, byte value)
874 throws IOException {
875 Norm norm = (Norm) norms.get(field);
876 if (norm == null) // not an indexed field
877 return;
878
879 norm.dirty = true; // mark it dirty
880 normsDirty = true;
881
882 norms(field)[doc] = value; // set the value
883 }
884
885 /** Read norms into a pre-allocated array. */
886 public synchronized void norms(String field, byte[] bytes, int offset)
887 throws IOException {
888
889 ensureOpen();
890 Norm norm = (Norm) norms.get(field);
891 if (norm == null) {
892 System.arraycopy(fakeNorms(), 0, bytes, offset, maxDoc());
893 return;
894 }
895
896 synchronized(norm) {
897 if (norm.bytes != null) { // can copy from cache
898 System.arraycopy(norm.bytes, 0, bytes, offset, maxDoc());
899 return;
900 }
901
902 // Read from disk. norm.in may be shared across multiple norms and
903 // should only be used in a synchronized context.
904 IndexInput normStream;
905 if (norm.useSingleNormStream) {
906 normStream = singleNormStream;
907 } else {
908 normStream = norm.in;
909 }
910 normStream.seek(norm.normSeek);
911 normStream.readBytes(bytes, offset, maxDoc());
912 }
913 }
914
915
916 private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
917 long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
918 int maxDoc = maxDoc();
919 for (int i = 0; i < fieldInfos.size(); i++) {
920 FieldInfo fi = fieldInfos.fieldInfo(i);
921 if (norms.containsKey(fi.name)) {
922 // in case this SegmentReader is being re-opened, we might be able to
923 // reuse some norm instances and skip loading them here
924 continue;
925 }
926 if (fi.isIndexed && !fi.omitNorms) {
927 Directory d = directory();
928 String fileName = si.getNormFileName(fi.number);
929 if (!si.hasSeparateNorms(fi.number)) {
930 d = cfsDir;
931 }
932
933 // singleNormFile means multiple norms share this file
934 boolean singleNormFile = fileName.endsWith("." + IndexFileNames.NORMS_EXTENSION);
935 IndexInput normInput = null;
936 long normSeek;
937
938 if (singleNormFile) {
939 normSeek = nextNormSeek;
940 if (singleNormStream==null) {
941 singleNormStream = d.openInput(fileName, readBufferSize);
942 }
943 // All norms in the .nrm file can share a single IndexInput since
944 // they are only used in a synchronized context.
945 // If this were to change in the future, a clone could be done here.
946 normInput = singleNormStream;
947 } else {
948 normSeek = 0;
949 normInput = d.openInput(fileName);
950 }
951
952 norms.put(fi.name, new Norm(normInput, singleNormFile, fi.number, normSeek));
953 nextNormSeek += maxDoc; // increment also if some norms are separate
954 }
955 }
956 }
957
958 // for testing only
959 boolean normsClosed() {
960 if (singleNormStream != null) {
961 return false;
962 }
963 Iterator it = norms.values().iterator();
964 while (it.hasNext()) {
965 Norm norm = (Norm) it.next();
966 if (norm.refCount > 0) {
967 return false;
968 }
969 }
970 return true;
971 }
972
973 // for testing only
974 boolean normsClosed(String field) {
975 Norm norm = (Norm) norms.get(field);
976 return norm.refCount == 0;
977 }
978
979 /**
980 * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
981 * @return TermVectorsReader
982 */
983 private TermVectorsReader getTermVectorsReader() {
984 assert termVectorsReaderOrig != null;
985 TermVectorsReader tvReader = (TermVectorsReader)termVectorsLocal.get();
986 if (tvReader == null) {
987 try {
988 tvReader = (TermVectorsReader)termVectorsReaderOrig.clone();
989 } catch (CloneNotSupportedException cnse) {
990 return null;
991 }
992 termVectorsLocal.set(tvReader);
993 }
994 return tvReader;
995 }
996
997 /** Return a term frequency vector for the specified document and field. The
998 * vector returned contains term numbers and frequencies for all terms in
999 * the specified field of this document, if the field had storeTermVector
1000 * flag set. If the flag was not set, the method returns null.
1001 * @throws IOException
1002 */
1003 public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
1004 // Check if this field is invalid or has no stored term vector
1005 ensureOpen();
1006 FieldInfo fi = fieldInfos.fieldInfo(field);
1007 if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
1008 return null;
1009
1010 TermVectorsReader termVectorsReader = getTermVectorsReader();
1011 if (termVectorsReader == null)
1012 return null;
1013
1014 return termVectorsReader.get(docNumber, field);
1015 }
1016
1017
1018 public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
1019 ensureOpen();
1020 FieldInfo fi = fieldInfos.fieldInfo(field);
1021 if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
1022 return;
1023
1024 TermVectorsReader termVectorsReader = getTermVectorsReader();
1025 if (termVectorsReader == null)
1026 {
1027 return;
1028 }
1029
1030
1031 termVectorsReader.get(docNumber, field, mapper);
1032 }
1033
1034
1035 public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
1036 ensureOpen();
1037 if (termVectorsReaderOrig == null)
1038 return;
1039
1040 TermVectorsReader termVectorsReader = getTermVectorsReader();
1041 if (termVectorsReader == null)
1042 return;
1043
1044 termVectorsReader.get(docNumber, mapper);
1045 }
1046
1047 /** Return an array of term frequency vectors for the specified document.
1048 * The array contains a vector for each vectorized field in the document.
1049 * Each vector vector contains term numbers and frequencies for all terms
1050 * in a given vectorized field.
1051 * If no such fields existed, the method returns null.
1052 * @throws IOException
1053 */
1054 public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
1055 ensureOpen();
1056 if (termVectorsReaderOrig == null)
1057 return null;
1058
1059 TermVectorsReader termVectorsReader = getTermVectorsReader();
1060 if (termVectorsReader == null)
1061 return null;
1062
1063 return termVectorsReader.get(docNumber);
1064 }
1065
1066 /** Returns the field infos of this segment */
1067 FieldInfos fieldInfos() {
1068 return fieldInfos;
1069 }
1070
1071 /**
1072 * Return the name of the segment this reader is reading.
1073 */
1074 String getSegmentName() {
1075 return segment;
1076 }
1077
1078 /**
1079 * Return the SegmentInfo of the segment this reader is reading.
1080 */
1081 SegmentInfo getSegmentInfo() {
1082 return si;
1083 }
1084
1085 void setSegmentInfo(SegmentInfo info) {
1086 si = info;
1087 }
1088
1089 void startCommit() {
1090 super.startCommit();
1091 rollbackDeletedDocsDirty = deletedDocsDirty;
1092 rollbackNormsDirty = normsDirty;
1093 rollbackUndeleteAll = undeleteAll;
1094 rollbackPendingDeleteCount = pendingDeleteCount;
1095 Iterator it = norms.values().iterator();
1096 while (it.hasNext()) {
1097 Norm norm = (Norm) it.next();
1098 norm.rollbackDirty = norm.dirty;
1099 }
1100 }
1101
1102 void rollbackCommit() {
1103 super.rollbackCommit();
1104 deletedDocsDirty = rollbackDeletedDocsDirty;
1105 normsDirty = rollbackNormsDirty;
1106 undeleteAll = rollbackUndeleteAll;
1107 pendingDeleteCount = rollbackPendingDeleteCount;
1108 Iterator it = norms.values().iterator();
1109 while (it.hasNext()) {
1110 Norm norm = (Norm) it.next();
1111 norm.dirty = norm.rollbackDirty;
1112 }
1113 }
1114 }