1 package org.apache.lucene.index;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import java.io.IOException;
21 import org.apache.lucene.store.IndexInput;
22
23 final class SegmentTermEnum extends TermEnum implements Cloneable {
24 private IndexInput input;
25 FieldInfos fieldInfos;
26 long size;
27 long position = -1;
28
29 private TermBuffer termBuffer = new TermBuffer();
30 private TermBuffer prevBuffer = new TermBuffer();
31 private TermBuffer scanBuffer = new TermBuffer(); // used for scanning
32
33 private TermInfo termInfo = new TermInfo();
34
35 private int format;
36 private boolean isIndex = false;
37 long indexPointer = 0;
38 int indexInterval;
39 int skipInterval;
40 int maxSkipLevels;
41 private int formatM1SkipInterval;
42
43 SegmentTermEnum(IndexInput i, FieldInfos fis, boolean isi)
44 throws CorruptIndexException, IOException {
45 input = i;
46 fieldInfos = fis;
47 isIndex = isi;
48 maxSkipLevels = 1; // use single-level skip lists for formats > -3
49
50 int firstInt = input.readInt();
51 if (firstInt >= 0) {
52 // original-format file, without explicit format version number
53 format = 0;
54 size = firstInt;
55
56 // back-compatible settings
57 indexInterval = 128;
58 skipInterval = Integer.MAX_VALUE; // switch off skipTo optimization
59 } else {
60 // we have a format version number
61 format = firstInt;
62
63 // check that it is a format we can understand
64 if (format < TermInfosWriter.FORMAT_CURRENT)
65 throw new CorruptIndexException("Unknown format version:" + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher");
66
67 size = input.readLong(); // read the size
68
69 if(format == -1){
70 if (!isIndex) {
71 indexInterval = input.readInt();
72 formatM1SkipInterval = input.readInt();
73 }
74 // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
75 // skipTo implementation of these versions
76 skipInterval = Integer.MAX_VALUE;
77 } else {
78 indexInterval = input.readInt();
79 skipInterval = input.readInt();
80 if (format <= TermInfosWriter.FORMAT) {
81 // this new format introduces multi-level skipping
82 maxSkipLevels = input.readInt();
83 }
84 }
85 }
86 if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
87 termBuffer.setPreUTF8Strings();
88 scanBuffer.setPreUTF8Strings();
89 prevBuffer.setPreUTF8Strings();
90 }
91 }
92
93 protected Object clone() {
94 SegmentTermEnum clone = null;
95 try {
96 clone = (SegmentTermEnum) super.clone();
97 } catch (CloneNotSupportedException e) {}
98
99 clone.input = (IndexInput) input.clone();
100 clone.termInfo = new TermInfo(termInfo);
101
102 clone.termBuffer = (TermBuffer)termBuffer.clone();
103 clone.prevBuffer = (TermBuffer)prevBuffer.clone();
104 clone.scanBuffer = new TermBuffer();
105
106 return clone;
107 }
108
109 final void seek(long pointer, int p, Term t, TermInfo ti)
110 throws IOException {
111 input.seek(pointer);
112 position = p;
113 termBuffer.set(t);
114 prevBuffer.reset();
115 termInfo.set(ti);
116 }
117
118 /** Increments the enumeration to the next element. True if one exists.*/
119 public final boolean next() throws IOException {
120 if (position++ >= size - 1) {
121 prevBuffer.set(termBuffer);
122 termBuffer.reset();
123 return false;
124 }
125
126 prevBuffer.set(termBuffer);
127 termBuffer.read(input, fieldInfos);
128
129 termInfo.docFreq = input.readVInt(); // read doc freq
130 termInfo.freqPointer += input.readVLong(); // read freq pointer
131 termInfo.proxPointer += input.readVLong(); // read prox pointer
132
133 if(format == -1){
134 // just read skipOffset in order to increment file pointer;
135 // value is never used since skipTo is switched off
136 if (!isIndex) {
137 if (termInfo.docFreq > formatM1SkipInterval) {
138 termInfo.skipOffset = input.readVInt();
139 }
140 }
141 }
142 else{
143 if (termInfo.docFreq >= skipInterval)
144 termInfo.skipOffset = input.readVInt();
145 }
146
147 if (isIndex)
148 indexPointer += input.readVLong(); // read index pointer
149
150 return true;
151 }
152
153 /** Optimized scan, without allocating new terms.
154 * Return number of invocations to next(). */
155 final int scanTo(Term term) throws IOException {
156 scanBuffer.set(term);
157 int count = 0;
158 while (scanBuffer.compareTo(termBuffer) > 0 && next()) {
159 count++;
160 }
161 return count;
162 }
163
164 /** Returns the current Term in the enumeration.
165 Initially invalid, valid after next() called for the first time.*/
166 public final Term term() {
167 return termBuffer.toTerm();
168 }
169
170 /** Returns the previous Term enumerated. Initially null.*/
171 final Term prev() {
172 return prevBuffer.toTerm();
173 }
174
175 /** Returns the current TermInfo in the enumeration.
176 Initially invalid, valid after next() called for the first time.*/
177 final TermInfo termInfo() {
178 return new TermInfo(termInfo);
179 }
180
181 /** Sets the argument to the current TermInfo in the enumeration.
182 Initially invalid, valid after next() called for the first time.*/
183 final void termInfo(TermInfo ti) {
184 ti.set(termInfo);
185 }
186
187 /** Returns the docFreq from the current TermInfo in the enumeration.
188 Initially invalid, valid after next() called for the first time.*/
189 public final int docFreq() {
190 return termInfo.docFreq;
191 }
192
193 /* Returns the freqPointer from the current TermInfo in the enumeration.
194 Initially invalid, valid after next() called for the first time.*/
195 final long freqPointer() {
196 return termInfo.freqPointer;
197 }
198
199 /* Returns the proxPointer from the current TermInfo in the enumeration.
200 Initially invalid, valid after next() called for the first time.*/
201 final long proxPointer() {
202 return termInfo.proxPointer;
203 }
204
205 /** Closes the enumeration to further activity, freeing resources. */
206 public final void close() throws IOException {
207 input.close();
208 }
209 }