1 package org.apache.lucene.index;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import java.io.IOException;
21 import org.apache.lucene.util.BitVector;
22 import org.apache.lucene.store.IndexInput;
23
24 class SegmentTermDocs implements TermDocs {
25 protected SegmentReader parent;
26 protected IndexInput freqStream;
27 protected int count;
28 protected int df;
29 protected BitVector deletedDocs;
30 int doc = 0;
31 int freq;
32
33 private int skipInterval;
34 private int maxSkipLevels;
35 private DefaultSkipListReader skipListReader;
36
37 private long freqBasePointer;
38 private long proxBasePointer;
39
40 private long skipPointer;
41 private boolean haveSkipped;
42
43 protected boolean currentFieldStoresPayloads;
44 protected boolean currentFieldOmitTf;
45
46 protected SegmentTermDocs(SegmentReader parent) {
47 this.parent = parent;
48 this.freqStream = (IndexInput) parent.freqStream.clone();
49 this.deletedDocs = parent.deletedDocs;
50 this.skipInterval = parent.tis.getSkipInterval();
51 this.maxSkipLevels = parent.tis.getMaxSkipLevels();
52 }
53
54 public void seek(Term term) throws IOException {
55 TermInfo ti = parent.tis.get(term);
56 seek(ti, term);
57 }
58
59 public void seek(TermEnum termEnum) throws IOException {
60 TermInfo ti;
61 Term term;
62
63 // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
64 if (termEnum instanceof SegmentTermEnum && ((SegmentTermEnum) termEnum).fieldInfos == parent.fieldInfos) { // optimized case
65 SegmentTermEnum segmentTermEnum = ((SegmentTermEnum) termEnum);
66 term = segmentTermEnum.term();
67 ti = segmentTermEnum.termInfo();
68 } else { // punt case
69 term = termEnum.term();
70 ti = parent.tis.get(term);
71 }
72
73 seek(ti, term);
74 }
75
76 void seek(TermInfo ti, Term term) throws IOException {
77 count = 0;
78 FieldInfo fi = parent.fieldInfos.fieldInfo(term.field);
79 currentFieldOmitTf = (fi != null) ? fi.omitTf : false;
80 currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false;
81 if (ti == null) {
82 df = 0;
83 } else {
84 df = ti.docFreq;
85 doc = 0;
86 freqBasePointer = ti.freqPointer;
87 proxBasePointer = ti.proxPointer;
88 skipPointer = freqBasePointer + ti.skipOffset;
89 freqStream.seek(freqBasePointer);
90 haveSkipped = false;
91 }
92 }
93
94 public void close() throws IOException {
95 freqStream.close();
96 if (skipListReader != null)
97 skipListReader.close();
98 }
99
100 public final int doc() { return doc; }
101 public final int freq() { return freq; }
102
103 protected void skippingDoc() throws IOException {
104 }
105
106 public boolean next() throws IOException {
107 while (true) {
108 if (count == df)
109 return false;
110 final int docCode = freqStream.readVInt();
111
112 if (currentFieldOmitTf) {
113 doc += docCode;
114 freq = 1;
115 } else {
116 doc += docCode >>> 1; // shift off low bit
117 if ((docCode & 1) != 0) // if low bit is set
118 freq = 1; // freq is one
119 else
120 freq = freqStream.readVInt(); // else read freq
121 }
122
123 count++;
124
125 if (deletedDocs == null || !deletedDocs.get(doc))
126 break;
127 skippingDoc();
128 }
129 return true;
130 }
131
132 /** Optimized implementation. */
133 public int read(final int[] docs, final int[] freqs)
134 throws IOException {
135 final int length = docs.length;
136 if (currentFieldOmitTf) {
137 return readNoTf(docs, freqs, length);
138 } else {
139 int i = 0;
140 while (i < length && count < df) {
141 // manually inlined call to next() for speed
142 final int docCode = freqStream.readVInt();
143 doc += docCode >>> 1; // shift off low bit
144 if ((docCode & 1) != 0) // if low bit is set
145 freq = 1; // freq is one
146 else
147 freq = freqStream.readVInt(); // else read freq
148 count++;
149
150 if (deletedDocs == null || !deletedDocs.get(doc)) {
151 docs[i] = doc;
152 freqs[i] = freq;
153 ++i;
154 }
155 }
156 return i;
157 }
158 }
159
160 private final int readNoTf(final int[] docs, final int[] freqs, final int length) throws IOException {
161 int i = 0;
162 while (i < length && count < df) {
163 // manually inlined call to next() for speed
164 doc += freqStream.readVInt();
165 count++;
166
167 if (deletedDocs == null || !deletedDocs.get(doc)) {
168 docs[i] = doc;
169 // Hardware freq to 1 when term freqs were not
170 // stored in the index
171 freqs[i] = 1;
172 ++i;
173 }
174 }
175 return i;
176 }
177
178
179 /** Overridden by SegmentTermPositions to skip in prox stream. */
180 protected void skipProx(long proxPointer, int payloadLength) throws IOException {}
181
182 /** Optimized implementation. */
183 public boolean skipTo(int target) throws IOException {
184 if (df >= skipInterval) { // optimized case
185 if (skipListReader == null)
186 skipListReader = new DefaultSkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone
187
188 if (!haveSkipped) { // lazily initialize skip stream
189 skipListReader.init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads);
190 haveSkipped = true;
191 }
192
193 int newCount = skipListReader.skipTo(target);
194 if (newCount > count) {
195 freqStream.seek(skipListReader.getFreqPointer());
196 skipProx(skipListReader.getProxPointer(), skipListReader.getPayloadLength());
197
198 doc = skipListReader.getDoc();
199 count = newCount;
200 }
201 }
202
203 // done skipping, now just scan
204 do {
205 if (!next())
206 return false;
207 } while (target > doc);
208 return true;
209 }
210 }