1 package org.apache.lucene.store;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import java.io.IOException;
21
22 /** Abstract base class for input from a file in a {@link Directory}. A
23 * random-access input stream. Used for all Lucene index input operations.
24 * @see Directory
25 */
public abstract class IndexInput implements Cloneable {
  /** Scratch buffer reused by {@link #readString()} for UTF-8 bytes; never shared with clones. */
  private byte[] bytes;
  /** Scratch buffer reused when decoding old modified-UTF-8 strings; never shared with clones. */
  private char[] chars;
  /** True once {@link #setModifiedUTF8StringsMode()} has been called on this stream. */
  private boolean preUTF8Strings;

  /** Reads and returns a single byte.
   * @see IndexOutput#writeByte(byte)
   */
  public abstract byte readByte() throws IOException;

  /** Reads a specified number of bytes into an array at the specified offset.
   * @param b the array to read bytes into
   * @param offset the offset in the array to start storing bytes
   * @param len the number of bytes to read
   * @see IndexOutput#writeBytes(byte[],int)
   */
  public abstract void readBytes(byte[] b, int offset, int len)
    throws IOException;

  /** Reads a specified number of bytes into an array at the
   * specified offset with control over whether the read
   * should be buffered (callers who have their own buffer
   * should pass in "false" for useBuffer).  Currently only
   * {@link BufferedIndexInput} respects this parameter; the
   * implementation here ignores it and simply delegates to
   * {@link #readBytes(byte[], int, int)}.
   * @param b the array to read bytes into
   * @param offset the offset in the array to start storing bytes
   * @param len the number of bytes to read
   * @param useBuffer set to false if the caller will handle
   * buffering.
   * @see IndexOutput#writeBytes(byte[],int)
   */
  public void readBytes(byte[] b, int offset, int len, boolean useBuffer)
    throws IOException
  {
    // Default to ignoring useBuffer entirely
    readBytes(b, offset, len);
  }

  /** Reads four bytes and returns an int. The first byte read
   * becomes the most significant byte of the result.
   * @see IndexOutput#writeInt(int)
   */
  public int readInt() throws IOException {
    // Operands are evaluated left to right, so the bytes are consumed in stream order.
    return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16)
         | ((readByte() & 0xFF) <<  8) |  (readByte() & 0xFF);
  }

  /** Reads an int stored in variable-length format.  Reads between one and
   * five bytes.  Smaller values take fewer bytes.  Negative numbers are not
   * supported.
   * <p>Each byte contributes its low seven bits, least significant group
   * first; a set high bit means another byte follows.
   * @see IndexOutput#writeVInt(int)
   */
  public int readVInt() throws IOException {
    byte b = readByte();
    int i = b & 0x7F;
    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
      b = readByte();
      i |= (b & 0x7F) << shift;
    }
    return i;
  }

  /** Reads eight bytes and returns a long. The first four bytes
   * form the upper 32 bits, the last four the lower 32 bits.
   * @see IndexOutput#writeLong(long)
   */
  public long readLong() throws IOException {
    // Mask the low word so its sign bit does not smear into the high word.
    return (((long)readInt()) << 32) | (readInt() & 0xFFFFFFFFL);
  }

  /** Reads a long stored in variable-length format.  Reads between one and
   * nine bytes.  Smaller values take fewer bytes.  Negative numbers are not
   * supported.
   * <p>Uses the same seven-bits-per-byte layout as {@link #readVInt()}. */
  public long readVLong() throws IOException {
    byte b = readByte();
    long i = b & 0x7F;
    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
      b = readByte();
      // The long mask keeps the shift in 64-bit arithmetic.
      i |= (b & 0x7FL) << shift;
    }
    return i;
  }

  /** Call this if readString should read characters stored
   *  in the old modified UTF8 format (length in java chars
   *  and java's modified UTF8 encoding).  This is used for
   *  indices written pre-2.4.  See LUCENE-510 for details.
   *  There is no way to switch the mode back off. */
  public void setModifiedUTF8StringsMode() {
    preUTF8Strings = true;
  }

  /** Reads a string.
   * <p>By default the string is stored as a VInt byte count followed by
   * that many UTF-8 bytes.  After {@link #setModifiedUTF8StringsMode()} has
   * been called, the old format is read instead: a VInt char count followed
   * by that many modified-UTF-8 encoded chars.
   * @see IndexOutput#writeString(String)
   */
  public String readString() throws IOException {
    if (preUTF8Strings) {
      return readModifiedUTF8String();
    }
    int length = readVInt();
    if (bytes == null || length > bytes.length) {
      // Over-allocate by 25% so a slightly longer string can reuse the buffer.
      bytes = new byte[(int) (length*1.25)];
    }
    readBytes(bytes, 0, length);
    return new String(bytes, 0, length, "UTF-8");
  }

  /** Reads a string in the old format: a VInt count of Java chars
   * followed by that many modified-UTF-8 encoded chars. */
  private String readModifiedUTF8String() throws IOException {
    int length = readVInt();
    if (chars == null || length > chars.length) {
      chars = new char[length];
    }
    readChars(chars, 0, length);
    return new String(chars, 0, length);
  }

  /** Reads Lucene's old "modified UTF-8" encoded
   *  characters into an array.  Each char occupies one, two or
   *  three bytes, selected by the high bits of its first byte;
   *  continuation bytes are not validated.
   * @param buffer the array to read characters into
   * @param start the offset in the array to start storing characters
   * @param length the number of characters to read
   * @see IndexOutput#writeChars(String,int,int)
   * @deprecated -- please use readString or readBytes
   *                instead, and construct the string
   *                from those utf8 bytes
   */
  public void readChars(char[] buffer, int start, int length)
       throws IOException {
    final int end = start + length;
    for (int i = start; i < end; i++) {
      byte b = readByte();
      if ((b & 0x80) == 0) {
        // Single byte: 0xxxxxxx
        buffer[i] = (char)(b & 0x7F);
      } else if ((b & 0xE0) != 0xE0) {
        // Two bytes: any lead byte whose top three bits are not all set
        buffer[i] = (char)(((b & 0x1F) << 6)
                 | (readByte() & 0x3F));
      } else {
        // Three bytes: 1110xxxx lead byte
        buffer[i] = (char)(((b & 0x0F) << 12)
                | ((readByte() & 0x3F) << 6)
                |  (readByte() & 0x3F));
      }
    }
  }

  /**
   * Expert
   *
   * Similar to {@link #readChars(char[], int, int)} but does not do any conversion operations on the bytes it is reading in.  It still
   * has to invoke {@link #readByte()} just as {@link #readChars(char[], int, int)} does, but it does not need a buffer to store anything
   * and it does not have to do any of the bitwise operations, since we don't actually care what is in the byte except to determine
   * how many more bytes to read
   * @param length The number of chars to read
   * @deprecated this method operates on old "modified utf8" encoded
   *             strings
   */
  public void skipChars(int length) throws IOException {
    for (int i = 0; i < length; i++) {
      byte b = readByte();
      if ((b & 0x80) == 0) {
        // do nothing, we only need one byte
      } else if ((b & 0xE0) != 0xE0) {
        readByte(); // read an additional byte
      } else {
        // read two additional bytes.
        readByte();
        readByte();
      }
    }
  }

  /** Closes the stream to further operations. */
  public abstract void close() throws IOException;

  /** Returns the current position in this file, where the next read will
   * occur.
   * @see #seek(long)
   */
  public abstract long getFilePointer();

  /** Sets current position in this file, where the next read will occur.
   * @see #getFilePointer()
   */
  public abstract void seek(long pos) throws IOException;

  /** The number of bytes in the file. */
  public abstract long length();

  /** Returns a clone of this stream.
   *
   * <p>Clones of a stream access the same data, and are positioned at the same
   * point as the stream they were cloned from.  The clone keeps the string
   * mode of the original but gets its own scratch buffers, allocated lazily
   * on first use.
   *
   * <p>Expert: Subclasses must ensure that clones may be positioned at
   * different points in the input from each other and from the stream they
   * were cloned from.
   *
   * @throws RuntimeException if {@code Object.clone()} unexpectedly fails;
   *         the original exception is attached as the cause
   */
  public Object clone() {
    final IndexInput clone;
    try {
      clone = (IndexInput)super.clone();
    } catch (CloneNotSupportedException e) {
      // Not expected, since this class implements Cloneable. Fail loudly with
      // the cause attached instead of hitting a NullPointerException below.
      throw new RuntimeException("IndexInput implements Cloneable but Object.clone() failed", e);
    }

    // Give the clone its own scratch buffers so the two streams never share them.
    clone.bytes = null;
    clone.chars = null;

    return clone;
  }

}