Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » store » [javadoc | source]
    1   package org.apache.lucene.store;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import java.io.IOException;
   21   
   22   /** Abstract base class for input from a file in a {@link Directory}.  A
   23    * random-access input stream.  Used for all Lucene index input operations.
   24    * @see Directory
   25    */
   26   public abstract class IndexInput implements Cloneable {
   27     private byte[] bytes;                           // used by readString()
   28     private char[] chars;                           // used by readModifiedUTF8String()
   29     private boolean preUTF8Strings;                 // true if we are reading old (modified UTF8) string format
   30   
   31     /** Reads and returns a single byte.
   32      * @see IndexOutput#writeByte(byte)
   33      */
   34     public abstract byte readByte() throws IOException;
   35   
   36     /** Reads a specified number of bytes into an array at the specified offset.
   37      * @param b the array to read bytes into
   38      * @param offset the offset in the array to start storing bytes
   39      * @param len the number of bytes to read
   40      * @see IndexOutput#writeBytes(byte[],int)
   41      */
   42     public abstract void readBytes(byte[] b, int offset, int len)
   43       throws IOException;
   44   
   45     /** Reads a specified number of bytes into an array at the
   46      * specified offset with control over whether the read
   47      * should be buffered (callers who have their own buffer
   48      * should pass in "false" for useBuffer).  Currently only
   49      * {@link BufferedIndexInput} respects this parameter.
   50      * @param b the array to read bytes into
   51      * @param offset the offset in the array to start storing bytes
   52      * @param len the number of bytes to read
   53      * @param useBuffer set to false if the caller will handle
   54      * buffering.
   55      * @see IndexOutput#writeBytes(byte[],int)
   56      */
   57     public void readBytes(byte[] b, int offset, int len, boolean useBuffer)
   58       throws IOException
   59     {
   60       // Default to ignoring useBuffer entirely
   61       readBytes(b, offset, len);
   62     }
   63   
   64     /** Reads four bytes and returns an int.
   65      * @see IndexOutput#writeInt(int)
   66      */
   67     public int readInt() throws IOException {
   68       return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16)
   69            | ((readByte() & 0xFF) <<  8) |  (readByte() & 0xFF);
   70     }
   71   
   72     /** Reads an int stored in variable-length format.  Reads between one and
   73      * five bytes.  Smaller values take fewer bytes.  Negative numbers are not
   74      * supported.
   75      * @see IndexOutput#writeVInt(int)
   76      */
   77     public int readVInt() throws IOException {
   78       byte b = readByte();
   79       int i = b & 0x7F;
   80       for (int shift = 7; (b & 0x80) != 0; shift += 7) {
   81         b = readByte();
   82         i |= (b & 0x7F) << shift;
   83       }
   84       return i;
   85     }
   86   
   87     /** Reads eight bytes and returns a long.
   88      * @see IndexOutput#writeLong(long)
   89      */
   90     public long readLong() throws IOException {
   91       return (((long)readInt()) << 32) | (readInt() & 0xFFFFFFFFL);
   92     }
   93   
   94     /** Reads a long stored in variable-length format.  Reads between one and
   95      * nine bytes.  Smaller values take fewer bytes.  Negative numbers are not
   96      * supported. */
   97     public long readVLong() throws IOException {
   98       byte b = readByte();
   99       long i = b & 0x7F;
  100       for (int shift = 7; (b & 0x80) != 0; shift += 7) {
  101         b = readByte();
  102         i |= (b & 0x7FL) << shift;
  103       }
  104       return i;
  105     }
  106   
  107     /** Call this if readString should read characters stored
  108      *  in the old modified UTF8 format (length in java chars
  109      *  and java's modified UTF8 encoding).  This is used for
  110      *  indices written pre-2.4 See LUCENE-510 for details. */
  111     public void setModifiedUTF8StringsMode() {
  112       preUTF8Strings = true;
  113     }
  114   
  115     /** Reads a string.
  116      * @see IndexOutput#writeString(String)
  117      */
  118     public String readString() throws IOException {
  119       if (preUTF8Strings)
  120         return readModifiedUTF8String();
  121       int length = readVInt();
  122       if (bytes == null || length > bytes.length)
  123         bytes = new byte[(int) (length*1.25)];
  124       readBytes(bytes, 0, length);
  125       return new String(bytes, 0, length, "UTF-8");
  126     }
  127   
  128     private String readModifiedUTF8String() throws IOException {
  129       int length = readVInt();
  130       if (chars == null || length > chars.length)
  131         chars = new char[length];
  132       readChars(chars, 0, length);
  133       return new String(chars, 0, length);
  134     }
  135   
  136     /** Reads Lucene's old "modified UTF-8" encoded
  137      *  characters into an array.
  138      * @param buffer the array to read characters into
  139      * @param start the offset in the array to start storing characters
  140      * @param length the number of characters to read
  141      * @see IndexOutput#writeChars(String,int,int)
  142      * @deprecated -- please use readString or readBytes
  143      *                instead, and construct the string
  144      *                from those utf8 bytes
  145      */
  146     public void readChars(char[] buffer, int start, int length)
  147          throws IOException {
  148       final int end = start + length;
  149       for (int i = start; i < end; i++) {
  150         byte b = readByte();
  151         if ((b & 0x80) == 0)
  152   	buffer[i] = (char)(b & 0x7F);
  153         else if ((b & 0xE0) != 0xE0) {
  154   	buffer[i] = (char)(((b & 0x1F) << 6)
  155   		 | (readByte() & 0x3F));
  156         } else
  157   	buffer[i] = (char)(((b & 0x0F) << 12)
  158   		| ((readByte() & 0x3F) << 6)
  159   	        |  (readByte() & 0x3F));
  160       }
  161     }
  162   
  163     /**
  164      * Expert
  165      * 
  166      * Similar to {@link #readChars(char[], int, int)} but does not do any conversion operations on the bytes it is reading in.  It still
  167      * has to invoke {@link #readByte()} just as {@link #readChars(char[], int, int)} does, but it does not need a buffer to store anything
  168      * and it does not have to do any of the bitwise operations, since we don't actually care what is in the byte except to determine
  169      * how many more bytes to read
  170      * @param length The number of chars to read
  171      * @deprecated this method operates on old "modified utf8" encoded
  172      *             strings
  173      */
  174     public void skipChars(int length) throws IOException{
  175       for (int i = 0; i < length; i++) {
  176         byte b = readByte();
  177         if ((b & 0x80) == 0){
  178           //do nothing, we only need one byte
  179         }
  180         else if ((b & 0xE0) != 0xE0) {
  181           readByte();//read an additional byte
  182         } else{      
  183           //read two additional bytes.
  184           readByte();
  185           readByte();
  186         }
  187       }
  188     }
  189     
  190   
  191     /** Closes the stream to futher operations. */
  192     public abstract void close() throws IOException;
  193   
  194     /** Returns the current position in this file, where the next read will
  195      * occur.
  196      * @see #seek(long)
  197      */
  198     public abstract long getFilePointer();
  199   
  200     /** Sets current position in this file, where the next read will occur.
  201      * @see #getFilePointer()
  202      */
  203     public abstract void seek(long pos) throws IOException;
  204   
  205     /** The number of bytes in the file. */
  206     public abstract long length();
  207   
  208     /** Returns a clone of this stream.
  209      *
  210      * <p>Clones of a stream access the same data, and are positioned at the same
  211      * point as the stream they were cloned from.
  212      *
  213      * <p>Expert: Subclasses must ensure that clones may be positioned at
  214      * different points in the input from each other and from the stream they
  215      * were cloned from.
  216      */
  217     public Object clone() {
  218       IndexInput clone = null;
  219       try {
  220         clone = (IndexInput)super.clone();
  221       } catch (CloneNotSupportedException e) {}
  222   
  223       clone.bytes = null;
  224       clone.chars = null;
  225   
  226       return clone;
  227     }
  228   
  229   }

Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » store » [javadoc | source]