Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » index » [javadoc | source]
    1   package org.apache.lucene.index;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import org.apache.lucene.store.Directory;
   21   import org.apache.lucene.store.IndexOutput;
   22   import org.apache.lucene.store.IndexInput;
   23   import java.util.LinkedList;
   24   import java.util.HashSet;
   25   
   26   import java.io.IOException;
   27   
   28   
   29   /**
   30    * Combines multiple files into a single compound file.
   31    * The file format:<br>
   32    * <ul>
   33    *     <li>VInt fileCount</li>
   34    *     <li>{Directory}
   35    *         fileCount entries with the following structure:</li>
   36    *         <ul>
   37    *             <li>long dataOffset</li>
   38    *             <li>String fileName</li>
   39    *         </ul>
   40    *     <li>{File Data}
   41    *         fileCount entries with the raw data of the corresponding file</li>
   42    * </ul>
   43    *
   44    * The fileCount integer indicates how many files are contained in this compound
   45    * file. The {directory} that follows has that many entries. Each directory entry
   46    * contains a long pointer to the start of this file's data section, and a String
   47    * with that file's name.
   48    */
   49   final class CompoundFileWriter {
   50   
   51       private static final class FileEntry {
   52           /** source file */
   53           String file;
   54   
   55           /** temporary holder for the start of directory entry for this file */
   56           long directoryOffset;
   57   
   58           /** temporary holder for the start of this file's data section */
   59           long dataOffset;
   60       }
   61   
   62   
   63       private Directory directory;
   64       private String fileName;
   65       private HashSet<String> ids;
   66       private LinkedList<FileEntry> entries;
   67       private boolean merged = false;
   68       private SegmentMerger.CheckAbort checkAbort;
   69   
   70       /** Create the compound stream in the specified file. The file name is the
   71        *  entire name (no extensions are added).
   72        *  @throws NullPointerException if <code>dir</code> or <code>name</code> is null
   73        */
   74       public CompoundFileWriter(Directory dir, String name) {
   75         this(dir, name, null);
   76       }
   77   
   78       CompoundFileWriter(Directory dir, String name, SegmentMerger.CheckAbort checkAbort) {
   79           if (dir == null)
   80               throw new NullPointerException("directory cannot be null");
   81           if (name == null)
   82               throw new NullPointerException("name cannot be null");
   83           this.checkAbort = checkAbort;
   84           directory = dir;
   85           fileName = name;
   86           ids = new HashSet<String>();
   87           entries = new LinkedList<FileEntry>();
   88       }
   89   
   90       /** Returns the directory of the compound file. */
   91       public Directory getDirectory() {
   92           return directory;
   93       }
   94   
   95       /** Returns the name of the compound file. */
   96       public String getName() {
   97           return fileName;
   98       }
   99   
  100       /** Add a source stream. <code>file</code> is the string by which the 
  101        *  sub-stream will be known in the compound stream.
  102        * 
  103        *  @throws IllegalStateException if this writer is closed
  104        *  @throws NullPointerException if <code>file</code> is null
  105        *  @throws IllegalArgumentException if a file with the same name
  106        *   has been added already
  107        */
  108       public void addFile(String file) {
  109           if (merged)
  110               throw new IllegalStateException(
  111                   "Can't add extensions after merge has been called");
  112   
  113           if (file == null)
  114               throw new NullPointerException(
  115                   "file cannot be null");
  116   
  117           if (! ids.add(file))
  118               throw new IllegalArgumentException(
  119                   "File " + file + " already added");
  120   
  121           FileEntry entry = new FileEntry();
  122           entry.file = file;
  123           entries.add(entry);
  124       }
  125   
  126       /** Merge files with the extensions added up to now.
  127        *  All files with these extensions are combined sequentially into the
  128        *  compound stream. After successful merge, the source files
  129        *  are deleted.
  130        *  @throws IllegalStateException if close() had been called before or
  131        *   if no file has been added to this object
  132        */
  133       public void close() throws IOException {
  134           if (merged)
  135               throw new IllegalStateException(
  136                   "Merge already performed");
  137   
  138           if (entries.isEmpty())
  139               throw new IllegalStateException(
  140                   "No entries to merge have been defined");
  141   
  142           merged = true;
  143   
  144           // open the compound stream
  145           IndexOutput os = null;
  146           try {
  147               os = directory.createOutput(fileName);
  148   
  149               // Write the number of entries
  150               os.writeVInt(entries.size());
  151   
  152               // Write the directory with all offsets at 0.
  153               // Remember the positions of directory entries so that we can
  154               // adjust the offsets later
  155               long totalSize = 0;
  156               for (FileEntry fe : entries) {
  157                   fe.directoryOffset = os.getFilePointer();
  158                   os.writeLong(0);    // for now
  159                   os.writeString(fe.file);
  160                   totalSize += directory.fileLength(fe.file);
  161               }
  162   
  163               // Pre-allocate size of file as optimization --
  164               // this can potentially help IO performance as
  165               // we write the file and also later during
  166               // searching.  It also uncovers a disk-full
  167               // situation earlier and hopefully without
  168               // actually filling disk to 100%:
  169               final long finalLength = totalSize+os.getFilePointer();
  170               os.setLength(finalLength);
  171   
  172               // Open the files and copy their data into the stream.
  173               // Remember the locations of each file's data section.
  174               byte buffer[] = new byte[16384];
  175               for (FileEntry fe : entries) {
  176                   fe.dataOffset = os.getFilePointer();
  177                   copyFile(fe, os, buffer);
  178               }
  179   
  180               // Write the data offsets into the directory of the compound stream
  181               for (FileEntry fe : entries) {
  182                   os.seek(fe.directoryOffset);
  183                   os.writeLong(fe.dataOffset);
  184               }
  185   
  186               assert finalLength == os.length();
  187   
  188               // Close the output stream. Set the os to null before trying to
  189               // close so that if an exception occurs during the close, the
  190               // finally clause below will not attempt to close the stream
  191               // the second time.
  192               IndexOutput tmp = os;
  193               os = null;
  194               tmp.close();
  195   
  196           } finally {
  197               if (os != null) try { os.close(); } catch (IOException e) { }
  198           }
  199       }
  200   
  201       /** Copy the contents of the file with specified extension into the
  202        *  provided output stream. Use the provided buffer for moving data
  203        *  to reduce memory allocation.
  204        */
  205       private void copyFile(FileEntry source, IndexOutput os, byte buffer[])
  206       throws IOException
  207       {
  208           IndexInput is = null;
  209           try {
  210               long startPtr = os.getFilePointer();
  211   
  212               is = directory.openInput(source.file);
  213               long length = is.length();
  214               long remainder = length;
  215               int chunk = buffer.length;
  216   
  217               while(remainder > 0) {
  218                   int len = (int) Math.min(chunk, remainder);
  219                   is.readBytes(buffer, 0, len, false);
  220                   os.writeBytes(buffer, len);
  221                   remainder -= len;
  222                   if (checkAbort != null)
  223                     // Roughly every 2 MB we will check if
  224                     // it's time to abort
  225                     checkAbort.work(80);
  226               }
  227   
  228               // Verify that remainder is 0
  229               if (remainder != 0)
  230                   throw new IOException(
  231                       "Non-zero remainder length after copying: " + remainder
  232                       + " (id: " + source.file + ", length: " + length
  233                       + ", buffer size: " + chunk + ")");
  234   
  235               // Verify that the output length diff is equal to original file
  236               long endPtr = os.getFilePointer();
  237               long diff = endPtr - startPtr;
  238               if (diff != length)
  239                   throw new IOException(
  240                       "Difference in the output file offsets " + diff
  241                       + " does not match the original file length " + length);
  242   
  243           } finally {
  244               if (is != null) is.close();
  245           }
  246       }
  247   }

Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » index » [javadoc | source]