1 package org.apache.lucene.index;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import org.apache.lucene.store.Directory;
21 import org.apache.lucene.store.IndexOutput;
22 import org.apache.lucene.store.IndexInput;
23 import java.util.LinkedList;
24 import java.util.HashSet;
25
26 import java.io.IOException;
27
28
29 /**
30 * Combines multiple files into a single compound file.
31 * The file format:<br>
32 * <ul>
33 * <li>VInt fileCount</li>
34 * <li>{Directory}
35 * fileCount entries with the following structure:</li>
36 * <ul>
37 * <li>long dataOffset</li>
38 * <li>String fileName</li>
39 * </ul>
40 * <li>{File Data}
41 * fileCount entries with the raw data of the corresponding file</li>
42 * </ul>
43 *
44 * The fileCount integer indicates how many files are contained in this compound
45 * file. The {directory} that follows has that many entries. Each directory entry
46 * contains a long pointer to the start of this file's data section, and a String
47 * with that file's name.
48 */
49 final class CompoundFileWriter {
50
51 private static final class FileEntry {
52 /** source file */
53 String file;
54
55 /** temporary holder for the start of directory entry for this file */
56 long directoryOffset;
57
58 /** temporary holder for the start of this file's data section */
59 long dataOffset;
60 }
61
62
63 private Directory directory;
64 private String fileName;
65 private HashSet<String> ids;
66 private LinkedList<FileEntry> entries;
67 private boolean merged = false;
68 private SegmentMerger.CheckAbort checkAbort;
69
70 /** Create the compound stream in the specified file. The file name is the
71 * entire name (no extensions are added).
72 * @throws NullPointerException if <code>dir</code> or <code>name</code> is null
73 */
74 public CompoundFileWriter(Directory dir, String name) {
75 this(dir, name, null);
76 }
77
78 CompoundFileWriter(Directory dir, String name, SegmentMerger.CheckAbort checkAbort) {
79 if (dir == null)
80 throw new NullPointerException("directory cannot be null");
81 if (name == null)
82 throw new NullPointerException("name cannot be null");
83 this.checkAbort = checkAbort;
84 directory = dir;
85 fileName = name;
86 ids = new HashSet<String>();
87 entries = new LinkedList<FileEntry>();
88 }
89
90 /** Returns the directory of the compound file. */
91 public Directory getDirectory() {
92 return directory;
93 }
94
95 /** Returns the name of the compound file. */
96 public String getName() {
97 return fileName;
98 }
99
100 /** Add a source stream. <code>file</code> is the string by which the
101 * sub-stream will be known in the compound stream.
102 *
103 * @throws IllegalStateException if this writer is closed
104 * @throws NullPointerException if <code>file</code> is null
105 * @throws IllegalArgumentException if a file with the same name
106 * has been added already
107 */
108 public void addFile(String file) {
109 if (merged)
110 throw new IllegalStateException(
111 "Can't add extensions after merge has been called");
112
113 if (file == null)
114 throw new NullPointerException(
115 "file cannot be null");
116
117 if (! ids.add(file))
118 throw new IllegalArgumentException(
119 "File " + file + " already added");
120
121 FileEntry entry = new FileEntry();
122 entry.file = file;
123 entries.add(entry);
124 }
125
126 /** Merge files with the extensions added up to now.
127 * All files with these extensions are combined sequentially into the
128 * compound stream. After successful merge, the source files
129 * are deleted.
130 * @throws IllegalStateException if close() had been called before or
131 * if no file has been added to this object
132 */
133 public void close() throws IOException {
134 if (merged)
135 throw new IllegalStateException(
136 "Merge already performed");
137
138 if (entries.isEmpty())
139 throw new IllegalStateException(
140 "No entries to merge have been defined");
141
142 merged = true;
143
144 // open the compound stream
145 IndexOutput os = null;
146 try {
147 os = directory.createOutput(fileName);
148
149 // Write the number of entries
150 os.writeVInt(entries.size());
151
152 // Write the directory with all offsets at 0.
153 // Remember the positions of directory entries so that we can
154 // adjust the offsets later
155 long totalSize = 0;
156 for (FileEntry fe : entries) {
157 fe.directoryOffset = os.getFilePointer();
158 os.writeLong(0); // for now
159 os.writeString(fe.file);
160 totalSize += directory.fileLength(fe.file);
161 }
162
163 // Pre-allocate size of file as optimization --
164 // this can potentially help IO performance as
165 // we write the file and also later during
166 // searching. It also uncovers a disk-full
167 // situation earlier and hopefully without
168 // actually filling disk to 100%:
169 final long finalLength = totalSize+os.getFilePointer();
170 os.setLength(finalLength);
171
172 // Open the files and copy their data into the stream.
173 // Remember the locations of each file's data section.
174 byte buffer[] = new byte[16384];
175 for (FileEntry fe : entries) {
176 fe.dataOffset = os.getFilePointer();
177 copyFile(fe, os, buffer);
178 }
179
180 // Write the data offsets into the directory of the compound stream
181 for (FileEntry fe : entries) {
182 os.seek(fe.directoryOffset);
183 os.writeLong(fe.dataOffset);
184 }
185
186 assert finalLength == os.length();
187
188 // Close the output stream. Set the os to null before trying to
189 // close so that if an exception occurs during the close, the
190 // finally clause below will not attempt to close the stream
191 // the second time.
192 IndexOutput tmp = os;
193 os = null;
194 tmp.close();
195
196 } finally {
197 if (os != null) try { os.close(); } catch (IOException e) { }
198 }
199 }
200
201 /** Copy the contents of the file with specified extension into the
202 * provided output stream. Use the provided buffer for moving data
203 * to reduce memory allocation.
204 */
205 private void copyFile(FileEntry source, IndexOutput os, byte buffer[])
206 throws IOException
207 {
208 IndexInput is = null;
209 try {
210 long startPtr = os.getFilePointer();
211
212 is = directory.openInput(source.file);
213 long length = is.length();
214 long remainder = length;
215 int chunk = buffer.length;
216
217 while(remainder > 0) {
218 int len = (int) Math.min(chunk, remainder);
219 is.readBytes(buffer, 0, len, false);
220 os.writeBytes(buffer, len);
221 remainder -= len;
222 if (checkAbort != null)
223 // Roughly every 2 MB we will check if
224 // it's time to abort
225 checkAbort.work(80);
226 }
227
228 // Verify that remainder is 0
229 if (remainder != 0)
230 throw new IOException(
231 "Non-zero remainder length after copying: " + remainder
232 + " (id: " + source.file + ", length: " + length
233 + ", buffer size: " + chunk + ")");
234
235 // Verify that the output length diff is equal to original file
236 long endPtr = os.getFilePointer();
237 long diff = endPtr - startPtr;
238 if (diff != length)
239 throw new IOException(
240 "Difference in the output file offsets " + diff
241 + " does not match the original file length " + length);
242
243 } finally {
244 if (is != null) is.close();
245 }
246 }
247 }