1
2 /* ====================================================================
3 Licensed to the Apache Software Foundation (ASF) under one or more
4 contributor license agreements. See the NOTICE file distributed with
5 this work for additional information regarding copyright ownership.
6 The ASF licenses this file to You under the Apache License, Version 2.0
7 (the "License"); you may not use this file except in compliance with
8 the License. You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 ==================================================================== */
18
19
20 package org.apache.poi.poifs.filesystem;
21
22 import java.io.ByteArrayInputStream;
23 import java.io.FileInputStream;
24 import java.io.FileOutputStream;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.OutputStream;
28 import java.io.PushbackInputStream;
29 import java.util.ArrayList;
30 import java.util.Collections;
31 import java.util.Iterator;
32 import java.util.List;
33
34 import org.apache.poi.poifs.common.POIFSConstants;
35 import org.apache.poi.poifs.dev.POIFSViewable;
36 import org.apache.poi.poifs.property.DirectoryProperty;
37 import org.apache.poi.poifs.property.Property;
38 import org.apache.poi.poifs.property.PropertyTable;
39 import org.apache.poi.poifs.storage.BATBlock;
40 import org.apache.poi.poifs.storage.BlockAllocationTableReader;
41 import org.apache.poi.poifs.storage.BlockAllocationTableWriter;
42 import org.apache.poi.poifs.storage.BlockList;
43 import org.apache.poi.poifs.storage.BlockWritable;
44 import org.apache.poi.poifs.storage.HeaderBlockConstants;
45 import org.apache.poi.poifs.storage.HeaderBlockReader;
46 import org.apache.poi.poifs.storage.HeaderBlockWriter;
47 import org.apache.poi.poifs.storage.RawDataBlockList;
48 import org.apache.poi.poifs.storage.SmallBlockTableReader;
49 import org.apache.poi.poifs.storage.SmallBlockTableWriter;
50 import org.apache.poi.util.IOUtils;
51 import org.apache.poi.util.LongField;
52 import org.apache.poi.util.POILogFactory;
53 import org.apache.poi.util.POILogger;
54
55 /**
56 * This is the main class of the POIFS system; it manages the entire
57 * life cycle of the filesystem.
58 *
59 * @author Marc Johnson (mjohnson at apache dot org)
60 */
61
62 public class POIFSFileSystem
63 implements POIFSViewable
64 {
65 private static final POILogger _logger =
66 POILogFactory.getLogger(POIFSFileSystem.class);
67
68 private static final class CloseIgnoringInputStream extends InputStream {
69
70 private final InputStream _is;
71 public CloseIgnoringInputStream(InputStream is) {
72 _is = is;
73 }
74 public int read() throws IOException {
75 return _is.read();
76 }
77 public int read(byte[] b, int off, int len) throws IOException {
78 return _is.read(b, off, len);
79 }
80 public void close() {
81 // do nothing
82 }
83 }
84
85 /**
86 * Convenience method for clients that want to avoid the auto-close behaviour of the constructor.
87 */
88 public static InputStream createNonClosingInputStream(InputStream is) {
89 return new CloseIgnoringInputStream(is);
90 }
91
92 private PropertyTable _property_table;
93 private List _documents;
94 private DirectoryNode _root;
95
96 /**
97 * What big block size the file uses. Most files
98 * use 512 bytes, but a few use 4096
99 */
100 private int bigBlockSize = POIFSConstants.BIG_BLOCK_SIZE;
101
/**
 * Constructor, intended for writing. Starts with a fresh property table
 * and an empty document list; the root node is left unset until
 * {@link #getRoot()} is first called.
 */
public POIFSFileSystem()
{
    this._root = null;
    this._documents = new ArrayList();
    this._property_table = new PropertyTable();
}
111
112 /**
113 * Create a POIFSFileSystem from an <tt>InputStream</tt>. Normally the stream is read until
114 * EOF. The stream is always closed.<p/>
115 *
116 * Some streams are usable after reaching EOF (typically those that return <code>true</code>
117 * for <tt>markSupported()</tt>). In the unlikely case that the caller has such a stream
118 * <i>and</i> needs to use it after this constructor completes, a work around is to wrap the
119 * stream in order to trap the <tt>close()</tt> call. A convenience method (
120 * <tt>createNonClosingInputStream()</tt>) has been provided for this purpose:
121 * <pre>
122 * InputStream wrappedStream = POIFSFileSystem.createNonClosingInputStream(is);
123 * HSSFWorkbook wb = new HSSFWorkbook(wrappedStream);
124 * is.reset();
125 * doSomethingElse(is);
126 * </pre>
127 * Note also the special case of <tt>ByteArrayInputStream</tt> for which the <tt>close()</tt>
128 * method does nothing.
129 * <pre>
130 * ByteArrayInputStream bais = ...
131 * HSSFWorkbook wb = new HSSFWorkbook(bais); // calls bais.close() !
132 * bais.reset(); // no problem
133 * doSomethingElse(bais);
134 * </pre>
135 *
136 * @param stream the InputStream from which to read the data
137 *
138 * @exception IOException on errors reading, or on invalid data
139 */
140
141 public POIFSFileSystem(InputStream stream)
142 throws IOException
143 {
144 this();
145 boolean success = false;
146
147 HeaderBlockReader header_block_reader;
148 RawDataBlockList data_blocks;
149 try {
150 // read the header block from the stream
151 header_block_reader = new HeaderBlockReader(stream);
152 bigBlockSize = header_block_reader.getBigBlockSize();
153
154 // read the rest of the stream into blocks
155 data_blocks = new RawDataBlockList(stream, bigBlockSize);
156 success = true;
157 } finally {
158 closeInputStream(stream, success);
159 }
160
161
162 // set up the block allocation table (necessary for the
163 // data_blocks to be manageable
164 new BlockAllocationTableReader(header_block_reader.getBATCount(),
165 header_block_reader.getBATArray(),
166 header_block_reader.getXBATCount(),
167 header_block_reader.getXBATIndex(),
168 data_blocks);
169
170 // get property table from the document
171 PropertyTable properties =
172 new PropertyTable(header_block_reader.getPropertyStart(),
173 data_blocks);
174
175 // init documents
176 processProperties(SmallBlockTableReader
177 .getSmallDocumentBlocks(data_blocks, properties
178 .getRoot(), header_block_reader
179 .getSBATStart()), data_blocks, properties.getRoot()
180 .getChildren(), null);
181 }
182 /**
183 * @param stream the stream to be closed
184 * @param success <code>false</code> if an exception is currently being thrown in the calling method
185 */
186 private void closeInputStream(InputStream stream, boolean success) {
187
188 if(stream.markSupported() && !(stream instanceof ByteArrayInputStream)) {
189 String msg = "POIFS is closing the supplied input stream of type ("
190 + stream.getClass().getName() + ") which supports mark/reset. "
191 + "This will be a problem for the caller if the stream will still be used. "
192 + "If that is the case the caller should wrap the input stream to avoid this close logic. "
193 + "This warning is only temporary and will not be present in future versions of POI.";
194 _logger.log(POILogger.WARN, msg);
195 }
196 try {
197 stream.close();
198 } catch (IOException e) {
199 if(success) {
200 throw new RuntimeException(e);
201 }
202 // else not success? Try block did not complete normally
203 // just print stack trace and leave original ex to be thrown
204 e.printStackTrace();
205 }
206 }
207
208 /**
209 * Checks that the supplied InputStream (which MUST
210 * support mark and reset, or be a PushbackInputStream)
211 * has a POIFS (OLE2) header at the start of it.
212 * If your InputStream does not support mark / reset,
213 * then wrap it in a PushBackInputStream, then be
214 * sure to always use that, and not the original!
215 * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
216 */
217 public static boolean hasPOIFSHeader(InputStream inp) throws IOException {
218 // We want to peek at the first 8 bytes
219 inp.mark(8);
220
221 byte[] header = new byte[8];
222 IOUtils.readFully(inp, header);
223 LongField signature = new LongField(HeaderBlockConstants._signature_offset, header);
224
225 // Wind back those 8 bytes
226 if(inp instanceof PushbackInputStream) {
227 PushbackInputStream pin = (PushbackInputStream)inp;
228 pin.unread(header);
229 } else {
230 inp.reset();
231 }
232
233 // Did it match the signature?
234 return (signature.get() == HeaderBlockConstants._signature);
235 }
236
237 /**
238 * Create a new document to be added to the root directory
239 *
240 * @param stream the InputStream from which the document's data
241 * will be obtained
242 * @param name the name of the new POIFSDocument
243 *
244 * @return the new DocumentEntry
245 *
246 * @exception IOException on error creating the new POIFSDocument
247 */
248
249 public DocumentEntry createDocument(final InputStream stream,
250 final String name)
251 throws IOException
252 {
253 return getRoot().createDocument(name, stream);
254 }
255
256 /**
257 * create a new DocumentEntry in the root entry; the data will be
258 * provided later
259 *
260 * @param name the name of the new DocumentEntry
261 * @param size the size of the new DocumentEntry
262 * @param writer the writer of the new DocumentEntry
263 *
264 * @return the new DocumentEntry
265 *
266 * @exception IOException
267 */
268
269 public DocumentEntry createDocument(final String name, final int size,
270 final POIFSWriterListener writer)
271 throws IOException
272 {
273 return getRoot().createDocument(name, size, writer);
274 }
275
276 /**
277 * create a new DirectoryEntry in the root directory
278 *
279 * @param name the name of the new DirectoryEntry
280 *
281 * @return the new DirectoryEntry
282 *
283 * @exception IOException on name duplication
284 */
285
286 public DirectoryEntry createDirectory(final String name)
287 throws IOException
288 {
289 return getRoot().createDirectory(name);
290 }
291
292 /**
293 * Write the filesystem out
294 *
295 * @param stream the OutputStream to which the filesystem will be
296 * written
297 *
298 * @exception IOException thrown on errors writing to the stream
299 */
300
301 public void writeFilesystem(final OutputStream stream)
302 throws IOException
303 {
304
305 // get the property table ready
306 _property_table.preWrite();
307
308 // create the small block store, and the SBAT
309 SmallBlockTableWriter sbtw =
310 new SmallBlockTableWriter(_documents, _property_table.getRoot());
311
312 // create the block allocation table
313 BlockAllocationTableWriter bat =
314 new BlockAllocationTableWriter();
315
316 // create a list of BATManaged objects: the documents plus the
317 // property table and the small block table
318 List bm_objects = new ArrayList();
319
320 bm_objects.addAll(_documents);
321 bm_objects.add(_property_table);
322 bm_objects.add(sbtw);
323 bm_objects.add(sbtw.getSBAT());
324
325 // walk the list, allocating space for each and assigning each
326 // a starting block number
327 Iterator iter = bm_objects.iterator();
328
329 while (iter.hasNext())
330 {
331 BATManaged bmo = ( BATManaged ) iter.next();
332 int block_count = bmo.countBlocks();
333
334 if (block_count != 0)
335 {
336 bmo.setStartBlock(bat.allocateSpace(block_count));
337 }
338 else
339 {
340
341 // Either the BATManaged object is empty or its data
342 // is composed of SmallBlocks; in either case,
343 // allocating space in the BAT is inappropriate
344 }
345 }
346
347 // allocate space for the block allocation table and take its
348 // starting block
349 int batStartBlock = bat.createBlocks();
350
351 // get the extended block allocation table blocks
352 HeaderBlockWriter header_block_writer = new HeaderBlockWriter();
353 BATBlock[] xbat_blocks =
354 header_block_writer.setBATBlocks(bat.countBlocks(),
355 batStartBlock);
356
357 // set the property table start block
358 header_block_writer.setPropertyStart(_property_table.getStartBlock());
359
360 // set the small block allocation table start block
361 header_block_writer.setSBATStart(sbtw.getSBAT().getStartBlock());
362
363 // set the small block allocation table block count
364 header_block_writer.setSBATBlockCount(sbtw.getSBATBlockCount());
365
366 // the header is now properly initialized. Make a list of
367 // writers (the header block, followed by the documents, the
368 // property table, the small block store, the small block
369 // allocation table, the block allocation table, and the
370 // extended block allocation table blocks)
371 List writers = new ArrayList();
372
373 writers.add(header_block_writer);
374 writers.addAll(_documents);
375 writers.add(_property_table);
376 writers.add(sbtw);
377 writers.add(sbtw.getSBAT());
378 writers.add(bat);
379 for (int j = 0; j < xbat_blocks.length; j++)
380 {
381 writers.add(xbat_blocks[ j ]);
382 }
383
384 // now, write everything out
385 iter = writers.iterator();
386 while (iter.hasNext())
387 {
388 BlockWritable writer = ( BlockWritable ) iter.next();
389
390 writer.writeBlocks(stream);
391 }
392 }
393
394 /**
395 * read in a file and write it back out again
396 *
397 * @param args names of the files; arg[ 0 ] is the input file,
398 * arg[ 1 ] is the output file
399 *
400 * @exception IOException
401 */
402
403 public static void main(String args[])
404 throws IOException
405 {
406 if (args.length != 2)
407 {
408 System.err.println(
409 "two arguments required: input filename and output filename");
410 System.exit(1);
411 }
412 FileInputStream istream = new FileInputStream(args[ 0 ]);
413 FileOutputStream ostream = new FileOutputStream(args[ 1 ]);
414
415 new POIFSFileSystem(istream).writeFilesystem(ostream);
416 istream.close();
417 ostream.close();
418 }
419
420 /**
421 * get the root entry
422 *
423 * @return the root entry
424 */
425
426 public DirectoryNode getRoot()
427 {
428 if (_root == null)
429 {
430 _root = new DirectoryNode(_property_table.getRoot(), this, null);
431 }
432 return _root;
433 }
434
435 /**
436 * open a document in the root entry's list of entries
437 *
438 * @param documentName the name of the document to be opened
439 *
440 * @return a newly opened DocumentInputStream
441 *
442 * @exception IOException if the document does not exist or the
443 * name is that of a DirectoryEntry
444 */
445
446 public DocumentInputStream createDocumentInputStream(
447 final String documentName)
448 throws IOException
449 {
450 return getRoot().createDocumentInputStream(documentName);
451 }
452
453 /**
454 * add a new POIFSDocument
455 *
456 * @param document the POIFSDocument being added
457 */
458
459 void addDocument(final POIFSDocument document)
460 {
461 _documents.add(document);
462 _property_table.addProperty(document.getDocumentProperty());
463 }
464
465 /**
466 * add a new DirectoryProperty
467 *
468 * @param directory the DirectoryProperty being added
469 */
470
471 void addDirectory(final DirectoryProperty directory)
472 {
473 _property_table.addProperty(directory);
474 }
475
476 /**
477 * remove an entry
478 *
479 * @param entry to be removed
480 */
481
482 void remove(EntryNode entry)
483 {
484 _property_table.removeProperty(entry.getProperty());
485 if (entry.isDocumentEntry())
486 {
487 _documents.remove((( DocumentNode ) entry).getDocument());
488 }
489 }
490
491 private void processProperties(final BlockList small_blocks,
492 final BlockList big_blocks,
493 final Iterator properties,
494 final DirectoryNode dir)
495 throws IOException
496 {
497 while (properties.hasNext())
498 {
499 Property property = ( Property ) properties.next();
500 String name = property.getName();
501 DirectoryNode parent = (dir == null)
502 ? (( DirectoryNode ) getRoot())
503 : dir;
504
505 if (property.isDirectory())
506 {
507 DirectoryNode new_dir =
508 ( DirectoryNode ) parent.createDirectory(name);
509
510 new_dir.setStorageClsid( property.getStorageClsid() );
511
512 processProperties(
513 small_blocks, big_blocks,
514 (( DirectoryProperty ) property).getChildren(), new_dir);
515 }
516 else
517 {
518 int startBlock = property.getStartBlock();
519 int size = property.getSize();
520 POIFSDocument document = null;
521
522 if (property.shouldUseSmallBlocks())
523 {
524 document =
525 new POIFSDocument(name, small_blocks
526 .fetchBlocks(startBlock), size);
527 }
528 else
529 {
530 document =
531 new POIFSDocument(name,
532 big_blocks.fetchBlocks(startBlock),
533 size);
534 }
535 parent.createDocument(document);
536 }
537 }
538 }
539
/* ********** START implementation of POIFSViewable ********** */
541
542 /**
543 * Get an array of objects, some of which may implement
544 * POIFSViewable
545 *
546 * @return an array of Object; may not be null, but may be empty
547 */
548
549 public Object [] getViewableArray()
550 {
551 if (preferArray())
552 {
553 return (( POIFSViewable ) getRoot()).getViewableArray();
554 }
555 else
556 {
557 return new Object[ 0 ];
558 }
559 }
560
561 /**
562 * Get an Iterator of objects, some of which may implement
563 * POIFSViewable
564 *
565 * @return an Iterator; may not be null, but may have an empty
566 * back end store
567 */
568
569 public Iterator getViewableIterator()
570 {
571 if (!preferArray())
572 {
573 return (( POIFSViewable ) getRoot()).getViewableIterator();
574 }
575 else
576 {
577 return Collections.EMPTY_LIST.iterator();
578 }
579 }
580
581 /**
582 * Give viewers a hint as to whether to call getViewableArray or
583 * getViewableIterator
584 *
585 * @return true if a viewer should call getViewableArray, false if
586 * a viewer should call getViewableIterator
587 */
588
589 public boolean preferArray()
590 {
591 return (( POIFSViewable ) getRoot()).preferArray();
592 }
593
594 /**
595 * Provides a short description of the object, to be used when a
596 * POIFSViewable object has not provided its contents.
597 *
598 * @return short description
599 */
600
601 public String getShortDescription()
602 {
603 return "POIFS FileSystem";
604 }
605
606 /**
607 * @return The Big Block size, normally 512 bytes, sometimes 4096 bytes
608 */
609 public int getBigBlockSize() {
610 return bigBlockSize;
611 }
612
/* ********** END implementation of POIFSViewable ********** */
614 } // end public class POIFSFileSystem
615