Save This Page
Home » poi-src-3.2-FINAL-20081019 » org.apache.poi.poifs » filesystem » [javadoc | source]
    1   
    2   /* ====================================================================
    3      Licensed to the Apache Software Foundation (ASF) under one or more
    4      contributor license agreements.  See the NOTICE file distributed with
    5      this work for additional information regarding copyright ownership.
    6      The ASF licenses this file to You under the Apache License, Version 2.0
    7      (the "License"); you may not use this file except in compliance with
    8      the License.  You may obtain a copy of the License at
    9   
   10          http://www.apache.org/licenses/LICENSE-2.0
   11   
   12      Unless required by applicable law or agreed to in writing, software
   13      distributed under the License is distributed on an "AS IS" BASIS,
   14      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   15      See the License for the specific language governing permissions and
   16      limitations under the License.
   17   ==================================================================== */
   18           
   19   
   20   package org.apache.poi.poifs.filesystem;
   21   
   22   import java.io.ByteArrayInputStream;
   23   import java.io.FileInputStream;
   24   import java.io.FileOutputStream;
   25   import java.io.IOException;
   26   import java.io.InputStream;
   27   import java.io.OutputStream;
   28   import java.io.PushbackInputStream;
   29   import java.util.ArrayList;
   30   import java.util.Collections;
   31   import java.util.Iterator;
   32   import java.util.List;
   33   
   34   import org.apache.poi.poifs.common.POIFSConstants;
   35   import org.apache.poi.poifs.dev.POIFSViewable;
   36   import org.apache.poi.poifs.property.DirectoryProperty;
   37   import org.apache.poi.poifs.property.Property;
   38   import org.apache.poi.poifs.property.PropertyTable;
   39   import org.apache.poi.poifs.storage.BATBlock;
   40   import org.apache.poi.poifs.storage.BlockAllocationTableReader;
   41   import org.apache.poi.poifs.storage.BlockAllocationTableWriter;
   42   import org.apache.poi.poifs.storage.BlockList;
   43   import org.apache.poi.poifs.storage.BlockWritable;
   44   import org.apache.poi.poifs.storage.HeaderBlockConstants;
   45   import org.apache.poi.poifs.storage.HeaderBlockReader;
   46   import org.apache.poi.poifs.storage.HeaderBlockWriter;
   47   import org.apache.poi.poifs.storage.RawDataBlockList;
   48   import org.apache.poi.poifs.storage.SmallBlockTableReader;
   49   import org.apache.poi.poifs.storage.SmallBlockTableWriter;
   50   import org.apache.poi.util.IOUtils;
   51   import org.apache.poi.util.LongField;
   52   import org.apache.poi.util.POILogFactory;
   53   import org.apache.poi.util.POILogger;
   54   
   55   /**
   56    * This is the main class of the POIFS system; it manages the entire
   57    * life cycle of the filesystem.
   58    *
   59    * @author Marc Johnson (mjohnson at apache dot org)
   60    */
   61   
   62   public class POIFSFileSystem
   63       implements POIFSViewable
   64   {
   65   	private static final POILogger _logger =
   66   		POILogFactory.getLogger(POIFSFileSystem.class);
   67       
   68       private static final class CloseIgnoringInputStream extends InputStream {
   69   
   70           private final InputStream _is;
   71           public CloseIgnoringInputStream(InputStream is) {
   72               _is = is;
   73           }
   74           public int read() throws IOException {
   75               return _is.read();
   76           }
   77           public int read(byte[] b, int off, int len) throws IOException {
   78               return _is.read(b, off, len);
   79           }
   80           public void close() {
   81               // do nothing
   82           }
   83       }
   84       
   85       /**
   86        * Convenience method for clients that want to avoid the auto-close behaviour of the constructor.
   87        */
   88       public static InputStream createNonClosingInputStream(InputStream is) {
   89           return new CloseIgnoringInputStream(is);
   90       }
   91       
   92       private PropertyTable _property_table;
   93       private List          _documents;
   94       private DirectoryNode _root;
   95       
   96       /**
   97        * What big block size the file uses. Most files
   98        *  use 512 bytes, but a few use 4096
   99        */
  100       private int bigBlockSize = POIFSConstants.BIG_BLOCK_SIZE;
  101   
  102       /**
  103        * Constructor, intended for writing
  104        */
  105       public POIFSFileSystem()
  106       {
  107           _property_table = new PropertyTable();
  108           _documents      = new ArrayList();
  109           _root           = null;
  110       }
  111   
  112       /**
  113        * Create a POIFSFileSystem from an <tt>InputStream</tt>.  Normally the stream is read until
  114        * EOF.  The stream is always closed.<p/>
  115        * 
  116        * Some streams are usable after reaching EOF (typically those that return <code>true</code> 
  117        * for <tt>markSupported()</tt>).  In the unlikely case that the caller has such a stream 
  118        * <i>and</i> needs to use it after this constructor completes, a work around is to wrap the
  119        * stream in order to trap the <tt>close()</tt> call.  A convenience method (
  120        * <tt>createNonClosingInputStream()</tt>) has been provided for this purpose:
  121        * <pre>
  122        * InputStream wrappedStream = POIFSFileSystem.createNonClosingInputStream(is);
  123        * HSSFWorkbook wb = new HSSFWorkbook(wrappedStream);
  124        * is.reset(); 
  125        * doSomethingElse(is); 
  126        * </pre>
  127        * Note also the special case of <tt>ByteArrayInputStream</tt> for which the <tt>close()</tt>
  128        * method does nothing. 
  129        * <pre>
  130        * ByteArrayInputStream bais = ...
  131        * HSSFWorkbook wb = new HSSFWorkbook(bais); // calls bais.close() !
  132        * bais.reset(); // no problem
  133        * doSomethingElse(bais);
  134        * </pre>
  135        *
  136        * @param stream the InputStream from which to read the data
  137        *
  138        * @exception IOException on errors reading, or on invalid data
  139        */
  140   
  141       public POIFSFileSystem(InputStream stream)
  142           throws IOException
  143       {
  144           this();
  145           boolean success = false;
  146   
  147           HeaderBlockReader header_block_reader;
  148           RawDataBlockList data_blocks;
  149           try {
  150               // read the header block from the stream
  151               header_block_reader = new HeaderBlockReader(stream);
  152               bigBlockSize = header_block_reader.getBigBlockSize();
  153               
  154               // read the rest of the stream into blocks
  155               data_blocks = new RawDataBlockList(stream, bigBlockSize);
  156               success = true;
  157           } finally {
  158               closeInputStream(stream, success);
  159           }
  160           
  161   
  162           // set up the block allocation table (necessary for the
  163           // data_blocks to be manageable
  164           new BlockAllocationTableReader(header_block_reader.getBATCount(),
  165                                          header_block_reader.getBATArray(),
  166                                          header_block_reader.getXBATCount(),
  167                                          header_block_reader.getXBATIndex(),
  168                                          data_blocks);
  169   
  170           // get property table from the document
  171           PropertyTable properties =
  172               new PropertyTable(header_block_reader.getPropertyStart(),
  173                                 data_blocks);
  174   
  175           // init documents
  176           processProperties(SmallBlockTableReader
  177               .getSmallDocumentBlocks(data_blocks, properties
  178                   .getRoot(), header_block_reader
  179                       .getSBATStart()), data_blocks, properties.getRoot()
  180                           .getChildren(), null);
  181       }
  182       /**
  183        * @param stream the stream to be closed
  184        * @param success <code>false</code> if an exception is currently being thrown in the calling method
  185        */
  186       private void closeInputStream(InputStream stream, boolean success) {
  187           
  188           if(stream.markSupported() && !(stream instanceof ByteArrayInputStream)) {
  189               String msg = "POIFS is closing the supplied input stream of type (" 
  190                       + stream.getClass().getName() + ") which supports mark/reset.  "
  191                       + "This will be a problem for the caller if the stream will still be used.  "
  192                       + "If that is the case the caller should wrap the input stream to avoid this close logic.  "
  193                       + "This warning is only temporary and will not be present in future versions of POI.";
  194               _logger.log(POILogger.WARN, msg);
  195           }
  196           try {
  197               stream.close();
  198           } catch (IOException e) {
  199               if(success) {
  200                   throw new RuntimeException(e);
  201               }
  202               // else not success? Try block did not complete normally 
  203               // just print stack trace and leave original ex to be thrown
  204               e.printStackTrace();
  205           }
  206       }
  207   
  208       /**
  209        * Checks that the supplied InputStream (which MUST
  210        *  support mark and reset, or be a PushbackInputStream) 
  211        *  has a POIFS (OLE2) header at the start of it.
  212        * If your InputStream does not support mark / reset,
  213        *  then wrap it in a PushBackInputStream, then be
  214        *  sure to always use that, and not the original!
  215        * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream 
  216        */
  217       public static boolean hasPOIFSHeader(InputStream inp) throws IOException {
  218           // We want to peek at the first 8 bytes 
  219           inp.mark(8);
  220   
  221           byte[] header = new byte[8];
  222           IOUtils.readFully(inp, header);
  223           LongField signature = new LongField(HeaderBlockConstants._signature_offset, header);
  224   
  225           // Wind back those 8 bytes
  226           if(inp instanceof PushbackInputStream) {
  227               PushbackInputStream pin = (PushbackInputStream)inp;
  228               pin.unread(header);
  229           } else {
  230               inp.reset();
  231           }
  232           
  233           // Did it match the signature?
  234           return (signature.get() == HeaderBlockConstants._signature);
  235       }
  236   
  237       /**
  238        * Create a new document to be added to the root directory
  239        *
  240        * @param stream the InputStream from which the document's data
  241        *               will be obtained
  242        * @param name the name of the new POIFSDocument
  243        *
  244        * @return the new DocumentEntry
  245        *
  246        * @exception IOException on error creating the new POIFSDocument
  247        */
  248   
  249       public DocumentEntry createDocument(final InputStream stream,
  250                                           final String name)
  251           throws IOException
  252       {
  253           return getRoot().createDocument(name, stream);
  254       }
  255   
  256       /**
  257        * create a new DocumentEntry in the root entry; the data will be
  258        * provided later
  259        *
  260        * @param name the name of the new DocumentEntry
  261        * @param size the size of the new DocumentEntry
  262        * @param writer the writer of the new DocumentEntry
  263        *
  264        * @return the new DocumentEntry
  265        *
  266        * @exception IOException
  267        */
  268   
  269       public DocumentEntry createDocument(final String name, final int size,
  270                                           final POIFSWriterListener writer)
  271           throws IOException
  272       {
  273           return getRoot().createDocument(name, size, writer);
  274       }
  275   
  276       /**
  277        * create a new DirectoryEntry in the root directory
  278        *
  279        * @param name the name of the new DirectoryEntry
  280        *
  281        * @return the new DirectoryEntry
  282        *
  283        * @exception IOException on name duplication
  284        */
  285   
  286       public DirectoryEntry createDirectory(final String name)
  287           throws IOException
  288       {
  289           return getRoot().createDirectory(name);
  290       }
  291       
  292       /**
  293        * Write the filesystem out
  294        *
  295        * @param stream the OutputStream to which the filesystem will be
  296        *               written
  297        *
  298        * @exception IOException thrown on errors writing to the stream
  299        */
  300   
  301       public void writeFilesystem(final OutputStream stream)
  302           throws IOException
  303       {
  304   
  305           // get the property table ready
  306           _property_table.preWrite();
  307   
  308           // create the small block store, and the SBAT
  309           SmallBlockTableWriter      sbtw       =
  310               new SmallBlockTableWriter(_documents, _property_table.getRoot());
  311   
  312           // create the block allocation table
  313           BlockAllocationTableWriter bat        =
  314               new BlockAllocationTableWriter();
  315   
  316           // create a list of BATManaged objects: the documents plus the
  317           // property table and the small block table
  318           List bm_objects = new ArrayList();
  319   
  320           bm_objects.addAll(_documents);
  321           bm_objects.add(_property_table);
  322           bm_objects.add(sbtw);
  323           bm_objects.add(sbtw.getSBAT());
  324   
  325           // walk the list, allocating space for each and assigning each
  326           // a starting block number
  327           Iterator iter = bm_objects.iterator();
  328   
  329           while (iter.hasNext())
  330           {
  331               BATManaged bmo         = ( BATManaged ) iter.next();
  332               int        block_count = bmo.countBlocks();
  333   
  334               if (block_count != 0)
  335               {
  336                   bmo.setStartBlock(bat.allocateSpace(block_count));
  337               }
  338               else
  339               {
  340   
  341                   // Either the BATManaged object is empty or its data
  342                   // is composed of SmallBlocks; in either case,
  343                   // allocating space in the BAT is inappropriate
  344               }
  345           }
  346   
  347           // allocate space for the block allocation table and take its
  348           // starting block
  349           int               batStartBlock       = bat.createBlocks();
  350   
  351           // get the extended block allocation table blocks
  352           HeaderBlockWriter header_block_writer = new HeaderBlockWriter();
  353           BATBlock[]        xbat_blocks         =
  354               header_block_writer.setBATBlocks(bat.countBlocks(),
  355                                                batStartBlock);
  356   
  357           // set the property table start block
  358           header_block_writer.setPropertyStart(_property_table.getStartBlock());
  359   
  360           // set the small block allocation table start block
  361           header_block_writer.setSBATStart(sbtw.getSBAT().getStartBlock());
  362   
  363           // set the small block allocation table block count
  364           header_block_writer.setSBATBlockCount(sbtw.getSBATBlockCount());
  365   
  366           // the header is now properly initialized. Make a list of
  367           // writers (the header block, followed by the documents, the
  368           // property table, the small block store, the small block
  369           // allocation table, the block allocation table, and the
  370           // extended block allocation table blocks)
  371           List writers = new ArrayList();
  372   
  373           writers.add(header_block_writer);
  374           writers.addAll(_documents);
  375           writers.add(_property_table);
  376           writers.add(sbtw);
  377           writers.add(sbtw.getSBAT());
  378           writers.add(bat);
  379           for (int j = 0; j < xbat_blocks.length; j++)
  380           {
  381               writers.add(xbat_blocks[ j ]);
  382           }
  383   
  384           // now, write everything out
  385           iter = writers.iterator();
  386           while (iter.hasNext())
  387           {
  388               BlockWritable writer = ( BlockWritable ) iter.next();
  389   
  390               writer.writeBlocks(stream);
  391           }
  392       }
  393   
  394       /**
  395        * read in a file and write it back out again
  396        *
  397        * @param args names of the files; arg[ 0 ] is the input file,
  398        *             arg[ 1 ] is the output file
  399        *
  400        * @exception IOException
  401        */
  402   
  403       public static void main(String args[])
  404           throws IOException
  405       {
  406           if (args.length != 2)
  407           {
  408               System.err.println(
  409                   "two arguments required: input filename and output filename");
  410               System.exit(1);
  411           }
  412           FileInputStream  istream = new FileInputStream(args[ 0 ]);
  413           FileOutputStream ostream = new FileOutputStream(args[ 1 ]);
  414   
  415           new POIFSFileSystem(istream).writeFilesystem(ostream);
  416           istream.close();
  417           ostream.close();
  418       }
  419   
  420       /**
  421        * get the root entry
  422        *
  423        * @return the root entry
  424        */
  425   
  426       public DirectoryNode getRoot()
  427       {
  428           if (_root == null)
  429           {
  430               _root = new DirectoryNode(_property_table.getRoot(), this, null);
  431           }
  432           return _root;
  433       }
  434   
  435       /**
  436        * open a document in the root entry's list of entries
  437        *
  438        * @param documentName the name of the document to be opened
  439        *
  440        * @return a newly opened DocumentInputStream
  441        *
  442        * @exception IOException if the document does not exist or the
  443        *            name is that of a DirectoryEntry
  444        */
  445   
  446       public DocumentInputStream createDocumentInputStream(
  447               final String documentName)
  448           throws IOException
  449       {
  450       	return getRoot().createDocumentInputStream(documentName);
  451       }
  452   
  453       /**
  454        * add a new POIFSDocument
  455        *
  456        * @param document the POIFSDocument being added
  457        */
  458   
  459       void addDocument(final POIFSDocument document)
  460       {
  461           _documents.add(document);
  462           _property_table.addProperty(document.getDocumentProperty());
  463       }
  464   
  465       /**
  466        * add a new DirectoryProperty
  467        *
  468        * @param directory the DirectoryProperty being added
  469        */
  470   
  471       void addDirectory(final DirectoryProperty directory)
  472       {
  473           _property_table.addProperty(directory);
  474       }
  475   
  476       /**
  477        * remove an entry
  478        *
  479        * @param entry to be removed
  480        */
  481   
  482       void remove(EntryNode entry)
  483       {
  484           _property_table.removeProperty(entry.getProperty());
  485           if (entry.isDocumentEntry())
  486           {
  487               _documents.remove((( DocumentNode ) entry).getDocument());
  488           }
  489       }
  490   
  491       private void processProperties(final BlockList small_blocks,
  492                                      final BlockList big_blocks,
  493                                      final Iterator properties,
  494                                      final DirectoryNode dir)
  495           throws IOException
  496       {
  497           while (properties.hasNext())
  498           {
  499               Property      property = ( Property ) properties.next();
  500               String        name     = property.getName();
  501               DirectoryNode parent   = (dir == null)
  502                                        ? (( DirectoryNode ) getRoot())
  503                                        : dir;
  504   
  505               if (property.isDirectory())
  506               {
  507                   DirectoryNode new_dir =
  508                       ( DirectoryNode ) parent.createDirectory(name);
  509   
  510                   new_dir.setStorageClsid( property.getStorageClsid() );
  511   
  512                   processProperties(
  513                       small_blocks, big_blocks,
  514                       (( DirectoryProperty ) property).getChildren(), new_dir);
  515               }
  516               else
  517               {
  518                   int           startBlock = property.getStartBlock();
  519                   int           size       = property.getSize();
  520                   POIFSDocument document   = null;
  521   
  522                   if (property.shouldUseSmallBlocks())
  523                   {
  524                       document =
  525                           new POIFSDocument(name, small_blocks
  526                               .fetchBlocks(startBlock), size);
  527                   }
  528                   else
  529                   {
  530                       document =
  531                           new POIFSDocument(name,
  532                                             big_blocks.fetchBlocks(startBlock),
  533                                             size);
  534                   }
  535                   parent.createDocument(document);
  536               }
  537           }
  538       }
  539   
  540       /* ********** START begin implementation of POIFSViewable ********** */
  541   
  542       /**
  543        * Get an array of objects, some of which may implement
  544        * POIFSViewable
  545        *
  546        * @return an array of Object; may not be null, but may be empty
  547        */
  548   
  549       public Object [] getViewableArray()
  550       {
  551           if (preferArray())
  552           {
  553               return (( POIFSViewable ) getRoot()).getViewableArray();
  554           }
  555           else
  556           {
  557               return new Object[ 0 ];
  558           }
  559       }
  560   
  561       /**
  562        * Get an Iterator of objects, some of which may implement
  563        * POIFSViewable
  564        *
  565        * @return an Iterator; may not be null, but may have an empty
  566        * back end store
  567        */
  568   
  569       public Iterator getViewableIterator()
  570       {
  571           if (!preferArray())
  572           {
  573               return (( POIFSViewable ) getRoot()).getViewableIterator();
  574           }
  575           else
  576           {
  577               return Collections.EMPTY_LIST.iterator();
  578           }
  579       }
  580   
  581       /**
  582        * Give viewers a hint as to whether to call getViewableArray or
  583        * getViewableIterator
  584        *
  585        * @return true if a viewer should call getViewableArray, false if
  586        *         a viewer should call getViewableIterator
  587        */
  588   
  589       public boolean preferArray()
  590       {
  591           return (( POIFSViewable ) getRoot()).preferArray();
  592       }
  593   
  594       /**
  595        * Provides a short description of the object, to be used when a
  596        * POIFSViewable object has not provided its contents.
  597        *
  598        * @return short description
  599        */
  600   
  601       public String getShortDescription()
  602       {
  603           return "POIFS FileSystem";
  604       }
  605   
  606       /**
  607        * @return The Big Block size, normally 512 bytes, sometimes 4096 bytes
  608        */
  609       public int getBigBlockSize() {
  610       	return bigBlockSize;
  611       }
  612       
  613       /* **********  END  begin implementation of POIFSViewable ********** */
  614   }   // end public class POIFSFileSystem
  615   

Save This Page
Home » poi-src-3.2-FINAL-20081019 » org.apache.poi.poifs » filesystem » [javadoc | source]