Save This Page
Home » poi-src-3.2-FINAL-20081019 » org.apache » poi » hslf » [javadoc | source]
    1   
    2   /* ====================================================================
    3      Licensed to the Apache Software Foundation (ASF) under one or more
    4      contributor license agreements.  See the NOTICE file distributed with
    5      this work for additional information regarding copyright ownership.
    6      The ASF licenses this file to You under the Apache License, Version 2.0
    7      (the "License"); you may not use this file except in compliance with
    8      the License.  You may obtain a copy of the License at
    9   
   10          http://www.apache.org/licenses/LICENSE-2.0
   11   
   12      Unless required by applicable law or agreed to in writing, software
   13      distributed under the License is distributed on an "AS IS" BASIS,
   14      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   15      See the License for the specific language governing permissions and
   16      limitations under the License.
   17   ==================================================================== */
   18           
   19   
   20   
   21   package org.apache.poi.hslf;
   22   
   import java.io.ByteArrayInputStream;
   import java.io.ByteArrayOutputStream;
   import java.io.FileInputStream;
   import java.io.FileNotFoundException;
   import java.io.IOException;
   import java.io.InputStream;
   import java.io.OutputStream;
   import java.util.*;

   import org.apache.poi.POIDocument;
   import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
   import org.apache.poi.hslf.exceptions.EncryptedPowerPointFileException;
   import org.apache.poi.hslf.exceptions.HSLFException;
   import org.apache.poi.hslf.record.*;
   import org.apache.poi.hslf.usermodel.ObjectData;
   import org.apache.poi.hslf.usermodel.PictureData;
   import org.apache.poi.hslf.model.Shape;
   import org.apache.poi.poifs.filesystem.DirectoryNode;
   import org.apache.poi.poifs.filesystem.DocumentEntry;
   import org.apache.poi.poifs.filesystem.DocumentInputStream;
   import org.apache.poi.poifs.filesystem.POIFSFileSystem;
   import org.apache.poi.util.LittleEndian;
   import org.apache.poi.util.POILogFactory;
   import org.apache.poi.util.POILogger;
   47   
   48   /**
   49    * This class contains the main functionality for the Powerpoint file 
   50    * "reader". It is only a very basic class for now
   51    *
   52    * @author Nick Burch
   53    */
   54   
   55   public class HSLFSlideShow extends POIDocument
   56   {
   57       // For logging
   58       private POILogger logger = POILogFactory.getLogger(this.getClass());
   59   
   60   	private InputStream istream;
   61   
   62   	// Holds metadata on where things are in our document
   63   	private CurrentUserAtom currentUser;
   64   
   65   	// Low level contents of the file
   66   	private byte[] _docstream;
   67   
   68   	// Low level contents
   69   	private Record[] _records;
   70   
   71   	// Raw Pictures contained in the pictures stream
   72   	private PictureData[] _pictures;
   73   
   74       // Embedded objects stored in storage records in the document stream, lazily populated.
   75       private ObjectData[] _objects;
   76   
   77       /**
   78   	 * Returns the underlying POIFSFileSystem for the document
   79   	 *  that is open.
   80   	 */
   81   	protected POIFSFileSystem getPOIFSFileSystem() {
   82   		return filesystem;
   83   	}
   84   
   85   	/**
   86   	 * Constructs a Powerpoint document from fileName. Parses the document 
   87   	 * and places all the important stuff into data structures.
   88   	 *
   89   	 * @param fileName The name of the file to read.
   90   	 * @throws IOException if there is a problem while parsing the document.
   91   	 */
   92   	public HSLFSlideShow(String fileName) throws IOException
   93   	{
   94   		this(new FileInputStream(fileName));
   95   	}
   96     
   97   	/**
   98   	 * Constructs a Powerpoint document from an input stream. Parses the 
   99   	 * document and places all the important stuff into data structures.
  100   	 *
  101   	 * @param inputStream the source of the data
  102   	 * @throws IOException if there is a problem while parsing the document.
  103   	 */
  104   	public HSLFSlideShow(InputStream inputStream) throws IOException
  105   	{
  106   		//do Ole stuff
  107   		this(new POIFSFileSystem(inputStream));
  108   		istream = inputStream;
  109   	}
  110   
  111   	/**
  112   	 * Constructs a Powerpoint document from a POIFS Filesystem. Parses the 
  113   	 * document and places all the important stuff into data structures.
  114   	 *
  115   	 * @param filesystem the POIFS FileSystem to read from
  116   	 * @throws IOException if there is a problem while parsing the document.
  117   	 */
  118   	public HSLFSlideShow(POIFSFileSystem filesystem) throws IOException
  119   	{
  120   		this(filesystem.getRoot(), filesystem);
  121   	}
  122   	
  123   	/**
  124   	 * Constructs a Powerpoint document from a specific point in a 
  125   	 *  POIFS Filesystem. Parses the document and places all the
  126   	 *  important stuff into data structures.
  127   	 *
  128   	 * @param dir the POIFS directory to read from
  129   	 * @param filesystem the POIFS FileSystem to read from
  130   	 * @throws IOException if there is a problem while parsing the document.
  131   	 */
  132   	public HSLFSlideShow(DirectoryNode dir, POIFSFileSystem filesystem) throws IOException
  133   	{
  134   		super(dir, filesystem);
  135   		
  136   		// First up, grab the "Current User" stream
  137   		// We need this before we can detect Encrypted Documents
  138   		readCurrentUserStream();
  139   		
  140   		// Next up, grab the data that makes up the 
  141   		//  PowerPoint stream
  142   		readPowerPointStream();
  143   		
  144   		// Check to see if we have an encrypted document,
  145   		//  bailing out if we do
  146   		boolean encrypted = EncryptedSlideShow.checkIfEncrypted(this);
  147   		if(encrypted) {
  148   			throw new EncryptedPowerPointFileException("Encrypted PowerPoint files are not supported");
  149   		}
  150   
  151   		// Now, build records based on the PowerPoint stream
  152   		buildRecords();
  153   
  154   		// Look for Property Streams:
  155   		readProperties();
  156   		
  157   		// Look for any other streams
  158   		readOtherStreams();
  159   
  160   		// Look for Picture Streams:
  161   		readPictures();
  162   	}
  163   
  164   	/**
  165   	 * Constructs a new, empty, Powerpoint document.
  166   	 */
  167   	public HSLFSlideShow() throws IOException 
  168   	{
  169   		this(HSLFSlideShow.class.getResourceAsStream("/org/apache/poi/hslf/data/empty.ppt"));
  170   	}
  171   
  172   	/**
  173   	 * Shuts things down. Closes underlying streams etc
  174   	 *
  175   	 * @throws IOException
  176   	 */
  177   	public void close() throws IOException
  178   	{
  179   		if(istream != null) {
  180   			istream.close();
  181   		}
  182   		filesystem = null;
  183   	}
  184   
  185   
  186   	/**
  187   	 * Extracts the main PowerPoint document stream from the 
  188   	 *  POI file, ready to be passed 
  189   	 *
  190   	 * @throws IOException
  191   	 */
  192   	private void readPowerPointStream() throws IOException
  193   	{
  194   		// Get the main document stream
  195   		DocumentEntry docProps =
  196   			(DocumentEntry)directory.getEntry("PowerPoint Document");
  197   
  198   		// Grab the document stream
  199   		_docstream = new byte[docProps.getSize()];
  200   		directory.createDocumentInputStream("PowerPoint Document").read(_docstream);
  201   	}
  202   	
  203   	/**
  204   	 * Builds the list of records, based on the contents  
  205   	 *  of the PowerPoint stream
  206   	 */
  207   	private void buildRecords()
  208   	{
  209   		// The format of records in a powerpoint file are:
  210   		//   <little endian 2 byte "info">
  211   		//   <little endian 2 byte "type">
  212   		//   <little endian 4 byte "length">
  213   		// If it has a zero length, following it will be another record
  214   		//		<xx xx yy yy 00 00 00 00> <xx xx yy yy zz zz zz zz>
  215   		// If it has a length, depending on its type it may have children or data
  216   		// If it has children, these will follow straight away
  217   		//		<xx xx yy yy zz zz zz zz <xx xx yy yy zz zz zz zz>>
  218   		// If it has data, this will come straigh after, and run for the length
  219   		//      <xx xx yy yy zz zz zz zz dd dd dd dd dd dd dd>
  220   		// All lengths given exclude the 8 byte record header
  221   		// (Data records are known as Atoms)
  222   	
  223   		// Document should start with:
  224   		//   0F 00 E8 03 ## ## ## ##
  225   	    //     (type 1000 = document, info 00 0f is normal, rest is document length)
  226   		//   01 00 E9 03 28 00 00 00
  227   		//     (type 1001 = document atom, info 00 01 normal, 28 bytes long)
  228   		//   80 16 00 00 E0 10 00 00 xx xx xx xx xx xx xx xx
  229   		//   05 00 00 00 0A 00 00 00 xx xx xx
  230   		//     (the contents of the document atom, not sure what it means yet)
  231   		//   (records then follow)
  232   	
  233   		// When parsing a document, look to see if you know about that type
  234   		//  of the current record. If you know it's a type that has children, 
  235   		//  process the record's data area looking for more records
  236   		// If you know about the type and it doesn't have children, either do
  237   		//  something with the data (eg TextRun) or skip over it
  238   		// If you don't know about the type, play safe and skip over it (using
  239   		//  its length to know where the next record will start)
  240   		//
  241   
  242           _records = read(_docstream, (int)currentUser.getCurrentEditOffset());
  243   	}
  244   
  245       private Record[] read(byte[] docstream, int usrOffset){
  246           ArrayList lst = new ArrayList();
  247           HashMap offset2id = new HashMap(); 
  248           while (usrOffset != 0){
  249               UserEditAtom usr = (UserEditAtom) Record.buildRecordAtOffset(docstream, usrOffset);
  250               lst.add(new Integer(usrOffset));
  251               int psrOffset = usr.getPersistPointersOffset();
  252   
  253               PersistPtrHolder ptr = (PersistPtrHolder)Record.buildRecordAtOffset(docstream, psrOffset);
  254               lst.add(new Integer(psrOffset));
  255               Hashtable entries = ptr.getSlideLocationsLookup();
  256               for (Iterator it = entries.keySet().iterator(); it.hasNext(); ) {
  257                   Integer id = (Integer)it.next();
  258                   Integer offset = (Integer)entries.get(id);
  259   
  260                   lst.add(offset);
  261                   offset2id.put(offset, id);
  262               }
  263   
  264               usrOffset = usr.getLastUserEditAtomOffset();
  265           }
  266           //sort found records by offset.
  267           //(it is not necessary but SlideShow.findMostRecentCoreRecords() expects them sorted)
  268           Object a[] = lst.toArray();
  269           Arrays.sort(a);
  270           Record[] rec = new Record[lst.size()];
  271           for (int i = 0; i < a.length; i++) {
  272               Integer offset = (Integer)a[i];
  273               rec[i] = (Record)Record.buildRecordAtOffset(docstream, offset.intValue());
  274               if(rec[i] instanceof PersistRecord) {
  275                   PersistRecord psr = (PersistRecord)rec[i];
  276                   Integer id = (Integer)offset2id.get(offset);
  277                   psr.setPersistId(id.intValue());
  278               }
  279           }
  280   
  281           return rec;
  282       }
  283   
  284   	/**
  285   	 * Find the "Current User" stream, and load it 
  286   	 */
  287   	private void readCurrentUserStream() {
  288   		try {
  289   			currentUser = new CurrentUserAtom(directory);
  290   		} catch(IOException ie) {
  291   			logger.log(POILogger.ERROR, "Error finding Current User Atom:\n" + ie);
  292   			currentUser = new CurrentUserAtom();
  293   		}
  294   	}
  295   	
  296   	/**
  297   	 * Find any other streams from the filesystem, and load them 
  298   	 */
  299   	private void readOtherStreams() {
  300   		// Currently, there aren't any
  301   	}
  302   
  303   	/**
  304   	 * Find and read in pictures contained in this presentation
  305   	 */
  306   	private void readPictures() throws IOException {
  307   		byte[] pictstream;
  308   
  309   		try {
  310   			DocumentEntry entry = (DocumentEntry)directory.getEntry("Pictures");
  311   			pictstream = new byte[entry.getSize()];
  312   			DocumentInputStream is = directory.createDocumentInputStream("Pictures");
  313   			is.read(pictstream);
  314   		} catch (FileNotFoundException e){
  315   			// Silently catch exceptions if the presentation doesn't 
  316   			//  contain pictures - will use a null set instead
  317   			return;
  318   		}
  319   
  320           List p = new ArrayList();
  321           int pos = 0;
  322   
  323   		// An empty picture record (length 0) will take up 8 bytes
  324           while (pos <= (pictstream.length-8)) {
  325               int offset = pos;
  326   
  327               // Image signature
  328               int signature = LittleEndian.getUShort(pictstream, pos);
  329               pos += LittleEndian.SHORT_SIZE;
  330               // Image type + 0xF018
  331               int type = LittleEndian.getUShort(pictstream, pos);
  332               pos += LittleEndian.SHORT_SIZE;
  333               // Image size (excluding the 8 byte header)
  334               int imgsize = LittleEndian.getInt(pictstream, pos);
  335               pos += LittleEndian.INT_SIZE;
  336   
  337   			// The image size must be 0 or greater
  338   			// (0 is allowed, but odd, since we do wind on by the header each
  339   			//  time, so we won't get stuck)
  340   			if(imgsize < 0) {
  341   				throw new CorruptPowerPointFileException("The file contains a picture, at position " + p.size() + ", which has a negatively sized data length, so we can't trust any of the picture data");
  342   			}
  343   
  344   			// If they type (including the bonus 0xF018) is 0, skip it
  345   			if(type == 0) {
  346   				logger.log(POILogger.ERROR, "Problem reading picture: Invalid image type 0, on picture with length " + imgsize + ".\nYou document will probably become corrupted if you save it!");
  347   				logger.log(POILogger.ERROR, "" + pos);
  348   			} else {
  349   	            // Copy the data, ready to pass to PictureData
  350   	            byte[] imgdata = new byte[imgsize];
  351   	            if(imgsize > 0) {
  352   	            	System.arraycopy(pictstream, pos, imgdata, 0, imgdata.length);
  353   	            }
  354   	            
  355   				// Build the PictureData object from the data
  356   				try {
  357   					PictureData pict = PictureData.create(type - 0xF018);
  358   					pict.setRawData(imgdata);
  359   					pict.setOffset(offset);
  360   					p.add(pict);
  361   				} catch(IllegalArgumentException e) {
  362   					logger.log(POILogger.ERROR, "Problem reading picture: " + e + "\nYou document will probably become corrupted if you save it!");
  363   				}
  364   			}
  365               
  366               pos += imgsize;
  367           }
  368   
  369   		_pictures = (PictureData[])p.toArray(new PictureData[p.size()]);
  370   	}
  371   
  372   
  373       /**
  374        * Writes out the slideshow file the is represented by an instance
  375        *  of this class.
  376        * It will write out the common OLE2 streams. If you require all
  377        *  streams to be written out, pass in preserveNodes
  378        * @param out The OutputStream to write to.
  379        * @throws IOException If there is an unexpected IOException from
  380        *           the passed in OutputStream
  381        */
  382       public void write(OutputStream out) throws IOException {
  383           // Write out, but only the common streams
  384           write(out,false);
  385       }
  386       /**
  387        * Writes out the slideshow file the is represented by an instance
  388        *  of this class.
  389        * If you require all streams to be written out (eg Marcos, embeded
  390        *  documents), then set preserveNodes to true
  391        * @param out The OutputStream to write to.
  392        * @param preserveNodes Should all OLE2 streams be written back out, or only the common ones?
  393        * @throws IOException If there is an unexpected IOException from
  394        *           the passed in OutputStream
  395        */
  396       public void write(OutputStream out, boolean preserveNodes) throws IOException {
  397           // Get a new Filesystem to write into
  398           POIFSFileSystem outFS = new POIFSFileSystem();
  399   
  400           // The list of entries we've written out
  401           List writtenEntries = new ArrayList(1);
  402           
  403           // Write out the Property Streams
  404           writeProperties(outFS, writtenEntries);
  405   
  406   
  407           // For position dependent records, hold where they were and now are
  408           // As we go along, update, and hand over, to any Position Dependent
  409           //  records we happen across
  410           Hashtable oldToNewPositions = new Hashtable();
  411   
  412           // First pass - figure out where all the position dependent
  413           //   records are going to end up, in the new scheme
  414           // (Annoyingly, some powerpoing files have PersistPtrHolders
  415           //  that reference slides after the PersistPtrHolder)
  416           ByteArrayOutputStream baos = new ByteArrayOutputStream();
  417           for(int i=0; i<_records.length; i++) {
  418               if(_records[i] instanceof PositionDependentRecord) {
  419                   PositionDependentRecord pdr = (PositionDependentRecord)_records[i];
  420                   int oldPos = pdr.getLastOnDiskOffset();
  421                   int newPos = baos.size();
  422                   pdr.setLastOnDiskOffset(newPos);
  423                   oldToNewPositions.put(new Integer(oldPos),new Integer(newPos));
  424                   //System.out.println(oldPos + " -> " + newPos);
  425               }
  426   
  427               // Dummy write out, so the position winds on properly
  428               _records[i].writeOut(baos);
  429           }
  430   
  431           // No go back through, actually writing ourselves out
  432           baos.reset();
  433           for(int i=0; i<_records.length; i++) {
  434               // For now, we're only handling PositionDependentRecord's that
  435               //  happen at the top level.
  436               // In future, we'll need the handle them everywhere, but that's
  437               //  a bit trickier
  438               if(_records[i] instanceof PositionDependentRecord) {
  439                   // We've already figured out their new location, and
  440                   //  told them that
  441                   // Tell them of the positions of the other records though
  442                   PositionDependentRecord pdr = (PositionDependentRecord)_records[i];
  443                   pdr.updateOtherRecordReferences(oldToNewPositions);
  444               }
  445   
  446               // Whatever happens, write out that record tree
  447               _records[i].writeOut(baos);
  448           }
  449           // Update our cached copy of the bytes that make up the PPT stream
  450           _docstream = baos.toByteArray();
  451   
  452           // Write the PPT stream into the POIFS layer
  453           ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
  454           outFS.createDocument(bais,"PowerPoint Document");
  455           writtenEntries.add("PowerPoint Document");
  456   
  457   
  458           // Update and write out the Current User atom
  459           int oldLastUserEditAtomPos = (int)currentUser.getCurrentEditOffset();
  460           Integer newLastUserEditAtomPos = (Integer)oldToNewPositions.get(new Integer(oldLastUserEditAtomPos));
  461           if(newLastUserEditAtomPos == null) {
  462               throw new HSLFException("Couldn't find the new location of the UserEditAtom that used to be at " + oldLastUserEditAtomPos);
  463           }
  464           currentUser.setCurrentEditOffset(newLastUserEditAtomPos.intValue());
  465           currentUser.writeToFS(outFS);
  466           writtenEntries.add("Current User");
  467   
  468   	
  469           // Write any pictures, into another stream
  470           if (_pictures != null) {
  471               ByteArrayOutputStream pict = new ByteArrayOutputStream();
  472               for (int i = 0; i < _pictures.length; i++ ) {
  473                   _pictures[i].write(pict);
  474               }
  475               outFS.createDocument(
  476                   new ByteArrayInputStream(pict.toByteArray()), "Pictures"
  477               );
  478               writtenEntries.add("Pictures");
  479           }
  480           
  481           // If requested, write out any other streams we spot
  482           if(preserveNodes) {
  483           	copyNodes(filesystem, outFS, writtenEntries);
  484           }
  485   
  486           // Send the POIFSFileSystem object out to the underlying stream
  487           outFS.writeFilesystem(out);
  488       }
  489   
  490   
  491   	/* ******************* adding methods follow ********************* */
  492   
  493   	/**
  494   	 * Adds a new root level record, at the end, but before the last
  495   	 *  PersistPtrIncrementalBlock.
  496   	 */
  497   	public synchronized int appendRootLevelRecord(Record newRecord) {
  498   		int addedAt = -1;
  499   		Record[] r = new Record[_records.length+1];
  500   		boolean added = false;
  501   		for(int i=(_records.length-1); i>=0; i--) {
  502   			if(added) {
  503   				// Just copy over
  504   				r[i] = _records[i];
  505   			} else {
  506   				r[(i+1)] = _records[i];
  507   				if(_records[i] instanceof PersistPtrHolder) {
  508   					r[i] = newRecord;
  509   					added = true;
  510   					addedAt = i;
  511   				}
  512   			}
  513   		}
  514   		_records = r;
  515   		return addedAt;
  516   	}
  517   	
  518   	/**
  519   	 *  Add a new picture to this presentation.
  520   	 */
  521   	public void addPicture(PictureData img) {
  522   		// Copy over the existing pictures, into an array one bigger
  523   		PictureData[] lst;
  524   		if(_pictures == null) {
  525   			lst = new PictureData[1];
  526   		} else {
  527   			lst = new PictureData[(_pictures.length+1)];
  528   			System.arraycopy(_pictures,0,lst,0,_pictures.length);
  529   		}
  530   		// Add in the new image
  531   		lst[lst.length - 1] = img;
  532   		_pictures = lst;
  533   	}
  534   
  535   	/* ******************* fetching methods follow ********************* */
  536   
  537   
  538   	/**
  539   	 * Returns an array of all the records found in the slideshow
  540   	 */
  541   	public Record[] getRecords() { return _records; }
  542   
  543   	/**
  544   	 * Returns an array of the bytes of the file. Only correct after a
  545   	 *  call to open or write - at all other times might be wrong!
  546   	 */
  547   	public byte[] getUnderlyingBytes() { return _docstream; }
  548   
  549   	/**
  550   	 * Fetch the Current User Atom of the document
  551   	 */
  552   	public CurrentUserAtom getCurrentUserAtom() { return currentUser; }
  553   
  554   	/**
  555   	 *  Return array of pictures contained in this presentation
  556   	 *
  557   	 *  @return array with the read pictures or <code>null</code> if the
  558   	 *  presentation doesn't contain pictures.
  559   	 */
  560   	public PictureData[] getPictures() {
  561   		return _pictures;
  562   	}
  563   
  564       /**
  565        * Gets embedded object data from the slide show.
  566        *
  567        * @return the embedded objects.
  568        */
  569       public ObjectData[] getEmbeddedObjects() {
  570           if (_objects == null) {
  571               List objects = new ArrayList();
  572               for (int i = 0; i < _records.length; i++) {
  573                   if (_records[i] instanceof ExOleObjStg) {
  574                       objects.add(new ObjectData((ExOleObjStg) _records[i]));
  575                   }
  576               }
  577               _objects = (ObjectData[]) objects.toArray(new ObjectData[objects.size()]);
  578           }
  579           return _objects;
  580       }
  581   }

Save This Page
Home » poi-src-3.2-FINAL-20081019 » org.apache » poi » hslf » [javadoc | source]