1
2 /* ====================================================================
3 Licensed to the Apache Software Foundation (ASF) under one or more
4 contributor license agreements. See the NOTICE file distributed with
5 this work for additional information regarding copyright ownership.
6 The ASF licenses this file to You under the Apache License, Version 2.0
7 (the "License"); you may not use this file except in compliance with
8 the License. You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 ==================================================================== */
18
19
20
21 package org.apache.poi.hslf;
22
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.*;

import org.apache.poi.POIDocument;
import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
import org.apache.poi.hslf.exceptions.EncryptedPowerPointFileException;
import org.apache.poi.hslf.exceptions.HSLFException;
import org.apache.poi.hslf.model.Shape;
import org.apache.poi.hslf.record.*;
import org.apache.poi.hslf.usermodel.ObjectData;
import org.apache.poi.hslf.usermodel.PictureData;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
47
48 /**
 * This class contains the main functionality for the PowerPoint file
 * "reader". It is only a very basic class for now.
51 *
52 * @author Nick Burch
53 */
54
55 public class HSLFSlideShow extends POIDocument
56 {
57 // For logging
58 private POILogger logger = POILogFactory.getLogger(this.getClass());
59
60 private InputStream istream;
61
62 // Holds metadata on where things are in our document
63 private CurrentUserAtom currentUser;
64
65 // Low level contents of the file
66 private byte[] _docstream;
67
68 // Low level contents
69 private Record[] _records;
70
71 // Raw Pictures contained in the pictures stream
72 private PictureData[] _pictures;
73
74 // Embedded objects stored in storage records in the document stream, lazily populated.
75 private ObjectData[] _objects;
76
77 /**
78 * Returns the underlying POIFSFileSystem for the document
79 * that is open.
80 */
81 protected POIFSFileSystem getPOIFSFileSystem() {
82 return filesystem;
83 }
84
/**
 * Opens the named file and builds a PowerPoint document from it,
 * parsing the contents into the in-memory data structures.
 *
 * @param fileName The name of the file to read.
 * @throws IOException if the file can't be opened, or there is a
 *  problem while parsing the document.
 */
public HSLFSlideShow(String fileName) throws IOException {
    this(new FileInputStream(fileName));
}
96
/**
 * Builds a PowerPoint document from the OLE2 data supplied by an input
 * stream, parsing the contents into the in-memory data structures.
 * The stream is remembered, so that {@link #close()} can close it.
 *
 * @param inputStream the source of the data
 * @throws IOException if there is a problem while parsing the document.
 */
public HSLFSlideShow(InputStream inputStream) throws IOException {
    // Let POIFS decode the OLE2 container, then parse from that
    this(new POIFSFileSystem(inputStream));

    // Keep hold of the stream for close()
    this.istream = inputStream;
}
110
/**
 * Builds a PowerPoint document from the root directory of a POIFS
 * Filesystem, parsing the contents into the in-memory data structures.
 *
 * @param filesystem the POIFS FileSystem to read from
 * @throws IOException if there is a problem while parsing the document.
 */
public HSLFSlideShow(POIFSFileSystem filesystem) throws IOException {
    this(filesystem.getRoot(), filesystem);
}
122
/**
 * Builds a PowerPoint document from a given directory within a POIFS
 * Filesystem, parsing the contents into the in-memory data structures.
 *
 * @param dir the POIFS directory to read from
 * @param filesystem the POIFS FileSystem to read from
 * @throws IOException if there is a problem while parsing the document.
 * @throws EncryptedPowerPointFileException if the document is encrypted
 */
public HSLFSlideShow(DirectoryNode dir, POIFSFileSystem filesystem) throws IOException {
    super(dir, filesystem);

    // The "Current User" stream has to be loaded first, as it is
    // needed before Encrypted Documents can be detected
    readCurrentUserStream();

    // Then the raw bytes of the PowerPoint stream
    readPowerPointStream();

    // Encrypted documents can't be handled, so give up on those now
    if (EncryptedSlideShow.checkIfEncrypted(this)) {
        throw new EncryptedPowerPointFileException("Encrypted PowerPoint files are not supported");
    }

    // Turn the PowerPoint stream into records
    buildRecords();

    // Finally, the Property Streams, any other streams, and
    // the Picture Streams
    readProperties();
    readOtherStreams();
    readPictures();
}
163
/**
 * Builds a new, empty, PowerPoint document, by loading the
 * <code>empty.ppt</code> resource shipped alongside this class.
 *
 * @throws IOException if there is a problem while parsing the
 *  empty document.
 */
public HSLFSlideShow() throws IOException {
    this(HSLFSlideShow.class.getResourceAsStream("/org/apache/poi/hslf/data/empty.ppt"));
}
171
172 /**
173 * Shuts things down. Closes underlying streams etc
174 *
175 * @throws IOException
176 */
177 public void close() throws IOException
178 {
179 if(istream != null) {
180 istream.close();
181 }
182 filesystem = null;
183 }
184
185
186 /**
187 * Extracts the main PowerPoint document stream from the
188 * POI file, ready to be passed
189 *
190 * @throws IOException
191 */
192 private void readPowerPointStream() throws IOException
193 {
194 // Get the main document stream
195 DocumentEntry docProps =
196 (DocumentEntry)directory.getEntry("PowerPoint Document");
197
198 // Grab the document stream
199 _docstream = new byte[docProps.getSize()];
200 directory.createDocumentInputStream("PowerPoint Document").read(_docstream);
201 }
202
203 /**
204 * Builds the list of records, based on the contents
205 * of the PowerPoint stream
206 */
207 private void buildRecords()
208 {
209 // The format of records in a powerpoint file are:
210 // <little endian 2 byte "info">
211 // <little endian 2 byte "type">
212 // <little endian 4 byte "length">
213 // If it has a zero length, following it will be another record
214 // <xx xx yy yy 00 00 00 00> <xx xx yy yy zz zz zz zz>
215 // If it has a length, depending on its type it may have children or data
216 // If it has children, these will follow straight away
217 // <xx xx yy yy zz zz zz zz <xx xx yy yy zz zz zz zz>>
218 // If it has data, this will come straigh after, and run for the length
219 // <xx xx yy yy zz zz zz zz dd dd dd dd dd dd dd>
220 // All lengths given exclude the 8 byte record header
221 // (Data records are known as Atoms)
222
223 // Document should start with:
224 // 0F 00 E8 03 ## ## ## ##
225 // (type 1000 = document, info 00 0f is normal, rest is document length)
226 // 01 00 E9 03 28 00 00 00
227 // (type 1001 = document atom, info 00 01 normal, 28 bytes long)
228 // 80 16 00 00 E0 10 00 00 xx xx xx xx xx xx xx xx
229 // 05 00 00 00 0A 00 00 00 xx xx xx
230 // (the contents of the document atom, not sure what it means yet)
231 // (records then follow)
232
233 // When parsing a document, look to see if you know about that type
234 // of the current record. If you know it's a type that has children,
235 // process the record's data area looking for more records
236 // If you know about the type and it doesn't have children, either do
237 // something with the data (eg TextRun) or skip over it
238 // If you don't know about the type, play safe and skip over it (using
239 // its length to know where the next record will start)
240 //
241
242 _records = read(_docstream, (int)currentUser.getCurrentEditOffset());
243 }
244
245 private Record[] read(byte[] docstream, int usrOffset){
246 ArrayList lst = new ArrayList();
247 HashMap offset2id = new HashMap();
248 while (usrOffset != 0){
249 UserEditAtom usr = (UserEditAtom) Record.buildRecordAtOffset(docstream, usrOffset);
250 lst.add(new Integer(usrOffset));
251 int psrOffset = usr.getPersistPointersOffset();
252
253 PersistPtrHolder ptr = (PersistPtrHolder)Record.buildRecordAtOffset(docstream, psrOffset);
254 lst.add(new Integer(psrOffset));
255 Hashtable entries = ptr.getSlideLocationsLookup();
256 for (Iterator it = entries.keySet().iterator(); it.hasNext(); ) {
257 Integer id = (Integer)it.next();
258 Integer offset = (Integer)entries.get(id);
259
260 lst.add(offset);
261 offset2id.put(offset, id);
262 }
263
264 usrOffset = usr.getLastUserEditAtomOffset();
265 }
266 //sort found records by offset.
267 //(it is not necessary but SlideShow.findMostRecentCoreRecords() expects them sorted)
268 Object a[] = lst.toArray();
269 Arrays.sort(a);
270 Record[] rec = new Record[lst.size()];
271 for (int i = 0; i < a.length; i++) {
272 Integer offset = (Integer)a[i];
273 rec[i] = (Record)Record.buildRecordAtOffset(docstream, offset.intValue());
274 if(rec[i] instanceof PersistRecord) {
275 PersistRecord psr = (PersistRecord)rec[i];
276 Integer id = (Integer)offset2id.get(offset);
277 psr.setPersistId(id.intValue());
278 }
279 }
280
281 return rec;
282 }
283
284 /**
285 * Find the "Current User" stream, and load it
286 */
287 private void readCurrentUserStream() {
288 try {
289 currentUser = new CurrentUserAtom(directory);
290 } catch(IOException ie) {
291 logger.log(POILogger.ERROR, "Error finding Current User Atom:\n" + ie);
292 currentUser = new CurrentUserAtom();
293 }
294 }
295
296 /**
297 * Find any other streams from the filesystem, and load them
298 */
299 private void readOtherStreams() {
300 // Currently, there aren't any
301 }
302
303 /**
304 * Find and read in pictures contained in this presentation
305 */
306 private void readPictures() throws IOException {
307 byte[] pictstream;
308
309 try {
310 DocumentEntry entry = (DocumentEntry)directory.getEntry("Pictures");
311 pictstream = new byte[entry.getSize()];
312 DocumentInputStream is = directory.createDocumentInputStream("Pictures");
313 is.read(pictstream);
314 } catch (FileNotFoundException e){
315 // Silently catch exceptions if the presentation doesn't
316 // contain pictures - will use a null set instead
317 return;
318 }
319
320 List p = new ArrayList();
321 int pos = 0;
322
323 // An empty picture record (length 0) will take up 8 bytes
324 while (pos <= (pictstream.length-8)) {
325 int offset = pos;
326
327 // Image signature
328 int signature = LittleEndian.getUShort(pictstream, pos);
329 pos += LittleEndian.SHORT_SIZE;
330 // Image type + 0xF018
331 int type = LittleEndian.getUShort(pictstream, pos);
332 pos += LittleEndian.SHORT_SIZE;
333 // Image size (excluding the 8 byte header)
334 int imgsize = LittleEndian.getInt(pictstream, pos);
335 pos += LittleEndian.INT_SIZE;
336
337 // The image size must be 0 or greater
338 // (0 is allowed, but odd, since we do wind on by the header each
339 // time, so we won't get stuck)
340 if(imgsize < 0) {
341 throw new CorruptPowerPointFileException("The file contains a picture, at position " + p.size() + ", which has a negatively sized data length, so we can't trust any of the picture data");
342 }
343
344 // If they type (including the bonus 0xF018) is 0, skip it
345 if(type == 0) {
346 logger.log(POILogger.ERROR, "Problem reading picture: Invalid image type 0, on picture with length " + imgsize + ".\nYou document will probably become corrupted if you save it!");
347 logger.log(POILogger.ERROR, "" + pos);
348 } else {
349 // Copy the data, ready to pass to PictureData
350 byte[] imgdata = new byte[imgsize];
351 if(imgsize > 0) {
352 System.arraycopy(pictstream, pos, imgdata, 0, imgdata.length);
353 }
354
355 // Build the PictureData object from the data
356 try {
357 PictureData pict = PictureData.create(type - 0xF018);
358 pict.setRawData(imgdata);
359 pict.setOffset(offset);
360 p.add(pict);
361 } catch(IllegalArgumentException e) {
362 logger.log(POILogger.ERROR, "Problem reading picture: " + e + "\nYou document will probably become corrupted if you save it!");
363 }
364 }
365
366 pos += imgsize;
367 }
368
369 _pictures = (PictureData[])p.toArray(new PictureData[p.size()]);
370 }
371
372
373 /**
374 * Writes out the slideshow file the is represented by an instance
375 * of this class.
376 * It will write out the common OLE2 streams. If you require all
377 * streams to be written out, pass in preserveNodes
378 * @param out The OutputStream to write to.
379 * @throws IOException If there is an unexpected IOException from
380 * the passed in OutputStream
381 */
382 public void write(OutputStream out) throws IOException {
383 // Write out, but only the common streams
384 write(out,false);
385 }
386 /**
387 * Writes out the slideshow file the is represented by an instance
388 * of this class.
389 * If you require all streams to be written out (eg Marcos, embeded
390 * documents), then set preserveNodes to true
391 * @param out The OutputStream to write to.
392 * @param preserveNodes Should all OLE2 streams be written back out, or only the common ones?
393 * @throws IOException If there is an unexpected IOException from
394 * the passed in OutputStream
395 */
396 public void write(OutputStream out, boolean preserveNodes) throws IOException {
397 // Get a new Filesystem to write into
398 POIFSFileSystem outFS = new POIFSFileSystem();
399
400 // The list of entries we've written out
401 List writtenEntries = new ArrayList(1);
402
403 // Write out the Property Streams
404 writeProperties(outFS, writtenEntries);
405
406
407 // For position dependent records, hold where they were and now are
408 // As we go along, update, and hand over, to any Position Dependent
409 // records we happen across
410 Hashtable oldToNewPositions = new Hashtable();
411
412 // First pass - figure out where all the position dependent
413 // records are going to end up, in the new scheme
414 // (Annoyingly, some powerpoing files have PersistPtrHolders
415 // that reference slides after the PersistPtrHolder)
416 ByteArrayOutputStream baos = new ByteArrayOutputStream();
417 for(int i=0; i<_records.length; i++) {
418 if(_records[i] instanceof PositionDependentRecord) {
419 PositionDependentRecord pdr = (PositionDependentRecord)_records[i];
420 int oldPos = pdr.getLastOnDiskOffset();
421 int newPos = baos.size();
422 pdr.setLastOnDiskOffset(newPos);
423 oldToNewPositions.put(new Integer(oldPos),new Integer(newPos));
424 //System.out.println(oldPos + " -> " + newPos);
425 }
426
427 // Dummy write out, so the position winds on properly
428 _records[i].writeOut(baos);
429 }
430
431 // No go back through, actually writing ourselves out
432 baos.reset();
433 for(int i=0; i<_records.length; i++) {
434 // For now, we're only handling PositionDependentRecord's that
435 // happen at the top level.
436 // In future, we'll need the handle them everywhere, but that's
437 // a bit trickier
438 if(_records[i] instanceof PositionDependentRecord) {
439 // We've already figured out their new location, and
440 // told them that
441 // Tell them of the positions of the other records though
442 PositionDependentRecord pdr = (PositionDependentRecord)_records[i];
443 pdr.updateOtherRecordReferences(oldToNewPositions);
444 }
445
446 // Whatever happens, write out that record tree
447 _records[i].writeOut(baos);
448 }
449 // Update our cached copy of the bytes that make up the PPT stream
450 _docstream = baos.toByteArray();
451
452 // Write the PPT stream into the POIFS layer
453 ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
454 outFS.createDocument(bais,"PowerPoint Document");
455 writtenEntries.add("PowerPoint Document");
456
457
458 // Update and write out the Current User atom
459 int oldLastUserEditAtomPos = (int)currentUser.getCurrentEditOffset();
460 Integer newLastUserEditAtomPos = (Integer)oldToNewPositions.get(new Integer(oldLastUserEditAtomPos));
461 if(newLastUserEditAtomPos == null) {
462 throw new HSLFException("Couldn't find the new location of the UserEditAtom that used to be at " + oldLastUserEditAtomPos);
463 }
464 currentUser.setCurrentEditOffset(newLastUserEditAtomPos.intValue());
465 currentUser.writeToFS(outFS);
466 writtenEntries.add("Current User");
467
468
469 // Write any pictures, into another stream
470 if (_pictures != null) {
471 ByteArrayOutputStream pict = new ByteArrayOutputStream();
472 for (int i = 0; i < _pictures.length; i++ ) {
473 _pictures[i].write(pict);
474 }
475 outFS.createDocument(
476 new ByteArrayInputStream(pict.toByteArray()), "Pictures"
477 );
478 writtenEntries.add("Pictures");
479 }
480
481 // If requested, write out any other streams we spot
482 if(preserveNodes) {
483 copyNodes(filesystem, outFS, writtenEntries);
484 }
485
486 // Send the POIFSFileSystem object out to the underlying stream
487 outFS.writeFilesystem(out);
488 }
489
490
491 /* ******************* adding methods follow ********************* */
492
493 /**
494 * Adds a new root level record, at the end, but before the last
495 * PersistPtrIncrementalBlock.
496 */
497 public synchronized int appendRootLevelRecord(Record newRecord) {
498 int addedAt = -1;
499 Record[] r = new Record[_records.length+1];
500 boolean added = false;
501 for(int i=(_records.length-1); i>=0; i--) {
502 if(added) {
503 // Just copy over
504 r[i] = _records[i];
505 } else {
506 r[(i+1)] = _records[i];
507 if(_records[i] instanceof PersistPtrHolder) {
508 r[i] = newRecord;
509 added = true;
510 addedAt = i;
511 }
512 }
513 }
514 _records = r;
515 return addedAt;
516 }
517
518 /**
519 * Add a new picture to this presentation.
520 */
521 public void addPicture(PictureData img) {
522 // Copy over the existing pictures, into an array one bigger
523 PictureData[] lst;
524 if(_pictures == null) {
525 lst = new PictureData[1];
526 } else {
527 lst = new PictureData[(_pictures.length+1)];
528 System.arraycopy(_pictures,0,lst,0,_pictures.length);
529 }
530 // Add in the new image
531 lst[lst.length - 1] = img;
532 _pictures = lst;
533 }
534
535 /* ******************* fetching methods follow ********************* */
536
537
538 /**
539 * Returns an array of all the records found in the slideshow
540 */
541 public Record[] getRecords() { return _records; }
542
543 /**
544 * Returns an array of the bytes of the file. Only correct after a
545 * call to open or write - at all other times might be wrong!
546 */
547 public byte[] getUnderlyingBytes() { return _docstream; }
548
549 /**
550 * Fetch the Current User Atom of the document
551 */
552 public CurrentUserAtom getCurrentUserAtom() { return currentUser; }
553
554 /**
555 * Return array of pictures contained in this presentation
556 *
557 * @return array with the read pictures or <code>null</code> if the
558 * presentation doesn't contain pictures.
559 */
560 public PictureData[] getPictures() {
561 return _pictures;
562 }
563
564 /**
565 * Gets embedded object data from the slide show.
566 *
567 * @return the embedded objects.
568 */
569 public ObjectData[] getEmbeddedObjects() {
570 if (_objects == null) {
571 List objects = new ArrayList();
572 for (int i = 0; i < _records.length; i++) {
573 if (_records[i] instanceof ExOleObjStg) {
574 objects.add(new ObjectData((ExOleObjStg) _records[i]));
575 }
576 }
577 _objects = (ObjectData[]) objects.toArray(new ObjectData[objects.size()]);
578 }
579 return _objects;
580 }
581 }