Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/drew/metadata/exif/ExifReader.java


1   /*
2    * EXIFExtractor.java
3    *
4    * This class based upon code from Jhead, a C program for extracting and
5    * manipulating the Exif data within files written by Matthias Wandel.
6    *   http://www.sentex.net/~mwandel/jhead/
7    *
8    * Jhead is public domain software - that is, you can do whatever you want
9    * with it, and include it in software that is licensed under the GNU or the
10   * BSD license, or whatever other licence you choose, including proprietary
11   * closed source licenses.  Similarly, I release this Java version under the
12   * same license, though I do ask that you leave this header intact.
13   *
14   * If you make modifications to this code that you think would benefit the
15   * wider community, please send me a copy and I'll post it on my site.  Unlike
16   * Jhead, this code (as it stands) only supports reading of Exif data - no
17   * manipulation, and no thumbnail stuff.
18   *
19   * If you make use of this code, I'd appreciate hearing about it.
20   *   drew.noakes@drewnoakes.com
21   * Latest version of this software kept at
22   *   http://drewnoakes.com/
23   *
24   * Created on 28 April 2002, 23:54
25   * Modified 04 Aug 2002
26   * - Renamed constants to be inline with changes to ExifTagValues interface
27   * - Substituted usage of JDK 1.4 features (java.nio package)
28   * Modified 29 Oct 2002 (v1.2)
29   * - Proper traversing of Exif file structure and complete refactor & tidy of
30   *   the codebase (a few unnoticed bugs removed)
31   * - Reads makernote data for 6 families of camera (5 makes)
32   * - Tags now stored in directories... use the IFD_* constants to refer to the
33   *   image file directory you require (Exif, Interop, GPS and Makernote*) --
34   *   this avoids collisions where two tags share the same code
35   * - Takes componentCount of unknown tags into account
36   * - Now understands GPS tags (thanks to Colin Briton for his help with this)
37   * - Some other bug fixes, pointed out by users around the world.  Thanks!
38   * Modified 27 Nov 2002 (v2.0)
39   * - Renamed to ExifReader
40   * - Moved to new package com.drew.metadata.exif
41   */
42  package com.drew.metadata.exif;
43  
44  import com.drew.imaging.jpeg.JpegProcessingException;
45  import com.drew.imaging.jpeg.JpegSegmentReader;
46  import com.drew.lang.Rational;
47  import com.drew.metadata.*;
48  
49  import java.io.File;
50  import java.io.FileNotFoundException;
51  
52  /**
53   * Extracts Exif data from a JPEG header segment, providing information about the
54   * camera/scanner/capture device (if available).  Information is encapsulated in
55   * an <code>Metadata</code> object.
56   * @author  Drew Noakes http://drewnoakes.com
57   */
58  public class ExifReader implements MetadataReader
59  {
60      /**
61       * The JPEG segment as an array of bytes.
62       */
63      private byte[] _data;
64  
65      /**
66       * Represents the native byte ordering used in the JPEG segment.  If true,
67       * then we're using Motorolla ordering (Big endian), else we're using Intel
68       * ordering (Little endian).
69       */
70      private boolean motorollaByteOrder;
71  
72      /**
73       * Bean instance to store information about the image and camera/scanner/capture
74       * device.
75       */
76      private Metadata _metadata;
77  
78      /**
79       * The number of bytes used per format descriptor.
80       */
81      static final int[] BYTES_PER_FORMAT = {0, 1, 1, 2, 4, 8, 1, 1, 2, 4, 8, 4, 8};
82  
83      /**
84       * The number of formats known.
85       */
86      private static final int MAX_FORMAT_CODE = 12;
87  
88      // the format enumeration
89      private static final int FMT_BYTE = 1;
90      private static final int FMT_STRING = 2;
91      private static final int FMT_USHORT = 3;
92      private static final int FMT_ULONG = 4;
93      private static final int FMT_URATIONAL = 5;
94      private static final int FMT_SBYTE = 6;
95      private static final int FMT_UNDEFINED = 7;
96      private static final int FMT_SSHORT = 8;
97      private static final int FMT_SLONG = 9;
98      private static final int FMT_SRATIONAL = 10;
99      private static final int FMT_SINGLE = 11;
100     private static final int FMT_DOUBLE = 12;
101 
102     public static final int TAG_EXIF_OFFSET = 0x8769;
103     public static final int TAG_INTEROP_OFFSET = 0xA005;
104     public static final int TAG_GPS_INFO_OFFSET = 0x8825;
105     public static final int TAG_MAKER_NOTE = 0x927C;
106 
107     public static final int TIFF_HEADER_START_OFFSET = 6;
108 
109     /**
110      *
111      * @param file
112      * @throws JpegProcessingException
113      * @throws FileNotFoundException
114      */
115     public ExifReader(File file) throws JpegProcessingException, FileNotFoundException
116     {
117         this(new JpegSegmentReader(file).readSegment(JpegSegmentReader.SEGMENT_APP1));
118     }
119 
120     /**
121      * Creates an ExifReader for the given JPEG header segment.
122      */
123     public ExifReader(byte[] data)
124     {
125         _data = data;
126     }
127 
128     /**
129      * Performs the Exif data extraction, returning a new instance of <code>Metadata</code>.
130      */
131     public Metadata extract()
132     {
133         return extract(new Metadata());
134     }
135 
136     /**
137      * Performs the Exif data extraction, adding found values to the specified
138      * instance of <code>Metadata</code>.
139      */
140     public Metadata extract(Metadata metadata)
141     {
142         _metadata = metadata;
143         if (_data == null) {
144             return _metadata;
145         }
146 
147         // once we know there's some data, create the directory and start working on it
148         Directory directory = _metadata.getDirectory(ExifDirectory.class);
149         if (_data.length <= 14) {
150             directory.addError("Exif data segment must contain at least 14 bytes");
151             return _metadata;
152         }
153         if (!"Exif\0\0".equals(new String(_data, 0, 6))) {
154             directory.addError("Exif data segment doesn't begin with 'Exif'");
155             return _metadata;
156         }
157 
158         // this should be either "MM" or "II"
159         String byteOrderIdentifier = new String(_data, 6, 2);
160         if (!setByteOrder(byteOrderIdentifier)) {
161             directory.addError("Unclear distinction between Motorola/Intel byte ordering");
162             return _metadata;
163         }
164 
165         // Check the next two values for correctness.
166         if (get16Bits(8) != 0x2a) {
167             directory.addError("Invalid Exif start - should have 0x2A at offset 8 in Exif header");
168             return _metadata;
169         }
170 
171         int firstDirectoryOffset = get32Bits(10) + TIFF_HEADER_START_OFFSET;
172 
173         // David Ekholm sent an digital camera image that has this problem
174         if (firstDirectoryOffset >= _data.length - 1) {
175             directory.addError("First exif directory offset is beyond end of Exif data segment");
176             // First directory normally starts 14 bytes in -- try it here and catch another error in the worst case
177             firstDirectoryOffset = 14;
178         }
179 
180         // 0th IFD (we merge with Exif IFD)
181         processDirectory(directory, firstDirectoryOffset);
182 
183         // after the extraction process, if we have the correct tags, we may be able to extract thumbnail information
184         extractThumbnail(directory);
185 
186         return _metadata;
187     }
188 
189     private void extractThumbnail(Directory exifDirectory)
190     {
191         if (!(exifDirectory instanceof ExifDirectory)) {
192             return;
193         }
194         if (!exifDirectory.containsTag(ExifDirectory.TAG_THUMBNAIL_LENGTH) ||
195                 !exifDirectory.containsTag(ExifDirectory.TAG_THUMBNAIL_OFFSET)) {
196             return;
197         }
198         int offset, length;
199         try {
200             offset = exifDirectory.getInt(ExifDirectory.TAG_THUMBNAIL_OFFSET);
201             length = exifDirectory.getInt(ExifDirectory.TAG_THUMBNAIL_LENGTH);
202             byte[] result = new byte[length];
203             for (int i = 0; i < result.length; i++) {
204                 result[i] = _data[TIFF_HEADER_START_OFFSET + offset + i];
205             }
206             exifDirectory.setByteArray(ExifDirectory.TAG_THUMBNAIL_DATA, result);
207         } catch (Throwable e) {
208             exifDirectory.addError("Unable to extract thumbnail: " + e.getMessage());
209         }
210     }
211 
212     private boolean setByteOrder(String byteOrderIdentifier)
213     {
214         if ("MM".equals(byteOrderIdentifier)) {
215             motorollaByteOrder = true;
216         } else if ("II".equals(byteOrderIdentifier)) {
217             motorollaByteOrder = false;
218         } else {
219             return false;
220         }
221         return true;
222     }
223 
224     /**
225      * Process one of the nested EXIF directories.
226      */
227     private void processDirectory(Directory directory, int dirStartOffset)
228     {
229 //        if (dirStartOffset>=_data.length) {
230 //            return;
231 //        }
232 
233         // First two bytes in the IFD are the tag count
234         int dirTagCount = get16Bits(dirStartOffset);
235 
236         if (!isDirectoryLengthValid(dirStartOffset)) {
237             directory.addError("Illegally sized directory");
238             return;
239         }
240 
241         // Handle each tag in this directory
242         for (int dirEntry = 0; dirEntry < dirTagCount; dirEntry++) {
243             int dirEntryOffset = calculateDirectoryEntryOffset(dirStartOffset, dirEntry);
244             int tagType = get16Bits(dirEntryOffset);
245             int formatCode = get16Bits(dirEntryOffset + 2);
246             if (formatCode < 0 || formatCode > MAX_FORMAT_CODE) {
247                 directory.addError("Invalid format code: " + formatCode);
248                 continue;
249             }
250 
251             // 4 bytes indicating number of formatCode type data for this tag
252             int componentCount = get32Bits(dirEntryOffset + 4);
253             int byteCount = componentCount * BYTES_PER_FORMAT[formatCode];
254             int tagValueOffset = calculateTagValueOffset(byteCount, dirEntryOffset);
255             if (tagValueOffset<0) {
256                 directory.addError("Illegal pointer offset value in EXIF");
257                 continue;
258             }
259 
260             // Calculate the value as an offset for cases where the tag is represents directory
261             int subdirOffset = TIFF_HEADER_START_OFFSET + get32Bits(tagValueOffset);
262 
263             switch (tagType) {
264                 case TAG_EXIF_OFFSET:
265                     processDirectory(_metadata.getDirectory(ExifDirectory.class), subdirOffset);
266                     continue;
267                 case TAG_INTEROP_OFFSET:
268                     processDirectory(_metadata.getDirectory(ExifInteropDirectory.class), subdirOffset);
269                     continue;
270                 case TAG_GPS_INFO_OFFSET:
271                     processDirectory(_metadata.getDirectory(GpsDirectory.class), subdirOffset);
272                     continue;
273                 case TAG_MAKER_NOTE:
274                     processMakerNote(tagValueOffset);
275                     continue;
276                 default:
277                     processTag(directory, tagType, tagValueOffset, componentCount, formatCode);
278                     break;
279             }
280         }
281         // At the end of each IFD is an optional link to the next IFD.  This link is after
282         // the 2-byte tag count, and after 12 bytes for each of these tags, hence
283         int nextDirectoryOffset = get32Bits(dirStartOffset + 2 + 12 * dirTagCount);
284         if (nextDirectoryOffset != 0) {
285             nextDirectoryOffset += TIFF_HEADER_START_OFFSET;
286             if (nextDirectoryOffset >= _data.length) {
287                 // Last 4 bytes of IFD reference another IFD with an address that is out of bounds
288                 // Note this could have been caused by jhead 1.3 cropping too much
289                 return;
290             }
291             // the next directory is of same type as this one
292             processDirectory(directory, nextDirectoryOffset);
293         }
294     }
295 
296     private void processMakerNote(int subdirOffset)
297     {
298         // Determine the camera model and makernote format
299         Directory exifDirectory = _metadata.getDirectory(ExifDirectory.class);
300         if (exifDirectory == null) {
301             return;
302         }
303         String cameraModel = exifDirectory.getString(ExifDirectory.TAG_MAKE);
304         if ("OLYMP".equals(new String(_data, subdirOffset, 5))) {
305             // Olympus Makernote
306             processDirectory(_metadata.getDirectory(OlympusMakernoteDirectory.class), subdirOffset + 8);
307         } else if ("NIKON".equalsIgnoreCase(cameraModel)) {
308             if ("Nikon".equals(new String(_data, subdirOffset, 5))) {
309                 // Nikon type 1 Makernote
310                 processDirectory(_metadata.getDirectory(NikonType1MakernoteDirectory.class), subdirOffset + 8);
311             } else {
312                 // Nikon type 2 Makernote
313                 processDirectory(_metadata.getDirectory(NikonType2MakernoteDirectory.class), subdirOffset);
314             }
315         } else if ("Canon".equalsIgnoreCase(cameraModel)) {
316             // Canon Makernote
317             processDirectory(_metadata.getDirectory(CanonMakernoteDirectory.class), subdirOffset);
318         } else if ("Casio".equalsIgnoreCase(cameraModel)) {
319             // Casio Makernote
320             processDirectory(_metadata.getDirectory(CasioMakernoteDirectory.class), subdirOffset);
321         } else if ("FUJIFILM".equals(new String(_data, subdirOffset, 8)) || "Fujifilm".equalsIgnoreCase(cameraModel)) {
322             // Fujifile Makernote
323             boolean byteOrderBefore = motorollaByteOrder;
324             // bug in fujifilm makernote ifd means we temporarily use Intel byte ordering
325             motorollaByteOrder = false;
326             // the 4 bytes after "FUJIFILM" in the makernote point to the start of the makernote
327             // IFD, though the offset is relative to the start of the makernote, not the TIFF
328             // header (like everywhere else)
329             int ifdStart = subdirOffset + get32Bits(subdirOffset + 8);
330             processDirectory(_metadata.getDirectory(FujiFilmMakernoteDirectory.class), ifdStart);
331             motorollaByteOrder = byteOrderBefore;
332         }
333     }
334 
335     private boolean isDirectoryLengthValid(int dirStartOffset)
336     {
337         int dirTagCount = get16Bits(dirStartOffset);
338         int dirLength = (2 + (12 * dirTagCount) + 4);
339         if (dirLength + dirStartOffset + TIFF_HEADER_START_OFFSET >= _data.length) {
340             // Note: Files that had thumbnails trimmed with jhead 1.3 or earlier might trigger this
341             return false;
342         }
343         return true;
344     }
345 
346     private void processTag(Directory directory, int tagType, int tagValueOffset, int componentCount, int formatCode)
347     {
348         // Directory simply stores raw values
349         // The display side uses a Descriptor class per directory to turn the raw values into 'pretty' descriptions
350         switch (formatCode) {
351             case FMT_UNDEFINED:
352             case FMT_STRING:
353                 String s = readString(tagValueOffset, componentCount);
354                 directory.setString(tagType, s);
355                 break;
356             case FMT_SRATIONAL:
357             case FMT_URATIONAL:
358                 if (componentCount == 1) {
359                     Rational rational = new Rational(get32Bits(tagValueOffset), get32Bits(tagValueOffset + 4));
360                     directory.setRational(tagType, rational);
361                 } else {
362                     Rational[] rationals = new Rational[componentCount];
363                     for (int i = 0; i < componentCount; i++) {
364                         rationals[i] = new Rational(get32Bits(tagValueOffset + (8 * i)), get32Bits(tagValueOffset + 4 + (8 * i)));
365                     }
366                     directory.setRationalArray(tagType, rationals);
367                 }
368                 break;
369             case FMT_SBYTE:
370             case FMT_BYTE:
371                 if (componentCount == 1) {
372                     // this may need to be a byte, but I think casting to int is fine
373                     int b = _data[tagValueOffset];
374                     directory.setInt(tagType, b);
375                 } else {
376                     int[] bytes = new int[componentCount];
377                     for (int i = 0; i < componentCount; i++) {
378                         bytes[i] = _data[tagValueOffset];
379                     }
380                     directory.setIntArray(tagType, bytes);
381                 }
382                 break;
383             case FMT_SINGLE:
384             case FMT_DOUBLE:
385                 if (componentCount == 1) {
386                     int i = _data[tagValueOffset];
387                     directory.setInt(tagType, i);
388                 } else {
389                     int[] ints = new int[componentCount];
390                     for (int i = 0; i < componentCount; i++) {
391                         ints[i] = _data[tagValueOffset];
392                     }
393                     directory.setIntArray(tagType, ints);
394                 }
395                 break;
396             case FMT_USHORT:
397             case FMT_SSHORT:
398                 if (componentCount == 1) {
399                     int i = get16Bits(tagValueOffset);
400                     directory.setInt(tagType, i);
401                 } else {
402                     int[] ints = new int[componentCount];
403                     for (int i = 0; i < componentCount; i++) {
404                         ints[i] = get16Bits(tagValueOffset + (i * 2));
405                     }
406                     directory.setIntArray(tagType, ints);
407                 }
408                 break;
409             case FMT_SLONG:
410             case FMT_ULONG:
411                 if (componentCount == 1) {
412                     int i = get32Bits(tagValueOffset);
413                     directory.setInt(tagType, i);
414                 } else {
415                     int[] ints = new int[componentCount];
416                     for (int i = 0; i < componentCount; i++) {
417                         ints[i] = get32Bits(tagValueOffset + (i * 4));
418                     }
419                     directory.setIntArray(tagType, ints);
420                 }
421                 break;
422             default:
423                 directory.addError("unknown format code " + formatCode);
424         }
425     }
426 /*
427     private void processExifTag(int tagType, int tagValueOffset, int componentCount, int formatCode)
428     {
429         switch (tagType) {
430 // TODO test this still works
431             case TAG_USER_COMMENT:
432                 // Olympus has this padded with trailing spaces.  Remove these first.
433                 // ArrayIndexOutOfBoundsException bug fixed by Hendrik Wördehoff - 20 Sep 2002
434                 int byteCount = componentCount * BYTES_PER_FORMAT[formatCode];
435                 for (int i = byteCount - 1; i >= 0; i--) {
436                     if (_data[tagValueOffset + i] == ' ') {
437                         _data[tagValueOffset + i] = (byte)'\0';
438                     } else {
439                         break;
440                     }
441                 }
442                 // Copy the comment
443                 if ("ASCII".equals(new String(_data, tagValueOffset, 5))) {
444                     for (int i = 5; i < 10; i++) {
445                         byte b = _data[tagValueOffset + i];
446                         if (b != '\0' && b != ' ') {
447                             _metadata.setString(DIRECTORY_EXIF_EXIF, TAG_USER_COMMENT, readString(tagValueOffset + i, 199));
448                             break;
449                         }
450                     }
451                 } else {
452                     _metadata.setString(DIRECTORY_EXIF_EXIF, TAG_USER_COMMENT, readString(tagValueOffset, 199));
453                 }
454                 break;
455 
456 // TODO work out what to do with this calculation
457                 // More relevant info always comes earlier, so only use this field if we don't
458                 // have appropriate aperture information yet.
459             case TAG_APERTURE:
460             case TAG_MAX_APERTURE:
461                 if (!_metadata.containsTag(DIRECTORY_EXIF_EXIF, TAG_FNUMBER)) {
462                     _metadata.setFloat(DIRECTORY_EXIF_EXIF, TAG_FNUMBER, (float)Math.exp(convertTagToNumber(tagValueOffset, formatCode) * Math.log(2) * 0.5));
463                 }
464                 break;
465 
466 // TODO copy these comments somewhere
467                 // Simplest way of expressing exposure time, so I trust it most (overwrite previously computd value if there is one)
468             case TAG_EXPOSURE_TIME:
469                 // Indicates the distance the autofocus camera is focused to.  Tends to be less accurate as distance increases.
470             case TAG_SUBJECT_DISTANCE:
471                 // Nice digital cameras actually save the focal length as a function of how far they are zoomed in.
472             case TAG_FOCAL_LENGTH:
473                 // Simplest way of expressing aperture, so I trust it the most. (overwrite previously computed value if there is one)
474             case TAG_FNUMBER:
475             case TAG_EXPOSURE_BIAS:
476                 _metadata.setFloat(DIRECTORY_EXIF_EXIF, tagType, (float)convertTagToNumber(tagValueOffset, formatCode));
477                 break;
478 
479 // TODO work out what to do with this calculation
480                 // More complicated way of expressing exposure time, so only use this value if we don't already have it from somewhere else.
481             case TAG_SHUTTER_SPEED:
482                 if (!_metadata.containsTag(DIRECTORY_EXIF_EXIF, TAG_EXPOSURE_TIME)) {
483                     _metadata.setFloat(DIRECTORY_EXIF_EXIF, TAG_EXPOSURE_TIME, (float)(1 / Math.exp(convertTagToNumber(tagValueOffset, formatCode) * Math.log(2))));
484                 }
485                 break;
486         }
487     }
488 */
489 
490     private int calculateTagValueOffset(int byteCount, int dirEntryOffset)
491     {
492         if (byteCount > 4) {
493             // If its bigger than 4 bytes, the dir entry contains an offset.
494             // TODO if we're reading FujiFilm makernote tags, the offset is relative to the start of the makernote itself, not the TIFF segment
495             int offsetVal = get32Bits(dirEntryOffset + 8);
496             if (offsetVal + byteCount > _data.length) {
497                 // Bogus pointer offset and / or bytecount value
498                 return -1; // signal error
499             }
500             return TIFF_HEADER_START_OFFSET + offsetVal;
501         } else {
502             // 4 bytes or less and value is in the dir entry itself
503             return dirEntryOffset + 8;
504         }
505     }
506 
507     /**
508      * Creates a String from the _data buffer starting at the specified offset,
509      * and ending where byte=='\0' or where length==maxLength.
510      */
511     private String readString(int offset, int maxLength)
512     {
513         int length = 0;
514         while ((offset + length) < _data.length && _data[offset + length] != '\0' && length < maxLength) {
515             length++;
516         }
517         return new String(_data, offset, length);
518     }
519 
520     /**
521      * Determine the offset at which a given InteropArray entry begins within the specified IFD.
522      * @param ifdStartOffset the offset at which the IFD starts
523      * @param entryNumber the zero-based entry number
524      */
525     private int calculateDirectoryEntryOffset(int ifdStartOffset, int entryNumber)
526     {
527         return (ifdStartOffset + 2 + (12 * entryNumber));
528     }
529 
530     /**
531      * Get a 16 bit value from file's native byte order.  Between 0x0000 and 0xFFFF.
532      */
533     private int get16Bits(int offset)
534     {
535         if (offset < 0 || offset >= _data.length) {
536             throw new ArrayIndexOutOfBoundsException("attempt to read data outside of exif segment (index " + offset + " where max index is " + (_data.length - 1) + ")");
537         }
538         if (motorollaByteOrder) {
539             // Motorola big first
540             return (_data[offset] << 8 & 0xFF00) | (_data[offset + 1] & 0xFF);
541         } else {
542             // Intel ordering
543             return (_data[offset + 1] << 8 & 0xFF00) | (_data[offset] & 0xFF);
544         }
545     }
546 
547     /**
548      * Get a 32 bit value from file's native byte order.
549      */
550     private int get32Bits(int offset)
551     {
552         if (offset < 0 || offset >= _data.length) {
553             throw new ArrayIndexOutOfBoundsException("attempt to read data outside of exif segment (index " + offset + " where max index is " + (_data.length - 1) + ")");
554         }
555         // TODO report this bug in IntelliJ -- 0xFF000000 too large ???
556         if (motorollaByteOrder) {
557             // Motorola big first
558             return (_data[offset] << 24 & 0xFF000000) |
559                     (_data[offset + 1] << 16 & 0xFF0000) |
560                     (_data[offset + 2] << 8 & 0xFF00) |
561                     (_data[offset + 3] & 0xFF);
562         } else {
563             // Intel ordering
564             return (_data[offset + 3] << 24 & 0xFF000000) |
565                     (_data[offset + 2] << 16 & 0xFF0000) |
566                     (_data[offset + 1] << 8 & 0xFF00) |
567                     (_data[offset] & 0xFF);
568         }
569     }
570 }