Source code: com/drew/metadata/exif/ExifReader.java
1 /*
2 * EXIFExtractor.java
3 *
4 * This class based upon code from Jhead, a C program for extracting and
5 * manipulating the Exif data within files written by Matthias Wandel.
6 * http://www.sentex.net/~mwandel/jhead/
7 *
8 * Jhead is public domain software - that is, you can do whatever you want
9 * with it, and include it in software that is licensed under the GNU or the
10 * BSD license, or whatever other license you choose, including proprietary
11 * closed source licenses. Similarly, I release this Java version under the
12 * same license, though I do ask that you leave this header intact.
13 *
14 * If you make modifications to this code that you think would benefit the
15 * wider community, please send me a copy and I'll post it on my site. Unlike
16 * Jhead, this code (as it stands) only supports reading of Exif data - no
17 * manipulation, and no thumbnail stuff.
18 *
19 * If you make use of this code, I'd appreciate hearing about it.
20 * drew.noakes@drewnoakes.com
21 * Latest version of this software kept at
22 * http://drewnoakes.com/
23 *
24 * Created on 28 April 2002, 23:54
25 * Modified 04 Aug 2002
26 * - Renamed constants to be inline with changes to ExifTagValues interface
27 * - Substituted usage of JDK 1.4 features (java.nio package)
28 * Modified 29 Oct 2002 (v1.2)
29 * - Proper traversing of Exif file structure and complete refactor & tidy of
30 * the codebase (a few unnoticed bugs removed)
31 * - Reads makernote data for 6 families of camera (5 makes)
32 * - Tags now stored in directories... use the IFD_* constants to refer to the
33 * image file directory you require (Exif, Interop, GPS and Makernote*) --
34 * this avoids collisions where two tags share the same code
35 * - Takes componentCount of unknown tags into account
36 * - Now understands GPS tags (thanks to Colin Briton for his help with this)
37 * - Some other bug fixes, pointed out by users around the world. Thanks!
38 * Modified 27 Nov 2002 (v2.0)
39 * - Renamed to ExifReader
40 * - Moved to new package com.drew.metadata.exif
41 */
42 package com.drew.metadata.exif;
43
44 import com.drew.imaging.jpeg.JpegProcessingException;
45 import com.drew.imaging.jpeg.JpegSegmentReader;
46 import com.drew.lang.Rational;
47 import com.drew.metadata.*;
48
49 import java.io.File;
50 import java.io.FileNotFoundException;
51
52 /**
53 * Extracts Exif data from a JPEG header segment, providing information about the
54 * camera/scanner/capture device (if available). Information is encapsulated in
55 * an <code>Metadata</code> object.
56 * @author Drew Noakes http://drewnoakes.com
57 */
58 public class ExifReader implements MetadataReader
59 {
60 /**
61 * The JPEG segment as an array of bytes.
62 */
63 private byte[] _data;
64
65 /**
66 * Represents the native byte ordering used in the JPEG segment. If true,
67 * then we're using Motorolla ordering (Big endian), else we're using Intel
68 * ordering (Little endian).
69 */
70 private boolean motorollaByteOrder;
71
72 /**
73 * Bean instance to store information about the image and camera/scanner/capture
74 * device.
75 */
76 private Metadata _metadata;
77
78 /**
79 * The number of bytes used per format descriptor.
80 */
81 static final int[] BYTES_PER_FORMAT = {0, 1, 1, 2, 4, 8, 1, 1, 2, 4, 8, 4, 8};
82
83 /**
84 * The number of formats known.
85 */
86 private static final int MAX_FORMAT_CODE = 12;
87
88 // the format enumeration
89 private static final int FMT_BYTE = 1;
90 private static final int FMT_STRING = 2;
91 private static final int FMT_USHORT = 3;
92 private static final int FMT_ULONG = 4;
93 private static final int FMT_URATIONAL = 5;
94 private static final int FMT_SBYTE = 6;
95 private static final int FMT_UNDEFINED = 7;
96 private static final int FMT_SSHORT = 8;
97 private static final int FMT_SLONG = 9;
98 private static final int FMT_SRATIONAL = 10;
99 private static final int FMT_SINGLE = 11;
100 private static final int FMT_DOUBLE = 12;
101
102 public static final int TAG_EXIF_OFFSET = 0x8769;
103 public static final int TAG_INTEROP_OFFSET = 0xA005;
104 public static final int TAG_GPS_INFO_OFFSET = 0x8825;
105 public static final int TAG_MAKER_NOTE = 0x927C;
106
107 public static final int TIFF_HEADER_START_OFFSET = 6;
108
109 /**
110 *
111 * @param file
112 * @throws JpegProcessingException
113 * @throws FileNotFoundException
114 */
115 public ExifReader(File file) throws JpegProcessingException, FileNotFoundException
116 {
117 this(new JpegSegmentReader(file).readSegment(JpegSegmentReader.SEGMENT_APP1));
118 }
119
120 /**
121 * Creates an ExifReader for the given JPEG header segment.
122 */
123 public ExifReader(byte[] data)
124 {
125 _data = data;
126 }
127
128 /**
129 * Performs the Exif data extraction, returning a new instance of <code>Metadata</code>.
130 */
131 public Metadata extract()
132 {
133 return extract(new Metadata());
134 }
135
136 /**
137 * Performs the Exif data extraction, adding found values to the specified
138 * instance of <code>Metadata</code>.
139 */
140 public Metadata extract(Metadata metadata)
141 {
142 _metadata = metadata;
143 if (_data == null) {
144 return _metadata;
145 }
146
147 // once we know there's some data, create the directory and start working on it
148 Directory directory = _metadata.getDirectory(ExifDirectory.class);
149 if (_data.length <= 14) {
150 directory.addError("Exif data segment must contain at least 14 bytes");
151 return _metadata;
152 }
153 if (!"Exif\0\0".equals(new String(_data, 0, 6))) {
154 directory.addError("Exif data segment doesn't begin with 'Exif'");
155 return _metadata;
156 }
157
158 // this should be either "MM" or "II"
159 String byteOrderIdentifier = new String(_data, 6, 2);
160 if (!setByteOrder(byteOrderIdentifier)) {
161 directory.addError("Unclear distinction between Motorola/Intel byte ordering");
162 return _metadata;
163 }
164
165 // Check the next two values for correctness.
166 if (get16Bits(8) != 0x2a) {
167 directory.addError("Invalid Exif start - should have 0x2A at offset 8 in Exif header");
168 return _metadata;
169 }
170
171 int firstDirectoryOffset = get32Bits(10) + TIFF_HEADER_START_OFFSET;
172
173 // David Ekholm sent an digital camera image that has this problem
174 if (firstDirectoryOffset >= _data.length - 1) {
175 directory.addError("First exif directory offset is beyond end of Exif data segment");
176 // First directory normally starts 14 bytes in -- try it here and catch another error in the worst case
177 firstDirectoryOffset = 14;
178 }
179
180 // 0th IFD (we merge with Exif IFD)
181 processDirectory(directory, firstDirectoryOffset);
182
183 // after the extraction process, if we have the correct tags, we may be able to extract thumbnail information
184 extractThumbnail(directory);
185
186 return _metadata;
187 }
188
189 private void extractThumbnail(Directory exifDirectory)
190 {
191 if (!(exifDirectory instanceof ExifDirectory)) {
192 return;
193 }
194 if (!exifDirectory.containsTag(ExifDirectory.TAG_THUMBNAIL_LENGTH) ||
195 !exifDirectory.containsTag(ExifDirectory.TAG_THUMBNAIL_OFFSET)) {
196 return;
197 }
198 int offset, length;
199 try {
200 offset = exifDirectory.getInt(ExifDirectory.TAG_THUMBNAIL_OFFSET);
201 length = exifDirectory.getInt(ExifDirectory.TAG_THUMBNAIL_LENGTH);
202 byte[] result = new byte[length];
203 for (int i = 0; i < result.length; i++) {
204 result[i] = _data[TIFF_HEADER_START_OFFSET + offset + i];
205 }
206 exifDirectory.setByteArray(ExifDirectory.TAG_THUMBNAIL_DATA, result);
207 } catch (Throwable e) {
208 exifDirectory.addError("Unable to extract thumbnail: " + e.getMessage());
209 }
210 }
211
212 private boolean setByteOrder(String byteOrderIdentifier)
213 {
214 if ("MM".equals(byteOrderIdentifier)) {
215 motorollaByteOrder = true;
216 } else if ("II".equals(byteOrderIdentifier)) {
217 motorollaByteOrder = false;
218 } else {
219 return false;
220 }
221 return true;
222 }
223
224 /**
225 * Process one of the nested EXIF directories.
226 */
227 private void processDirectory(Directory directory, int dirStartOffset)
228 {
229 // if (dirStartOffset>=_data.length) {
230 // return;
231 // }
232
233 // First two bytes in the IFD are the tag count
234 int dirTagCount = get16Bits(dirStartOffset);
235
236 if (!isDirectoryLengthValid(dirStartOffset)) {
237 directory.addError("Illegally sized directory");
238 return;
239 }
240
241 // Handle each tag in this directory
242 for (int dirEntry = 0; dirEntry < dirTagCount; dirEntry++) {
243 int dirEntryOffset = calculateDirectoryEntryOffset(dirStartOffset, dirEntry);
244 int tagType = get16Bits(dirEntryOffset);
245 int formatCode = get16Bits(dirEntryOffset + 2);
246 if (formatCode < 0 || formatCode > MAX_FORMAT_CODE) {
247 directory.addError("Invalid format code: " + formatCode);
248 continue;
249 }
250
251 // 4 bytes indicating number of formatCode type data for this tag
252 int componentCount = get32Bits(dirEntryOffset + 4);
253 int byteCount = componentCount * BYTES_PER_FORMAT[formatCode];
254 int tagValueOffset = calculateTagValueOffset(byteCount, dirEntryOffset);
255 if (tagValueOffset<0) {
256 directory.addError("Illegal pointer offset value in EXIF");
257 continue;
258 }
259
260 // Calculate the value as an offset for cases where the tag is represents directory
261 int subdirOffset = TIFF_HEADER_START_OFFSET + get32Bits(tagValueOffset);
262
263 switch (tagType) {
264 case TAG_EXIF_OFFSET:
265 processDirectory(_metadata.getDirectory(ExifDirectory.class), subdirOffset);
266 continue;
267 case TAG_INTEROP_OFFSET:
268 processDirectory(_metadata.getDirectory(ExifInteropDirectory.class), subdirOffset);
269 continue;
270 case TAG_GPS_INFO_OFFSET:
271 processDirectory(_metadata.getDirectory(GpsDirectory.class), subdirOffset);
272 continue;
273 case TAG_MAKER_NOTE:
274 processMakerNote(tagValueOffset);
275 continue;
276 default:
277 processTag(directory, tagType, tagValueOffset, componentCount, formatCode);
278 break;
279 }
280 }
281 // At the end of each IFD is an optional link to the next IFD. This link is after
282 // the 2-byte tag count, and after 12 bytes for each of these tags, hence
283 int nextDirectoryOffset = get32Bits(dirStartOffset + 2 + 12 * dirTagCount);
284 if (nextDirectoryOffset != 0) {
285 nextDirectoryOffset += TIFF_HEADER_START_OFFSET;
286 if (nextDirectoryOffset >= _data.length) {
287 // Last 4 bytes of IFD reference another IFD with an address that is out of bounds
288 // Note this could have been caused by jhead 1.3 cropping too much
289 return;
290 }
291 // the next directory is of same type as this one
292 processDirectory(directory, nextDirectoryOffset);
293 }
294 }
295
296 private void processMakerNote(int subdirOffset)
297 {
298 // Determine the camera model and makernote format
299 Directory exifDirectory = _metadata.getDirectory(ExifDirectory.class);
300 if (exifDirectory == null) {
301 return;
302 }
303 String cameraModel = exifDirectory.getString(ExifDirectory.TAG_MAKE);
304 if ("OLYMP".equals(new String(_data, subdirOffset, 5))) {
305 // Olympus Makernote
306 processDirectory(_metadata.getDirectory(OlympusMakernoteDirectory.class), subdirOffset + 8);
307 } else if ("NIKON".equalsIgnoreCase(cameraModel)) {
308 if ("Nikon".equals(new String(_data, subdirOffset, 5))) {
309 // Nikon type 1 Makernote
310 processDirectory(_metadata.getDirectory(NikonType1MakernoteDirectory.class), subdirOffset + 8);
311 } else {
312 // Nikon type 2 Makernote
313 processDirectory(_metadata.getDirectory(NikonType2MakernoteDirectory.class), subdirOffset);
314 }
315 } else if ("Canon".equalsIgnoreCase(cameraModel)) {
316 // Canon Makernote
317 processDirectory(_metadata.getDirectory(CanonMakernoteDirectory.class), subdirOffset);
318 } else if ("Casio".equalsIgnoreCase(cameraModel)) {
319 // Casio Makernote
320 processDirectory(_metadata.getDirectory(CasioMakernoteDirectory.class), subdirOffset);
321 } else if ("FUJIFILM".equals(new String(_data, subdirOffset, 8)) || "Fujifilm".equalsIgnoreCase(cameraModel)) {
322 // Fujifile Makernote
323 boolean byteOrderBefore = motorollaByteOrder;
324 // bug in fujifilm makernote ifd means we temporarily use Intel byte ordering
325 motorollaByteOrder = false;
326 // the 4 bytes after "FUJIFILM" in the makernote point to the start of the makernote
327 // IFD, though the offset is relative to the start of the makernote, not the TIFF
328 // header (like everywhere else)
329 int ifdStart = subdirOffset + get32Bits(subdirOffset + 8);
330 processDirectory(_metadata.getDirectory(FujiFilmMakernoteDirectory.class), ifdStart);
331 motorollaByteOrder = byteOrderBefore;
332 }
333 }
334
335 private boolean isDirectoryLengthValid(int dirStartOffset)
336 {
337 int dirTagCount = get16Bits(dirStartOffset);
338 int dirLength = (2 + (12 * dirTagCount) + 4);
339 if (dirLength + dirStartOffset + TIFF_HEADER_START_OFFSET >= _data.length) {
340 // Note: Files that had thumbnails trimmed with jhead 1.3 or earlier might trigger this
341 return false;
342 }
343 return true;
344 }
345
346 private void processTag(Directory directory, int tagType, int tagValueOffset, int componentCount, int formatCode)
347 {
348 // Directory simply stores raw values
349 // The display side uses a Descriptor class per directory to turn the raw values into 'pretty' descriptions
350 switch (formatCode) {
351 case FMT_UNDEFINED:
352 case FMT_STRING:
353 String s = readString(tagValueOffset, componentCount);
354 directory.setString(tagType, s);
355 break;
356 case FMT_SRATIONAL:
357 case FMT_URATIONAL:
358 if (componentCount == 1) {
359 Rational rational = new Rational(get32Bits(tagValueOffset), get32Bits(tagValueOffset + 4));
360 directory.setRational(tagType, rational);
361 } else {
362 Rational[] rationals = new Rational[componentCount];
363 for (int i = 0; i < componentCount; i++) {
364 rationals[i] = new Rational(get32Bits(tagValueOffset + (8 * i)), get32Bits(tagValueOffset + 4 + (8 * i)));
365 }
366 directory.setRationalArray(tagType, rationals);
367 }
368 break;
369 case FMT_SBYTE:
370 case FMT_BYTE:
371 if (componentCount == 1) {
372 // this may need to be a byte, but I think casting to int is fine
373 int b = _data[tagValueOffset];
374 directory.setInt(tagType, b);
375 } else {
376 int[] bytes = new int[componentCount];
377 for (int i = 0; i < componentCount; i++) {
378 bytes[i] = _data[tagValueOffset];
379 }
380 directory.setIntArray(tagType, bytes);
381 }
382 break;
383 case FMT_SINGLE:
384 case FMT_DOUBLE:
385 if (componentCount == 1) {
386 int i = _data[tagValueOffset];
387 directory.setInt(tagType, i);
388 } else {
389 int[] ints = new int[componentCount];
390 for (int i = 0; i < componentCount; i++) {
391 ints[i] = _data[tagValueOffset];
392 }
393 directory.setIntArray(tagType, ints);
394 }
395 break;
396 case FMT_USHORT:
397 case FMT_SSHORT:
398 if (componentCount == 1) {
399 int i = get16Bits(tagValueOffset);
400 directory.setInt(tagType, i);
401 } else {
402 int[] ints = new int[componentCount];
403 for (int i = 0; i < componentCount; i++) {
404 ints[i] = get16Bits(tagValueOffset + (i * 2));
405 }
406 directory.setIntArray(tagType, ints);
407 }
408 break;
409 case FMT_SLONG:
410 case FMT_ULONG:
411 if (componentCount == 1) {
412 int i = get32Bits(tagValueOffset);
413 directory.setInt(tagType, i);
414 } else {
415 int[] ints = new int[componentCount];
416 for (int i = 0; i < componentCount; i++) {
417 ints[i] = get32Bits(tagValueOffset + (i * 4));
418 }
419 directory.setIntArray(tagType, ints);
420 }
421 break;
422 default:
423 directory.addError("unknown format code " + formatCode);
424 }
425 }
426 /*
427 private void processExifTag(int tagType, int tagValueOffset, int componentCount, int formatCode)
428 {
429 switch (tagType) {
430 // TODO test this still works
431 case TAG_USER_COMMENT:
432 // Olympus has this padded with trailing spaces. Remove these first.
433 // ArrayIndexOutOfBoundsException bug fixed by Hendrik Wördehoff - 20 Sep 2002
434 int byteCount = componentCount * BYTES_PER_FORMAT[formatCode];
435 for (int i = byteCount - 1; i >= 0; i--) {
436 if (_data[tagValueOffset + i] == ' ') {
437 _data[tagValueOffset + i] = (byte)'\0';
438 } else {
439 break;
440 }
441 }
442 // Copy the comment
443 if ("ASCII".equals(new String(_data, tagValueOffset, 5))) {
444 for (int i = 5; i < 10; i++) {
445 byte b = _data[tagValueOffset + i];
446 if (b != '\0' && b != ' ') {
447 _metadata.setString(DIRECTORY_EXIF_EXIF, TAG_USER_COMMENT, readString(tagValueOffset + i, 199));
448 break;
449 }
450 }
451 } else {
452 _metadata.setString(DIRECTORY_EXIF_EXIF, TAG_USER_COMMENT, readString(tagValueOffset, 199));
453 }
454 break;
455
456 // TODO work out what to do with this calculation
457 // More relevant info always comes earlier, so only use this field if we don't
458 // have appropriate aperture information yet.
459 case TAG_APERTURE:
460 case TAG_MAX_APERTURE:
461 if (!_metadata.containsTag(DIRECTORY_EXIF_EXIF, TAG_FNUMBER)) {
462 _metadata.setFloat(DIRECTORY_EXIF_EXIF, TAG_FNUMBER, (float)Math.exp(convertTagToNumber(tagValueOffset, formatCode) * Math.log(2) * 0.5));
463 }
464 break;
465
466 // TODO copy these comments somewhere
467 // Simplest way of expressing exposure time, so I trust it most (overwrite previously computd value if there is one)
468 case TAG_EXPOSURE_TIME:
469 // Indicates the distance the autofocus camera is focused to. Tends to be less accurate as distance increases.
470 case TAG_SUBJECT_DISTANCE:
471 // Nice digital cameras actually save the focal length as a function of how far they are zoomed in.
472 case TAG_FOCAL_LENGTH:
473 // Simplest way of expressing aperture, so I trust it the most. (overwrite previously computed value if there is one)
474 case TAG_FNUMBER:
475 case TAG_EXPOSURE_BIAS:
476 _metadata.setFloat(DIRECTORY_EXIF_EXIF, tagType, (float)convertTagToNumber(tagValueOffset, formatCode));
477 break;
478
479 // TODO work out what to do with this calculation
480 // More complicated way of expressing exposure time, so only use this value if we don't already have it from somewhere else.
481 case TAG_SHUTTER_SPEED:
482 if (!_metadata.containsTag(DIRECTORY_EXIF_EXIF, TAG_EXPOSURE_TIME)) {
483 _metadata.setFloat(DIRECTORY_EXIF_EXIF, TAG_EXPOSURE_TIME, (float)(1 / Math.exp(convertTagToNumber(tagValueOffset, formatCode) * Math.log(2))));
484 }
485 break;
486 }
487 }
488 */
489
490 private int calculateTagValueOffset(int byteCount, int dirEntryOffset)
491 {
492 if (byteCount > 4) {
493 // If its bigger than 4 bytes, the dir entry contains an offset.
494 // TODO if we're reading FujiFilm makernote tags, the offset is relative to the start of the makernote itself, not the TIFF segment
495 int offsetVal = get32Bits(dirEntryOffset + 8);
496 if (offsetVal + byteCount > _data.length) {
497 // Bogus pointer offset and / or bytecount value
498 return -1; // signal error
499 }
500 return TIFF_HEADER_START_OFFSET + offsetVal;
501 } else {
502 // 4 bytes or less and value is in the dir entry itself
503 return dirEntryOffset + 8;
504 }
505 }
506
507 /**
508 * Creates a String from the _data buffer starting at the specified offset,
509 * and ending where byte=='\0' or where length==maxLength.
510 */
511 private String readString(int offset, int maxLength)
512 {
513 int length = 0;
514 while ((offset + length) < _data.length && _data[offset + length] != '\0' && length < maxLength) {
515 length++;
516 }
517 return new String(_data, offset, length);
518 }
519
520 /**
521 * Determine the offset at which a given InteropArray entry begins within the specified IFD.
522 * @param ifdStartOffset the offset at which the IFD starts
523 * @param entryNumber the zero-based entry number
524 */
525 private int calculateDirectoryEntryOffset(int ifdStartOffset, int entryNumber)
526 {
527 return (ifdStartOffset + 2 + (12 * entryNumber));
528 }
529
530 /**
531 * Get a 16 bit value from file's native byte order. Between 0x0000 and 0xFFFF.
532 */
533 private int get16Bits(int offset)
534 {
535 if (offset < 0 || offset >= _data.length) {
536 throw new ArrayIndexOutOfBoundsException("attempt to read data outside of exif segment (index " + offset + " where max index is " + (_data.length - 1) + ")");
537 }
538 if (motorollaByteOrder) {
539 // Motorola big first
540 return (_data[offset] << 8 & 0xFF00) | (_data[offset + 1] & 0xFF);
541 } else {
542 // Intel ordering
543 return (_data[offset + 1] << 8 & 0xFF00) | (_data[offset] & 0xFF);
544 }
545 }
546
547 /**
548 * Get a 32 bit value from file's native byte order.
549 */
550 private int get32Bits(int offset)
551 {
552 if (offset < 0 || offset >= _data.length) {
553 throw new ArrayIndexOutOfBoundsException("attempt to read data outside of exif segment (index " + offset + " where max index is " + (_data.length - 1) + ")");
554 }
555 // TODO report this bug in IntelliJ -- 0xFF000000 too large ???
556 if (motorollaByteOrder) {
557 // Motorola big first
558 return (_data[offset] << 24 & 0xFF000000) |
559 (_data[offset + 1] << 16 & 0xFF0000) |
560 (_data[offset + 2] << 8 & 0xFF00) |
561 (_data[offset + 3] & 0xFF);
562 } else {
563 // Intel ordering
564 return (_data[offset + 3] << 24 & 0xFF000000) |
565 (_data[offset + 2] << 16 & 0xFF0000) |
566 (_data[offset + 1] << 8 & 0xFF00) |
567 (_data[offset] & 0xFF);
568 }
569 }
570 }