1 package tim.prune.jpeg.drew;
\r
4 import java.util.HashMap;
\r
6 import tim.prune.jpeg.ExifGateway;
\r
7 import tim.prune.jpeg.JpegData;
\r
10 * Extracts Exif data from a JPEG header segment
\r
11 * Based on Drew Noakes' Metadata extractor at http://drewnoakes.com
\r
12 * which in turn is based on code from Jhead http://www.sentex.net/~mwandel/jhead/
\r
14 public class ExifReader
\r
/** The JPEG segment as an array of bytes */
private final byte[] _data;

/**
 * Represents the native byte ordering used in the JPEG segment. If true,
 * then we're using Motorola ordering (Big endian), else we're using Intel
 * ordering (Little endian).
 */
private boolean _isMotorolaByteOrder;

/** Thumbnail offset, used as an index into the data array, or -1 if not read yet */
private int _thumbnailOffset = -1;
/** Thumbnail length in bytes, or -1 if not read yet */
private int _thumbnailLength = -1;

/** The number of bytes used per format descriptor, indexed by format code (index 0 is unused) */
private static final int[] BYTES_PER_FORMAT = {0, 1, 1, 2, 4, 8, 1, 1, 2, 4, 8, 4, 8};

/** The number of formats known, which is also the highest valid format code */
private static final int MAX_FORMAT_CODE = 12;

// Format codes
// Note: Cannot use the DataFormat enumeration in the case statement that uses these tags.
// Is there a better way?
// The codes which this class does not need are kept as comments for reference.
//private static final int FMT_BYTE = 1;
private static final int FMT_STRING = 2;
//private static final int FMT_USHORT = 3;
//private static final int FMT_ULONG = 4;
private static final int FMT_URATIONAL = 5;
//private static final int FMT_SBYTE = 6;
//private static final int FMT_UNDEFINED = 7;
//private static final int FMT_SSHORT = 8;
//private static final int FMT_SLONG = 9;
private static final int FMT_SRATIONAL = 10;
//private static final int FMT_SINGLE = 11;
//private static final int FMT_DOUBLE = 12;

/** Tag whose value is the offset of the Exif sub-directory */
public static final int TAG_EXIF_OFFSET = 0x8769;
/** Tag whose value is the offset of the interoperability directory */
public static final int TAG_INTEROP_OFFSET = 0xA005;
/** Tag whose value is the offset of the GPS directory */
public static final int TAG_GPS_INFO_OFFSET = 0x8825;
/** Tag identifying the maker note */
public static final int TAG_MAKER_NOTE = 0x927C;

/** Offset of the Tiff header (the byte order marker) within the segment, directly after the "Exif\0\0" preamble */
public static final int TIFF_HEADER_START_OFFSET = 6;

/** GPS tag version GPSVersionID 0 0 BYTE 4 */
public static final int TAG_GPS_VERSION_ID = 0x0000;
/** North or South Latitude GPSLatitudeRef 1 1 ASCII 2 */
public static final int TAG_GPS_LATITUDE_REF = 0x0001;
/** Latitude GPSLatitude 2 2 RATIONAL 3 */
public static final int TAG_GPS_LATITUDE = 0x0002;
/** East or West Longitude GPSLongitudeRef 3 3 ASCII 2 */
public static final int TAG_GPS_LONGITUDE_REF = 0x0003;
/** Longitude GPSLongitude 4 4 RATIONAL 3 */
public static final int TAG_GPS_LONGITUDE = 0x0004;
/** Altitude reference GPSAltitudeRef 5 5 BYTE 1 */
public static final int TAG_GPS_ALTITUDE_REF = 0x0005;
/** Altitude GPSAltitude 6 6 RATIONAL 1 */
public static final int TAG_GPS_ALTITUDE = 0x0006;
/** GPS time (atomic clock) GPSTimeStamp 7 7 RATIONAL 3 */
public static final int TAG_GPS_TIMESTAMP = 0x0007;
/**
 * GPS date GPSDateStamp 29 1d ASCII 11 (text of the form "YYYY:MM:DD").
 * The reader in this class also accepts the date given as three rationals.
 */
public static final int TAG_GPS_DATESTAMP = 0x001d;
/** "Original" Exif timestamp */
public static final int TAG_DATETIME_ORIGINAL = 0x9003;
/** "Creation" or "Digitized" timestamp */
public static final int TAG_DATETIME_DIGITIZED = 0x9004;
/** Thumbnail offset */
private static final int TAG_THUMBNAIL_OFFSET = 0x0201;
/** Thumbnail length */
private static final int TAG_THUMBNAIL_LENGTH = 0x0202;
/** Orientation of image */
private static final int TAG_ORIENTATION = 0x0112;
/** Bearing direction of image (a tag of the GPS directory) */
private static final int TAG_BEARING = 0x0011;
\r
93 * Creates an ExifReader for a Jpeg file
\r
94 * @param inFile File object to attempt to read from
\r
95 * @throws JpegException on failure
\r
97 public ExifReader(File inFile) throws JpegException
\r
99 _data = JpegSegmentReader.readExifSegment(inFile);
\r
103 * Performs the Exif data extraction
\r
104 * @return the GPS data found in the file
\r
106 public JpegData extract()
\r
108 JpegData metadata = new JpegData();
\r
112 // check for the header length
\r
113 if (_data.length<=14)
\r
115 metadata.addError("Exif data segment must contain at least 14 bytes");
\r
119 // check for the header preamble
\r
120 if (!"Exif\0\0".equals(new String(_data, 0, 6)))
\r
122 metadata.addError("Exif data segment doesn't begin with 'Exif'");
\r
126 // this should be either "MM" or "II"
\r
127 String byteOrderIdentifier = new String(_data, 6, 2);
\r
128 if (!setByteOrder(byteOrderIdentifier))
\r
130 metadata.addError("Unclear distinction between Motorola/Intel byte ordering: " + byteOrderIdentifier);
\r
134 // Check the next two values are 0x2A as expected
\r
135 if (get16Bits(8)!=0x2a)
\r
137 metadata.addError("Invalid Exif start - should have 0x2A at offset 8 in Exif header");
\r
141 int firstDirectoryOffset = get32Bits(10) + TIFF_HEADER_START_OFFSET;
\r
143 // Check that offset is within range
\r
144 if (firstDirectoryOffset>=_data.length - 1)
\r
146 metadata.addError("First exif directory offset is beyond end of Exif data segment");
\r
147 // First directory normally starts 14 bytes in -- try it here and catch another error in the worst case
\r
148 firstDirectoryOffset = 14;
\r
151 HashMap<Integer, String> processedDirectoryOffsets = new HashMap<Integer, String>();
\r
153 // 0th IFD (we merge with Exif IFD)
\r
154 processDirectory(metadata, false, processedDirectoryOffsets, firstDirectoryOffset, TIFF_HEADER_START_OFFSET);
\r
161 * Set the byte order identifier
\r
162 * @param byteOrderIdentifier String from exif
\r
163 * @return true if recognised, false otherwise
\r
165 private boolean setByteOrder(String byteOrderIdentifier)
\r
167 if ("MM".equals(byteOrderIdentifier)) {
\r
168 _isMotorolaByteOrder = true;
\r
169 } else if ("II".equals(byteOrderIdentifier)) {
\r
170 _isMotorolaByteOrder = false;
\r
179 * Recursive call to process one of the nested Tiff IFD directories.
\r
180 * 2 bytes: number of tags
\r
182 * 2 bytes: tag type
\r
183 * 2 bytes: format code
\r
184 * 4 bytes: component count
\r
186 private void processDirectory(JpegData inMetadata, boolean inIsGPS, HashMap<Integer, String> inDirectoryOffsets,
\r
187 int inDirOffset, int inTiffHeaderOffset)
\r
189 // check for directories we've already visited to avoid stack overflows when recursive/cyclic directory structures exist
\r
190 if (inDirectoryOffsets.containsKey(Integer.valueOf(inDirOffset)))
\r
193 // remember that we've visited this directory so that we don't visit it again later
\r
194 inDirectoryOffsets.put(Integer.valueOf(inDirOffset), "processed");
\r
196 if (inDirOffset >= _data.length || inDirOffset < 0)
\r
198 inMetadata.addError("Ignored directory marked to start outside data segment");
\r
202 // First two bytes in the IFD are the number of tags in this directory
\r
203 int dirTagCount = get16Bits(inDirOffset);
\r
204 // If no tags, exit without complaint
\r
205 if (dirTagCount == 0) return;
\r
207 if (!isDirectoryLengthValid(inDirOffset, inTiffHeaderOffset))
\r
209 inMetadata.addError("Directory length is not valid");
\r
213 inMetadata.setExifDataPresent();
\r
214 // Handle each tag in this directory
\r
215 for (int tagNumber = 0; tagNumber<dirTagCount; tagNumber++)
\r
217 final int tagOffset = calculateTagOffset(inDirOffset, tagNumber);
\r
219 // 2 bytes for the tag type
\r
220 final int tagType = get16Bits(tagOffset);
\r
222 // 2 bytes for the format code
\r
223 final int formatCode = get16Bits(tagOffset + 2);
\r
224 if (formatCode < 1 || formatCode > MAX_FORMAT_CODE)
\r
226 inMetadata.addError("Invalid format code: " + formatCode);
\r
230 // 4 bytes dictate the number of components in this tag's data
\r
231 final int componentCount = get32Bits(tagOffset + 4);
\r
232 if (componentCount < 0)
\r
234 inMetadata.addError("Negative component count in EXIF");
\r
237 // each component may have more than one byte... calculate the total number of bytes
\r
238 final int byteCount = componentCount * BYTES_PER_FORMAT[formatCode];
\r
239 final int tagValueOffset = calculateTagValueOffset(byteCount, tagOffset, inTiffHeaderOffset);
\r
240 if (tagValueOffset < 0 || tagValueOffset > _data.length)
\r
242 inMetadata.addError("Illegal pointer offset value in EXIF");
\r
246 // Check that this tag isn't going to allocate outside the bounds of the data array.
\r
247 // This addresses an uncommon OutOfMemoryError.
\r
248 if (byteCount < 0 || tagValueOffset + byteCount > _data.length)
\r
250 inMetadata.addError("Illegal number of bytes: " + byteCount);
\r
254 // Calculate the value as an offset for cases where the tag represents a directory
\r
255 final int subdirOffset = inTiffHeaderOffset + get32Bits(tagValueOffset);
\r
257 // Look in both basic Exif tags (for timestamp, thumbnail) and Gps tags (for lat, long, altitude, timestamp)
\r
260 case TAG_EXIF_OFFSET:
\r
261 processDirectory(inMetadata, false, inDirectoryOffsets, subdirOffset, inTiffHeaderOffset);
\r
263 case TAG_INTEROP_OFFSET:
\r
266 case TAG_GPS_INFO_OFFSET:
\r
267 processDirectory(inMetadata, true, inDirectoryOffsets, subdirOffset, inTiffHeaderOffset);
\r
269 case TAG_MAKER_NOTE:
\r
273 // not a known directory, so must just be a normal tag
\r
276 processGpsTag(inMetadata, tagType, tagValueOffset, componentCount, formatCode);
\r
280 processExifTag(inMetadata, tagType, tagValueOffset, componentCount, formatCode);
\r
286 // at the end of each IFD is an optional link to the next IFD
\r
287 final int finalTagOffset = calculateTagOffset(inDirOffset, dirTagCount);
\r
288 int nextDirectoryOffset = get32Bits(finalTagOffset);
\r
289 if (nextDirectoryOffset != 0)
\r
291 nextDirectoryOffset += inTiffHeaderOffset;
\r
292 if (nextDirectoryOffset>=_data.length)
\r
294 // Last 4 bytes of IFD reference another IFD with an address that is out of bounds
\r
297 else if (nextDirectoryOffset < inDirOffset)
\r
299 // Last 4 bytes of IFD reference another IFD with an address before the start of this directory
\r
302 // the next directory is of same type as this one
\r
303 processDirectory(inMetadata, false, inDirectoryOffsets, nextDirectoryOffset, inTiffHeaderOffset);
\r
309 * Check if the directory length is valid
\r
310 * @param dirStartOffset start offset for directory
\r
311 * @param tiffHeaderOffset Tiff header offeset
\r
312 * @return true if length is valid
\r
314 private boolean isDirectoryLengthValid(int inDirStartOffset, int inTiffHeaderOffset)
\r
316 int dirTagCount = get16Bits(inDirStartOffset);
\r
317 int dirLength = (2 + (12 * dirTagCount) + 4);
\r
318 if (dirLength + inDirStartOffset + inTiffHeaderOffset >= _data.length)
\r
320 // Note: Files that had thumbnails trimmed with jhead 1.3 or earlier might trigger this
\r
328 * Process a GPS tag and put the contents in the given metadata
\r
329 * @param inMetadata metadata holding extracted values
\r
330 * @param inTagType tag type (eg latitude)
\r
331 * @param inTagValueOffset start offset in data array
\r
332 * @param inComponentCount component count for tag
\r
333 * @param inFormatCode format code, eg byte
\r
335 private void processGpsTag(JpegData inMetadata, int inTagType, int inTagValueOffset,
\r
336 int inComponentCount, int inFormatCode)
\r
340 // Only interested in tags latref, lat, longref, lon, altref, alt and gps timestamp
\r
343 case TAG_GPS_LATITUDE_REF:
\r
344 inMetadata.setLatitudeRef(readString(inTagValueOffset, inFormatCode, inComponentCount));
\r
346 case TAG_GPS_LATITUDE:
\r
347 Rational[] latitudes = readRationalArray(inTagValueOffset, inFormatCode, inComponentCount);
\r
348 inMetadata.setLatitude(new double[] {latitudes[0].doubleValue(), latitudes[1].doubleValue(),
\r
349 ExifGateway.convertToPositiveValue(latitudes[2].getNumerator(), latitudes[2].getDenominator())});
\r
351 case TAG_GPS_LONGITUDE_REF:
\r
352 inMetadata.setLongitudeRef(readString(inTagValueOffset, inFormatCode, inComponentCount));
\r
354 case TAG_GPS_LONGITUDE:
\r
355 Rational[] longitudes = readRationalArray(inTagValueOffset, inFormatCode, inComponentCount);
\r
356 inMetadata.setLongitude(new double[] {longitudes[0].doubleValue(), longitudes[1].doubleValue(),
\r
357 ExifGateway.convertToPositiveValue(longitudes[2].getNumerator(), longitudes[2].getDenominator())});
\r
359 case TAG_GPS_ALTITUDE_REF:
\r
360 inMetadata.setAltitudeRef(_data[inTagValueOffset]);
\r
362 case TAG_GPS_ALTITUDE:
\r
363 inMetadata.setAltitude(readRational(inTagValueOffset, inFormatCode, inComponentCount).intValue());
\r
365 case TAG_GPS_TIMESTAMP:
\r
366 Rational[] times = readRationalArray(inTagValueOffset, inFormatCode, inComponentCount);
\r
367 inMetadata.setGpsTimestamp(new int[] {times[0].intValue(), times[1].intValue(), times[2].intValue()});
\r
369 case TAG_GPS_DATESTAMP:
\r
370 Rational[] dates = readRationalArray(inTagValueOffset, inFormatCode, inComponentCount);
\r
371 if (dates != null) {
\r
372 inMetadata.setGpsDatestamp(new int[] {dates[0].intValue(), dates[1].intValue(), dates[2].intValue()});
\r
376 // Not in rational array format, but maybe as String?
\r
377 String date = readString(inTagValueOffset, inFormatCode, inComponentCount);
\r
378 if (date != null && date.length() == 10) {
\r
379 inMetadata.setGpsDatestamp(new int[] {Integer.parseInt(date.substring(0, 4)),
\r
380 Integer.parseInt(date.substring(5, 7)), Integer.parseInt(date.substring(8))});
\r
385 Rational val = readRational(inTagValueOffset, inFormatCode, inComponentCount);
\r
387 inMetadata.setBearing(val.doubleValue());
\r
390 default: // ignore all other tags
\r
393 catch (Exception e) {} // ignore and continue
\r
398 * Process a general Exif tag
\r
399 * @param inMetadata metadata holding extracted values
\r
400 * @param inTagType tag type (eg latitude)
\r
401 * @param inTagValueOffset start offset in data array
\r
402 * @param inComponentCount component count for tag
\r
403 * @param inFormatCode format code, eg byte
\r
405 private void processExifTag(JpegData inMetadata, int inTagType, int inTagValueOffset,
\r
406 int inComponentCount, int inFormatCode)
\r
408 // Only interested in original timestamp, thumbnail offset and thumbnail length
\r
409 if (inTagType == TAG_DATETIME_ORIGINAL) {
\r
410 inMetadata.setOriginalTimestamp(readString(inTagValueOffset, inFormatCode, inComponentCount));
\r
412 else if (inTagType == TAG_DATETIME_DIGITIZED) {
\r
413 inMetadata.setDigitizedTimestamp(readString(inTagValueOffset, inFormatCode, inComponentCount));
\r
415 else if (inTagType == TAG_THUMBNAIL_OFFSET) {
\r
416 _thumbnailOffset = TIFF_HEADER_START_OFFSET + get16Bits(inTagValueOffset);
\r
417 extractThumbnail(inMetadata);
\r
419 else if (inTagType == TAG_THUMBNAIL_LENGTH) {
\r
420 _thumbnailLength = get16Bits(inTagValueOffset);
\r
421 extractThumbnail(inMetadata);
\r
423 else if (inTagType == TAG_ORIENTATION) {
\r
424 if (inMetadata.getOrientationCode() < 1) {
\r
425 inMetadata.setOrientationCode(get16Bits(inTagValueOffset));
\r
431 * Attempt to extract the thumbnail image
\r
433 private void extractThumbnail(JpegData inMetadata)
\r
435 if (_thumbnailOffset > 0 && _thumbnailLength > 0 && inMetadata.getThumbnailImage() == null)
\r
437 byte[] thumbnailBytes = new byte[_thumbnailLength];
\r
438 System.arraycopy(_data, _thumbnailOffset, thumbnailBytes, 0, _thumbnailLength);
\r
439 inMetadata.setThumbnailImage(thumbnailBytes);
\r
445 * Calculate the tag value offset
\r
446 * @param inByteCount
\r
447 * @param inDirEntryOffset
\r
448 * @param inTiffHeaderOffset
\r
449 * @return new offset
\r
451 private int calculateTagValueOffset(int inByteCount, int inDirEntryOffset, int inTiffHeaderOffset)
\r
453 if (inByteCount > 4)
\r
455 // If it's bigger than 4 bytes, the dir entry contains an offset.
\r
456 // dirEntryOffset must be passed, as some makers (e.g. FujiFilm) incorrectly use an
\r
457 // offset relative to the start of the makernote itself, not the TIFF segment.
\r
458 final int offsetVal = get32Bits(inDirEntryOffset + 8);
\r
459 if (offsetVal + inByteCount > _data.length)
\r
461 // Bogus pointer offset and / or bytecount value
\r
462 return -1; // signal error
\r
464 return inTiffHeaderOffset + offsetVal;
\r
468 // 4 bytes or less and value is in the dir entry itself
\r
469 return inDirEntryOffset + 8;
\r
475 * Creates a String from the _data buffer starting at the specified offset,
\r
476 * and ending where byte=='\0' or where length==maxLength.
\r
477 * @param inOffset start offset
\r
478 * @param inFormatCode format code - should be string
\r
479 * @param inMaxLength max length of string
\r
480 * @return contents of tag, or null if format incorrect
\r
482 private String readString(int inOffset, int inFormatCode, int inMaxLength)
\r
484 if (inFormatCode != FMT_STRING) return null;
\r
485 // Calculate length
\r
487 while ((inOffset + length)<_data.length
\r
488 && _data[inOffset + length]!='\0'
\r
489 && length < inMaxLength)
\r
493 return new String(_data, inOffset, length);
\r
497 * Creates a Rational from the _data buffer starting at the specified offset
\r
498 * @param inOffset start offset
\r
499 * @param inFormatCode format code - should be srational or urational
\r
500 * @param inCount component count - should be 1
\r
501 * @return contents of tag as a Rational object
\r
503 private Rational readRational(int inOffset, int inFormatCode, int inCount)
\r
505 // Check the format is a single rational as expected
\r
506 if (inFormatCode != FMT_SRATIONAL && inFormatCode != FMT_URATIONAL
\r
507 || inCount != 1) return null;
\r
508 return new Rational(get32Bits(inOffset), get32Bits(inOffset + 4));
\r
513 * Creates a Rational array from the _data buffer starting at the specified offset
\r
514 * @param inOffset start offset
\r
515 * @param inFormatCode format code - should be srational or urational
\r
516 * @param inCount component count - number of components
\r
517 * @return contents of tag as an array of Rational objects
\r
519 private Rational[] readRationalArray(int inOffset, int inFormatCode, int inCount)
\r
521 // Check the format is rational as expected
\r
522 if (inFormatCode != FMT_SRATIONAL && inFormatCode != FMT_URATIONAL)
\r
524 // Build array of Rationals
\r
525 Rational[] answer = new Rational[inCount];
\r
526 for (int i=0; i<inCount; i++)
\r
527 answer[i] = new Rational(get32Bits(inOffset + (8 * i)), get32Bits(inOffset + 4 + (8 * i)));
\r
533 * Determine the offset at which a given InteropArray entry begins within the specified IFD.
\r
534 * @param dirStartOffset the offset at which the IFD starts
\r
535 * @param entryNumber the zero-based entry number
\r
537 private int calculateTagOffset(int dirStartOffset, int entryNumber)
\r
539 // add 2 bytes for the tag count
\r
540 // each entry is 12 bytes, so we skip 12 * the number seen so far
\r
541 return dirStartOffset + 2 + (12 * entryNumber);
\r
546 * Get a 16 bit value from file's native byte order. Between 0x0000 and 0xFFFF.
\r
548 private int get16Bits(int offset)
\r
550 if (offset<0 || offset+2>_data.length)
\r
551 throw new ArrayIndexOutOfBoundsException("attempt to read data outside of exif segment (index "
\r
552 + offset + " where max index is " + (_data.length - 1) + ")");
\r
554 if (_isMotorolaByteOrder) {
\r
555 // Motorola - MSB first
\r
556 return (_data[offset] << 8 & 0xFF00) | (_data[offset + 1] & 0xFF);
\r
558 // Intel ordering - LSB first
\r
559 return (_data[offset + 1] << 8 & 0xFF00) | (_data[offset] & 0xFF);
\r
565 * Get a 32 bit value from file's native byte order.
\r
567 private int get32Bits(int offset)
\r
569 if (offset < 0 || offset+4 > _data.length)
\r
570 throw new ArrayIndexOutOfBoundsException("attempt to read data outside of exif segment (index "
\r
571 + offset + " where max index is " + (_data.length - 1) + ")");
\r
573 if (_isMotorolaByteOrder)
\r
575 // Motorola - MSB first
\r
576 return (_data[offset] << 24 & 0xFF000000) |
\r
577 (_data[offset + 1] << 16 & 0xFF0000) |
\r
578 (_data[offset + 2] << 8 & 0xFF00) |
\r
579 (_data[offset + 3] & 0xFF);
\r
583 // Intel ordering - LSB first
\r
584 return (_data[offset + 3] << 24 & 0xFF000000) |
\r
585 (_data[offset + 2] << 16 & 0xFF0000) |
\r
586 (_data[offset + 1] << 8 & 0xFF00) |
\r
587 (_data[offset] & 0xFF);
\r