Source code: com/flexstor/flexdbserver/services/asset/iptc/EPSParser.java
1 /*
2 * EPSParser.java
3 *
4 * Copyright $Date: 2003/08/11 02:22:34 $ FLEXSTOR.net Inc.
5 *
6 * This work is licensed for use and distribution under license terms found at
7 * http://www.flexstor.org/license.html
8 *
9 */
10
11 package com.flexstor.flexdbserver.services.asset.iptc;
12
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Hashtable;

import com.flexstor.common.io.xfile.FlexXFile;
import com.flexstor.common.io.xfile.FlexXRandomAccessFile;
18
19 /**
20 *
21 *
22 */
23 public class EPSParser extends IPTCParser implements IIPTCParser
24 {
25 FlexXRandomAccessFile refFile = null;
26
27 /** DEBUG **/
28 boolean bDebug = false;
29
30
31 /**
32 *
33 *
34 */
35 public EPSParser(FlexXFile refXFile)
36 {
37 // Open the input file for random read access
38 try
39 {
40 refFile = new FlexXRandomAccessFile(refXFile, "r");
41 }
42
43 catch(IOException ioe)
44 {
45 BGPError("Could not open random access file.");
46 }
47
48 } // constructor
49
50
51 /**
52 *
53 *
54 */
55 public boolean parseFile()
56 {
57 boolean bResult = true;
58
59 // Read EFS file header, attempting to locate the resource block
60 // This block starts with "8BIM", but it is encoded in the device-independent
61 // bitmap so it is encoded in binary hex.
62 // The resource block should start just after the normal EFS header
63 if (parseEfsHeader() == false)
64 {
65 bResult = false;
66 }
67
68 // Close the input file
69 if (refFile != null)
70 {
71 try
72 {
73 refFile.close();
74 }
75
76 catch(IOException ioe)
77 {
78 System.out.println("EPS Parser error closing file: " + ioe.toString());
79 }
80 }
81
82 return bResult;
83 } // parseEFS
84
85
86 /**
87 *
88 *
89 */
90 protected boolean parseEfsHeader()
91 {
92 boolean bResult = true;
93 boolean bContinue = true;
94 long nSignatureCheckPos = -1;
95
96 // This counter is a sanity count in case the file is corrupt and the
97 // EFS header is not found. Loop will be terminated if too many
98 // percent chars are found.
99 int nPercentCount = 200;
100
101 while(bContinue)
102 {
103 if (findChar('%') == false)
104 {
105 bResult = false;
106 bContinue = false;
107 }
108
109 String sNextChar = getNextChar();
110 if (sNextChar == null)
111 {
112 bResult = false;
113 bContinue = false;
114 }
115
116 if ((bContinue == true) && (sNextChar.equals("%") == false) && (sNextChar.equals("!") == false))
117 {
118 // Check for "8BIM" with or without leading spaces
119 // Save the current position in case the signature not found so
120 // we can look for the next "%" sequence
121
122 // Filter out any spaces in this loop
123 while (sNextChar.equals(" ") == true)
124 {
125 sNextChar = getNextChar();
126 if (sNextChar == null)
127 {
128 bResult = false;
129 bContinue = false;
130 break;
131 }
132 } // while
133
134 nSignatureCheckPos = savePointer();
135 if (nSignatureCheckPos == -1)
136 {
137 bResult = false;
138 bContinue = false;
139 }
140
141 // Was the last character read the first byte of the "*BIM" string?
142 if ((bContinue == true) && (sNextChar.equals("3") == true))
143 {
144
145 // Check the next bytes for "8BIM" ([20] 33 38 34 32 34 39 34 44)
146 // We already found the first byte (0x33)
147 // Set up the reference array
148 byte anRefData[] = {0x38, 0x34, 0x32, 0x34, 0x39, 0x34, 0x44};
149
150 // Read 7 bytes from the file
151 byte[] anData = new byte[7];
152 anData = readBytes(7);
153 if (anData == null)
154 {
155 bResult = false;
156 bContinue = false;
157 }
158
159 // Compare the file data with the reference array
160 bResult = true;
161 for (int i=0; i<anData.length; i++)
162 {
163 if (anData[i] != anRefData[i])
164 {
165 bResult = false;
166 break;
167 }
168 } // for i
169
170 } // sNextChar = "3" if
171
172 else
173 {
174 // First byte of "8BIM" not found
175 bResult = false;
176 }
177
178 if ((bContinue == true) && (bResult == true))
179 {
180
181 // The resource block has been found
182 byte abIPTCData[] = parseResourceBlock();
183 if (abIPTCData != null)
184 {
185 ABNDebug("Parsing IPTC data....");
186 if (parseIPTCdata(abIPTCData) == true)
187 {
188 ABNDebug("IPTC data parsed.");
189 bResult = true;
190 }
191
192 else
193 {
194 ABNDebug("Error parsing IPTC data.");
195 bResult = false;
196 }
197 } // abIPTCData
198
199 else
200 {
201 bResult = false;
202 }
203
204 bContinue = false;
205 } // bResult if
206
207 else if ((bContinue == true) && (nSignatureCheckPos != -1))
208 {
209 // Continue checking for the next single "%"
210 if (restorePointer(nSignatureCheckPos) == false)
211 {
212 bResult = false;
213 bContinue = false;
214 }
215 }
216 }
217
218 // Check the sanity count, if head not found before count goes
219 // negative, something must be wrong so quit
220 if (--nPercentCount < 0)
221 {
222 BGPError("Too many percent chars found before Resource block header");
223 bResult = false;
224 bContinue = false;
225 }
226 } // while
227
228 return bResult;
229 } // parseEfsHeader
230
231
232 /**
233 *
234 *
235 */
236 protected byte[] parseResourceBlock()
237 {
238 byte abResourceBlockData[] = null;
239
240 // The first two bytes are the resource ID
241 // The IPTC resource ID is 0x0404 which is what we want
242
243 // NOTE: For now, assume that the first resource is the IPTC block!
244 // The problem is that the resource block is binary hex coded and in the
245 // form of a device independent preview. This means that there are two
246 // bytes for each ascii iptc character and that the block is segmented into
247 // records of arbitrary length which are defined by binary characters. The
248 // length of the binary record is fixed for each file but has no relation to the
249 // iptc record content. Therefore, block and record lengths in the iptc headers
250 // do not directly relate to the actual size of the blocks and records as
251 // read from the file.
252
253 // Check the first 2 bytes for the IPTC resource ID (0x0404)
254 byte[] anByteBuffer = readBytes(4);
255 if (anByteBuffer == null)
256 {
257 BGPError("Could not read IPTC resource ID");
258 return null;
259 }
260
261 String sResId = binaryHexToString(anByteBuffer);
262 if ((sResId == null) || (sResId.equals("0404") == false))
263 {
264 BGPError("IPTC resource ID not found");
265 return null;
266 }
267
268 // The next "n" bytes contains the Name of the resource block
269 // It is a PString so the first byte is the length. A null string
270 // here will contain a 0 length with a 0 byte following. We don't
271 // particularily care about this string so just get the length and
272 // skip over it.
273 anByteBuffer = readBytes(2);
274 String sCount = binaryHexToChar(anByteBuffer);
275 if (sCount.equals("00") == false)
276 {
277 // The PString is not zero length so skip count bytes
278 int nCount = parseStrToInt(sCount);
279 if (nCount == -1)
280 {
281 return null;
282 }
283
284 // Skip 2 x nCount bytes since there are 2 bytes per ascii char
285 if (skipBytes(2*nCount) == false)
286 {
287 BGPError("Error skipping PString");
288 return null;
289 }
290 } // nCount if
291
292 else
293 {
294 // The PString is a zero length string so the next char (2 bytes)
295 // will be ascii "0". Just skip over them
296 if (skipBytes(2) == false)
297 {
298 BGPError("Error skipping PString");
299 return null;
300 }
301 }
302
303 // The next 4 bytes contains the length of the IPTC resource block.
304 // Note that this is the length before it was encoded in a device independent
305 // preview using binary hex, so it is not an accurate representation of the
306 // actual number of bytes in the resource block in the file. Save it for
307 // later approximations.
308 anByteBuffer = readBytes(8);
309 String sResourceCount = binaryHexToString(anByteBuffer);
310 if (sResourceCount == null)
311 {
312 BGPError("Could not binaryHex convert resource block count");
313 return null;
314 }
315
316 int nResourceCount = parseStrToInt(sResourceCount, 16);
317
318 // The block length is always even and, therefore, possibly padded but the length given
319 // in the header is the pre-padded length.
320 // Adjust the length count to recognize the padding.
321 if ((nResourceCount % 2) != 0)
322 {
323 ++nResourceCount;
324 }
325 ABNDebug("Resource block length: " + nResourceCount);
326
327 // Save the file position as a possible reference point for the resource byte count
328 long nResourceBlockStart = savePointer();
329 if (nResourceBlockStart == -1)
330 {
331 return null;
332 }
333
334 // Process the IPTC resource tags
335 abResourceBlockData = parseIptcTags(nResourceCount);
336
337 return abResourceBlockData;
338 } // parseResouceBlock
339
340
341 /**
342 *
343 *
344 */
345 protected byte[] parseIptcTags(int nResourceCount)
346 {
347 byte[] abTagData = null;
348
349 // IPTC resource tag format:
350 // tag marker 0x1c 1 byte
351 // record # n 1 byte
352 // dataset # n 1 byte
353 // data count n 2 bytes
354 // data xxx y bytes y = f(data count, separators)
355 //
356 // Remember that due to the encoding the data count does not account for
357 // the two-byte ascii chars and the embedded device-independent preview
358 // record separators. Not knowing the exact data length we will parse the
359 // data byte-by-byte stripping the record separators and converting the
360 // binaryhex to ascii. The resource block will be terminated with "8BIM"
361
362 byte[] anByteBuffer = null;
363 String sTagString = "";
364 boolean bContinue = true;
365 int nBufferLength = 512;
366
367 while (bContinue == true)
368 {
369 // Load the buffer as many times as necessary to process the whole
370 // resource block. Convert the binaryhex chars to a string.
371 // Scan for block terminator "8BIM"
372
373 anByteBuffer = readBytes(nBufferLength);
374 if (anByteBuffer == null)
375 {
376 return null;
377 }
378
379 // A tag is expected next, the tag marker is 0x1c
380 String sTempString = binaryHexToString(anByteBuffer);
381 if (sTempString == null)
382 {
383 BGPError("Could not binaryHexConvert tag data");
384 return null;
385 }
386
387 // Convert to Ascii string
388 String sRealString = "";
389 String sChar = "";
390 for (int i=0; i<sTempString.length(); i+=2)
391 {
392 byte[] ab = new byte[1];
393 String sSub = sTempString.substring(i, i+2);
394 try
395 {
396 ab[0] = (byte)Integer.parseInt(sSub, 16);
397 }
398
399 catch(NumberFormatException nfe)
400 {
401 //We have found a very strange byte in this char
402 BGPError("Strange byte found in IPTC tag string: " + sSub + ". " + nfe.toString());
403 BGPError("Index: " + i);
404 BGPError("Substring: " + sRealString);
405 BGPError("Tag String: " + sTagString);
406 //Backup index by one so we can retry the second byte of the char
407 // i--;
408 // continue;
409 return null;
410 }
411
412 // Convert the decimal char to a string char
413 sChar = new String(ab);
414
415 sRealString += sChar;
416 } // for i
417
418 // Append the string to the rest
419 sTagString += sRealString;
420
421 // Check for "8BIM"
422 // A string search would be the easiest but it doesn't work, presumably because
423 // there are some non-printable binary bytes (tag markers and tag lengths) in the string.
424 // Thus we do a binary search.
425 // Note that we don't have the buffer boundry problems since we search the whole accumulated
426 // string each time.
427 byte an8BIM[] = {0x38, 0x42, 0x49, 0x4d};
428 byte anTempTagData[] = sTagString.getBytes();
429 int i = 0;
430 while(i < anTempTagData.length - 3)
431 {
432 if ((anTempTagData[i] == an8BIM[0]) &&
433 (anTempTagData[i+1] == an8BIM[1]) &&
434 (anTempTagData[i+2] == an8BIM[2]) &&
435 (anTempTagData[i+3] == an8BIM[3]))
436 {
437 ABNDebug("8BIM found at index: " + i);
438 // The end-of-resource block signature was found
439 // Truncate the string at the signature index
440 // By the way, this index should, finally, be the same as the resource block
441 // length obtained at the beginning of the header (nResourceCount).
442 // Remember that we could not effectively use it before because of the record terminators
443 // embedded in the data defining the arbitrary-length records.
444
445 // Truncate the string
446 sTagString = sTagString.substring(0, i);
447 bContinue = false;
448 break;
449 } // anTempTagData[xxx] if
450
451 // Increment the index for the next byte
452 i++;
453
454 } // while i
455
456 // This is just a rough check to make sure that if the terminator
457 // is missed that we quit before too long. At this point the resource block length
458 // obtained from the head should be accurate since the record teminators have been removed
459 if (sTagString.length() > nResourceCount)
460 {
461 BGPError("The parsed string length has exceeded the resource block length specified in the header: "
462 + sTagString.length() + ", " + nResourceCount);
463 return null;
464 }
465
466 } // while bContinue
467
468
469 // Copy the tag string to the tag data array
470 abTagData = sTagString.getBytes();
471 printTagData(abTagData);
472 return abTagData;
473 } // parseIptcTags
474
475 /**
476 *
477 *
478 */
479 protected boolean isNextString(String sToMatch)
480 {
481 int nLength = sToMatch.length();
482
483 // Read x bytes
484 byte anData[] = readBytes(2*nLength);
485 if (anData == null)
486 {
487 return false;
488 }
489
490 // Convert to string
491 String sTempString = binaryHexToString(anData);
492 if (sTempString == null)
493 {
494 return false;
495 }
496
497 // Compare for match and return result
498 return sTempString.equals(sToMatch);
499 } // isNextString
500
501
502 /**
503 *
504 *
505 */
506 protected byte[] readBytes(int nCount)
507 {
508 byte[] anBytes = new byte[nCount];
509
510 if (refFile == null)
511 {
512 BGPError("File reference is null pointer - readBytes");
513 return null;
514 }
515
516 try
517 {
518 int nStatus = refFile.read(anBytes);
519 if (nStatus == -1)
520 {
521 BGPError("Could not read. End of file");
522 return null;
523 }
524 }
525
526 catch (IOException ioe)
527 {
528 BGPError("Error reading file: " + ioe.toString());
529 return null;
530 }
531
532 return anBytes;
533 } // readBytes
534
535 /**
536 *
537 *
538 */
539 protected String getNextChar()
540 {
541 String sNextChar = null;
542 byte anFileData[] = null;
543
544 // Read a byte
545 anFileData = readBytes(1);
546 if (anFileData == null)
547 {
548 return null;
549 }
550
551 // Convert the binary data to a string
552 sNextChar = new String(anFileData);
553
554 return sNextChar;
555 } // getNextChar
556
557
558 /**
559 *
560 *
561 */
562 protected boolean findChar(char cChar)
563 {
564 boolean bResult = false;
565 String sFileData = "";
566
567 // Walk thru the file looking for the specified character
568 while(true)
569 {
570 sFileData = getNextChar();
571 if (sFileData == null)
572 {
573 BGPError("Could not find the character: " + cChar);
574 bResult = false;
575 break;
576 }
577
578 if (sFileData.charAt(0) == cChar)
579 {
580 ABNDebug("Character " + cChar + " found");
581 bResult = true;
582 break;
583 } // sFileData.equals if
584 } // while
585
586 return bResult;
587 } // findChar
588
589
590 /**
591 *
592 *
593 */
594 protected boolean skipBytes(int nCount)
595 {
596 boolean bResult = true;
597
598 if (refFile == null)
599 {
600 BGPError("File reference is a null pointer - skipBytes");
601 return false;
602 }
603
604 try
605 {
606 refFile.skipBytes(nCount);
607 }
608
609 catch (IOException ioe)
610 {
611 BGPError("File error in skipBytes: " + ioe.toString());
612 bResult = false;
613 }
614 /*
615 catch (EOFException eoe)
616 {
617 BGPError("File error in skipBytes: " + eoe.toString());
618 bResult = false;
619 }
620 */
621 return bResult;
622 } // skipBytes
623
624 /**
625 * Binary hex consists of two consequtive binary hex ascii bytes each of which
626 * when decoded represents half of a hex ascii char.
627 * For example: 33 38 -> 38
628 * This method converts the binary hex bytes to a string character
629 */
630 protected String binaryHexToChar(byte[] abBinary)
631 {
632 // Convert the two binary hex digits to a single hex ascii char
633 String sChar = new String(abBinary);
634 // ABNDebug(sChar);
635
636 return sChar;
637 } // binaryHexToChar
638
639
640 /**
641 *
642 *
643 */
644 protected String binaryHexToString(byte[] abBinaryData)
645 {
646 String sResult = "";
647 String sIntermediate = "";
648 boolean bPercentFound = false;
649 byte[] abTemp = new byte[2];
650
651 // Convert each 2 consecutive bytes to a string character
652 int nIndex = 0;
653 int nLength = abBinaryData.length;
654 while(nIndex < nLength)
655 {
656
657 // Check for straight binary chars representing the terminator
658 // or start of the binhex record, skip them
659 if (abBinaryData[nIndex] == 0xd) // Optional Record terminator
660 {
661 ABNDebug("0d found");
662 nIndex++;
663 continue;
664 }
665
666 if (abBinaryData[nIndex] == 0x0a) // Optional Record terminator
667 {
668 ABNDebug("0a found");
669 nIndex++;
670 continue;
671 }
672
673 if (abBinaryData[nIndex] == 0x25) // "%" record start
674 {
675 ABNDebug("25 (%) found");
676 bPercentFound = true;
677 nIndex++;
678 continue;
679 }
680
681 if ((bPercentFound == true) && (abBinaryData[nIndex] == 0x20))
682 {
683 ABNDebug("20 found immediately after %");
684 bPercentFound = false;
685 nIndex++;
686 continue;
687 }
688
689 bPercentFound = false;
690
691 abTemp[0] = abBinaryData[nIndex++];
692 if (nIndex < nLength)
693 {
694 abTemp[1] = abBinaryData[nIndex++];
695 if (abTemp[1] < 0x30)
696 {
697 BGPError("BinaryHex convert, Second byte not valid (<0x30h): " + abTemp[1] + " decimal.");
698 BGPError("First byte: " + abTemp[0] + " decimal.");
699 return null;
700 }
701 }
702
703 sIntermediate = binaryHexToChar(abTemp);
704
705 // Append the converted char to the return string
706 sResult += sIntermediate;
707 } // while
708
709 ABNDebug("Hex to string conversion: " + sResult);
710
711 return sResult;
712 } // binaryHexToString
713
714 /**
715 *
716 *
717 */
718 protected boolean restorePointer(long nFilePosition)
719 {
720 boolean bResult = true;
721 if (refFile == null)
722 {
723 return false;
724 }
725
726 try
727 {
728 refFile.seek(nFilePosition);
729 }
730
731 catch (IOException ioe)
732 {
733 BGPError("File seek error: " + ioe.toString());
734 bResult = false;
735 }
736
737 return bResult;
738 } // restorePointer
739
740 /**
741 *
742 *
743 */
744 protected long savePointer()
745 {
746 long nPos = -1;
747 if (refFile == null)
748 {
749 return -1;
750 }
751
752 try
753 {
754 nPos = refFile.getFilePointer();
755 }
756
757 catch (IOException ioe)
758 {
759 BGPError("File error getting position: " + ioe.toString());
760 nPos = -1;
761 }
762
763 return nPos;
764 } // savePointer
765
766 /**
767 *
768 *
769 */
770 protected int parseStrToInt(String sNumber)
771 {
772 return parseStrToInt(sNumber, 0);
773 } // parseStrToInt
774
775 /**
776 *
777 *
778 */
779 protected int parseStrToInt(String sNumber, int nRadix)
780 {
781 int nNumber = -1;
782
783 if (nRadix == 0)
784 {
785 nRadix = 10;
786 }
787
788 try
789 {
790 nNumber = Integer.parseInt(sNumber, nRadix);
791 }
792
793 catch(NumberFormatException nfe)
794 {
795 BGPError("Error converting string to int: " + nfe);
796 }
797
798 return nNumber;
799 } // parseStrToInt
800
801
802 /** Obtain the value for the specified Dataset number.
803 * @return the Dataset number value.
804 * @param sDatasetID The IPTC Dataset ID number (A:BB)
805 */
806 public String getDatasetValue( String sDatasetID )
807 {
808 return (String) hDataSets.get( sDatasetID );
809 } // end of getDataset
810
811
812 /**
813 * Debug code
814 *
815 */
816 protected void printTagData(byte[] anTagData)
817 {
818 // Print out the contents of the tag data buffer
819 // Use the hard way since printing as a string does'nt work well due
820 // to the various wierd characters that it may contain.
821 // Replace all chars < 0x02 (except for 0x0d) with an asterisk
822 // Note that the replaced characters are important for this application.
823 // They are the tags.
824 String sBuffer = "";
825 for (int i=0; i<anTagData.length; i++)
826 {
827 byte nChar = anTagData[i];
828 if ((nChar < 0x20) && (nChar != 0x0d))
829 {
830 nChar = 0x2a;
831 }
832
833 sBuffer += (char)nChar;
834 } // for i
835
836 ABNDebug(sBuffer);
837 } // printTagData
838
839
840 /**
841 *
842 *
843 */
844 public void debugPrintHash()
845 {
846 System.out.println("");
847 System.out.println("****** IPTC Hashtable ******");
848 String sValue = getDatasetValue("2:120"); // Caption
849 if (sValue != null)
850 {
851 sValue = putInCRs(sValue);
852 System.out.println(sValue);
853 System.out.println("String Size: " + sValue.length());
854 }
855 sValue = getDatasetValue("2:25"); // Keyword
856 if (sValue != null)
857 {
858 sValue = putInCRs(sValue);
859 System.out.println(sValue);
860 System.out.println("String Size: " + sValue.length());
861 }
862 sValue = getDatasetValue("2:55"); // Date created
863 if (sValue != null)
864 {
865 sValue = putInCRs(sValue);
866 System.out.println(sValue);
867 System.out.println("String Size: " + sValue.length());
868 }
869 } // debugPrintHash
870
871
872 /**
873 *
874 *
875 */
876 public Hashtable getIptcHashtable()
877 {
878 return hDataSets;
879 }
880
881 /**
882 *
883 *
884 */
885 protected String putInCRs(String sInput)
886 {
887 String sOutput = "";
888 byte[] abInput = sInput.getBytes();
889 byte[] abOutput = new byte[abInput.length + 200];
890 int j = 0;
891
892 for (int i=0; i<abInput.length; i++)
893 {
894 if (abInput[i] == 0x0d)
895 {
896 abOutput[j++] = 0x0a;
897 }
898
899 abOutput[j++] = abInput[i];
900 } // for
901
902 sOutput = new String(abOutput, 0, j);
903 return sOutput;
904 } // putInCRs
905
906 /**
907 *
908 *
909 */
910 protected void ABNDebug(String sMsg)
911 {
912 if(bDebug == true)
913 {
914 BGPDebug(sMsg);
915 }
916 }
917
918 } // class EPSParser