Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/flexstor/flexdbserver/services/asset/iptc/EPSParser.java


1   /*
2    * EPSParser.java
3    *
4    * Copyright $Date: 2003/08/11 02:22:34 $ FLEXSTOR.net Inc.
5    *
6    * This work is licensed for use and distribution under license terms found at
7    * http://www.flexstor.org/license.html
8    *
9    */
10  
11  package com.flexstor.flexdbserver.services.asset.iptc;
12  
13  import java.io.IOException;
14  import java.util.Hashtable;
15  
16  import com.flexstor.common.io.xfile.FlexXFile;
17  import com.flexstor.common.io.xfile.FlexXRandomAccessFile;
18  
19  /**
20  *
21  *
22  */
23  public class EPSParser extends IPTCParser implements IIPTCParser 
24  {
25      FlexXRandomAccessFile refFile = null;
26      
27      /**  DEBUG **/
28      boolean bDebug = false;
29      
30      
31      /**
32      *
33      *
34      */
35      public EPSParser(FlexXFile refXFile)
36      {
37          // Open the input file for random read access
38          try
39          {
40              refFile = new FlexXRandomAccessFile(refXFile, "r");    
41          }
42          
43          catch(IOException ioe)
44          {
45              BGPError("Could not open random access file.");
46          }
47  
48      } // constructor
49      
50      
51      /**
52      *
53      *
54      */
55      public boolean parseFile()
56      {
57          boolean bResult = true;
58      
59          // Read EFS file header, attempting to locate the resource block
60          // This block starts with "8BIM", but it is encoded in the device-independent 
61          // bitmap so it is encoded in binary hex.
62          // The resource block should start just after the normal EFS header
63          if (parseEfsHeader() == false)
64          {
65              bResult = false;
66          }
67      
68          // Close the input file
69          if (refFile != null)
70          {
71              try
72              {
73                  refFile.close();
74              }
75              
76              catch(IOException ioe)
77              {
78                  System.out.println("EPS Parser error closing file: " + ioe.toString());
79              }
80          }
81          
82      return bResult;
83      } // parseEFS
84      
85      
86      /**
87      *
88      *
89      */
90      protected boolean parseEfsHeader()
91      {
92          boolean bResult   = true;
93          boolean bContinue = true;
94          long    nSignatureCheckPos = -1;
95          
96          // This counter is a sanity count in case the file is corrupt and the
97          // EFS header is not found. Loop will be terminated if too many 
98          // percent chars are found.
99          int nPercentCount = 200;
100         
101         while(bContinue)
102         {
103             if (findChar('%') == false)
104             {
105                 bResult   = false;
106                 bContinue = false;
107             }
108         
109             String sNextChar = getNextChar();
110             if (sNextChar == null)
111             {
112                 bResult   = false;
113                 bContinue = false;
114             }
115         
116             if ((bContinue == true) && (sNextChar.equals("%") == false) && (sNextChar.equals("!") == false))
117             {                
118                 // Check for "8BIM" with or without leading spaces
119                 // Save the current position in case the signature not found so
120                 // we can look for the next "%" sequence
121                 
122                 // Filter out any spaces in this loop
123                 while (sNextChar.equals(" ") == true)
124                 {
125                     sNextChar = getNextChar();
126                     if (sNextChar == null)
127                     {
128                         bResult   = false;
129                         bContinue = false;
130                         break;
131                     }
132                 } // while
133                 
134                 nSignatureCheckPos = savePointer();
135                 if (nSignatureCheckPos == -1)
136                 {
137                     bResult   = false;
138                     bContinue = false;
139                 }
140                         
141                 // Was the last character read the first byte of the "*BIM" string?
142                 if ((bContinue == true) && (sNextChar.equals("3") == true))
143                 {
144                 
145                     // Check the next bytes for "8BIM" ([20] 33 38 34 32 34 39 34 44)
146                     // We already found the first byte (0x33)
147                     // Set up the reference array
148                     byte anRefData[] = {0x38, 0x34, 0x32, 0x34, 0x39, 0x34, 0x44};
149                     
150                     // Read 7 bytes from the file
151                     byte[] anData = new byte[7];
152                     anData = readBytes(7);
153                     if (anData == null)
154                     {
155                         bResult   = false;
156                         bContinue = false;
157                     }
158         
159                     // Compare the file data with the reference array
160                     bResult = true;
161                     for (int i=0; i<anData.length; i++)
162                     {
163                         if (anData[i] != anRefData[i])
164                         {
165                             bResult = false;
166                             break;
167                         }
168                     } // for i
169                 
170                 } // sNextChar = "3" if
171                 
172                 else
173                 {
174                     // First byte of "8BIM" not found
175                     bResult = false;
176                 }
177 
178                 if ((bContinue == true) && (bResult == true))
179                 {
180                     
181                     // The resource block has been found
182                     byte abIPTCData[] = parseResourceBlock();
183                     if (abIPTCData != null)
184                     {
185                         ABNDebug("Parsing IPTC data....");
186                         if (parseIPTCdata(abIPTCData) == true)
187                         {
188                         ABNDebug("IPTC data parsed.");
189                         bResult = true;
190                         }
191                         
192                         else
193                         {
194                         ABNDebug("Error parsing IPTC  data.");
195                         bResult = false;
196                         }
197                     } // abIPTCData
198                     
199                     else
200                     {
201                         bResult = false;
202                     }
203                     
204                     bContinue = false;
205                 } // bResult if
206             
207                 else if ((bContinue == true) && (nSignatureCheckPos != -1))
208                 {
209                     // Continue checking for the next single "%"
210                     if (restorePointer(nSignatureCheckPos) == false)                    
211                     {
212                         bResult   = false;
213                         bContinue = false;
214                     }
215                 }
216             }
217         
218             // Check the sanity count, if head not found before count goes
219             // negative, something must be wrong so quit
220             if (--nPercentCount < 0)
221             {
222                 BGPError("Too many percent chars found before Resource block header");
223                 bResult   = false;
224                 bContinue = false;
225             }
226         } // while
227         
228         return bResult;
229     } // parseEfsHeader
230     
231     
232     /**
233     *
234     *
235     */
236     protected byte[] parseResourceBlock()
237     {
238         byte abResourceBlockData[] = null;
239         
240         // The first two bytes are the resource ID
241         // The IPTC resource ID is 0x0404 which is what we want
242         
243         // NOTE: For now, assume that the first resource is the IPTC block!
244         // The problem is that the resource block is binary hex coded and in the
245         // form of a device independent preview.  This means that there are two
246         // bytes for each ascii iptc character and that the block is segmented into
247         // records of arbitrary length which are defined by binary characters.  The
248         // length of the binary record is fixed for each file but has no relation to the
249         // iptc record content.  Therefore, block and record lengths in the iptc headers
250         // do not directly relate to the actual size of the blocks and records as
251         // read from the file.
252         
253         // Check the first 2 bytes for the IPTC resource ID (0x0404)
254         byte[] anByteBuffer = readBytes(4);
255         if (anByteBuffer == null)
256         {
257             BGPError("Could not read IPTC resource ID");
258             return null;
259         }
260         
261         String sResId = binaryHexToString(anByteBuffer);        
262         if ((sResId == null) || (sResId.equals("0404") == false))
263         {
264             BGPError("IPTC resource ID not found");
265             return null;
266         }
267         
268         // The next "n" bytes contains the Name of the resource block
269         // It is a PString so the first byte is the length.  A null string
270         // here will contain a 0 length with a 0 byte following.  We don't
271         // particularily care about this string so just get the length and
272         // skip over it.
273         anByteBuffer = readBytes(2);
274         String sCount = binaryHexToChar(anByteBuffer);
275         if (sCount.equals("00") == false)
276         {
277             // The PString is not zero length so skip count bytes
278             int nCount = parseStrToInt(sCount);
279             if (nCount == -1)
280             {
281                 return null;
282             }
283             
284             // Skip 2 x nCount bytes since there are 2 bytes per ascii char
285             if (skipBytes(2*nCount) == false)
286             {
287                 BGPError("Error skipping PString");
288                 return null;
289             }
290         } // nCount if
291         
292         else
293         {
294             // The PString is a zero length string so the next char (2 bytes)
295             // will be ascii "0". Just skip over them
296             if (skipBytes(2) == false)
297             {
298                 BGPError("Error skipping PString");
299                 return null;
300             }
301         }
302         
303         // The next 4 bytes contains the length of the IPTC resource block.
304         // Note that this is the length before it was encoded in a device independent
305         // preview using binary hex, so it is not an accurate representation of the 
306         // actual number of bytes in the resource block in the file. Save it for
307         // later approximations.
308         anByteBuffer = readBytes(8);
309         String sResourceCount = binaryHexToString(anByteBuffer);
310         if (sResourceCount == null)
311         {
312             BGPError("Could not binaryHex convert resource block count");
313             return null;
314         }
315         
316         int nResourceCount    = parseStrToInt(sResourceCount, 16);
317         
318         // The block length is always even and, therefore, possibly padded but the length given
319         // in the header is the pre-padded length.
320         // Adjust the length count to recognize the padding.
321         if ((nResourceCount % 2) != 0)
322         {
323           ++nResourceCount;
324         }
325         ABNDebug("Resource block length: " + nResourceCount);
326         
327         // Save the file position as a possible reference point for the resource byte count
328         long nResourceBlockStart = savePointer();
329         if (nResourceBlockStart == -1)
330         {
331             return null;
332         }
333         
334         // Process the IPTC resource tags
335         abResourceBlockData = parseIptcTags(nResourceCount);
336         
337         return abResourceBlockData;
338     } // parseResouceBlock
339     
340     
341     /**
342     *
343     *
344     */
345     protected byte[] parseIptcTags(int nResourceCount)
346     {
347         byte[] abTagData = null;
348         
349         // IPTC resource tag format:
350         //      tag marker      0x1c    1 byte
351         //      record #        n       1 byte
352         //      dataset #       n       1 byte
353         //      data count      n       2 bytes
354         //      data            xxx     y bytes   y = f(data count, separators)
355         //
356         // Remember that due to the encoding the data count does not account for
357         // the two-byte ascii chars and the embedded device-independent preview
358         // record separators.  Not knowing the exact data length we will parse the
359         // data byte-by-byte stripping the record separators and converting the
360         // binaryhex to ascii.  The resource block will be terminated with "8BIM"
361         
362         byte[]  anByteBuffer  = null;
363         String  sTagString    = "";
364         boolean bContinue     = true;
365         int     nBufferLength = 512;
366         
367         while (bContinue == true)
368         {
369             // Load the buffer as many times as necessary to process the whole
370             // resource block.  Convert the binaryhex chars to a string.
371             // Scan for block terminator "8BIM"
372             
373             anByteBuffer = readBytes(nBufferLength);
374             if (anByteBuffer == null)
375             {
376                 return null;
377             }
378         
379             // A tag is expected next, the tag marker is 0x1c
380             String sTempString = binaryHexToString(anByteBuffer);
381             if (sTempString == null)
382             {
383                 BGPError("Could not binaryHexConvert tag data");
384                 return null;
385             }
386             
387             // Convert to Ascii string
388             String sRealString = "";
389             String sChar       = "";
390             for (int i=0; i<sTempString.length(); i+=2)
391             {
392                 byte[] ab = new byte[1];
393                 String sSub = sTempString.substring(i, i+2);
394                 try
395                 {
396                     ab[0] = (byte)Integer.parseInt(sSub, 16);                    
397                 }
398                 
399                 catch(NumberFormatException nfe)
400                 {
401                     //We have found a very strange byte in this char
402                     BGPError("Strange byte found in IPTC tag string: " + sSub + ". " + nfe.toString());
403                     BGPError("Index: " + i);
404                     BGPError("Substring: "  + sRealString);
405                     BGPError("Tag String: " + sTagString);
406                     //Backup index by one so we can retry the second byte of the char
407  //                   i--;
408  //                   continue;
409                     return null;
410                 }
411                 
412                 // Convert the decimal char to a string char
413                 sChar = new String(ab);
414                 
415                 sRealString += sChar;    
416             } // for i
417             
418             // Append the string to the rest
419             sTagString += sRealString;
420             
421             // Check for "8BIM"
422             // A string search would be the easiest but it doesn't work, presumably because
423             // there are some non-printable binary bytes (tag markers and tag lengths) in the string.
424             // Thus we do a binary search.
425             // Note that we don't have the buffer boundry problems since we search the whole accumulated
426             // string each time.
427             byte an8BIM[] = {0x38, 0x42, 0x49, 0x4d};
428             byte anTempTagData[] = sTagString.getBytes();
429             int  i = 0;
430             while(i < anTempTagData.length - 3)
431             {
432                 if ((anTempTagData[i]   == an8BIM[0]) &&
433                     (anTempTagData[i+1] == an8BIM[1]) &&       
434                     (anTempTagData[i+2] == an8BIM[2]) &&
435                     (anTempTagData[i+3] == an8BIM[3])) 
436                 {
437                      ABNDebug("8BIM found at index: " + i);
438                      // The end-of-resource block signature was found
439                      // Truncate the string at the signature index
440                      // By the way, this index should, finally, be the same as the resource block
441                      // length obtained at the beginning of the header (nResourceCount).
442                      // Remember that we could not effectively use it before because of the record terminators
443                      // embedded in the data defining the arbitrary-length records.
444                      
445                      // Truncate the string                     
446                      sTagString = sTagString.substring(0, i);
447                      bContinue = false;
448                      break;
449                 } // anTempTagData[xxx] if
450             
451             // Increment the index for the next byte
452             i++;        
453             
454             } // while i
455 
456             // This is just a rough check to make sure that if the terminator
457             // is missed that we quit before too long.  At this point the resource block length
458             // obtained from the head should be accurate since the record teminators have been removed
459             if (sTagString.length() > nResourceCount)
460             {
461                 BGPError("The parsed string length has exceeded the resource block length specified in the header: " 
462                          + sTagString.length() + ", " + nResourceCount);
463                 return null;
464             }
465             
466         } //  while bContinue
467         
468 
469         // Copy the tag string to the tag data array
470         abTagData = sTagString.getBytes();
471         printTagData(abTagData);
472         return abTagData;
473     } // parseIptcTags
474     
475     /**
476     *
477     *
478     */
479     protected boolean isNextString(String sToMatch)
480     {
481         int nLength = sToMatch.length();
482         
483         // Read x bytes
484         byte anData[] = readBytes(2*nLength);
485         if (anData == null)
486         {
487             return false;
488         }
489         
490         // Convert to string
491         String sTempString = binaryHexToString(anData);
492         if (sTempString == null)
493         {
494             return false;
495         }
496         
497         // Compare for match and return result
498         return sTempString.equals(sToMatch);
499     } // isNextString
500     
501     
502     /**
503     *
504     *
505     */
506     protected byte[] readBytes(int nCount)
507     {
508         byte[] anBytes = new byte[nCount];
509         
510         if (refFile == null)
511         {
512             BGPError("File reference is null pointer - readBytes");
513             return null;
514         }
515         
516         try
517         {
518             int nStatus = refFile.read(anBytes); 
519             if (nStatus == -1)
520             {
521                 BGPError("Could not read. End of file");
522                 return null;
523             }
524         }
525         
526         catch (IOException ioe)
527         {
528             BGPError("Error reading file: " + ioe.toString());
529             return null;
530         }
531         
532         return anBytes;
533     } // readBytes
534     
535     /**
536     *
537     *
538     */
539     protected String getNextChar()
540     {
541         String sNextChar    = null;
542         byte   anFileData[] = null;
543             
544         // Read a byte    
545         anFileData = readBytes(1);
546         if (anFileData == null)
547         {
548             return null;
549         }
550             
551         // Convert the binary data to a string
552         sNextChar = new String(anFileData);
553         
554         return sNextChar;
555     } // getNextChar
556     
557     
558     /**
559     *
560     *
561     */
562     protected boolean findChar(char cChar)
563     {
564         boolean bResult   = false;
565         String  sFileData = "";
566         
567         // Walk thru the file looking for the specified character
568         while(true)
569         {
570             sFileData = getNextChar();
571             if (sFileData == null)
572             {
573                 BGPError("Could not find the character: " + cChar);
574                 bResult = false;
575                 break;
576             }
577  
578             if (sFileData.charAt(0) == cChar)
579             {
580                 ABNDebug("Character " + cChar + " found");
581                 bResult = true;
582                 break;
583             } // sFileData.equals if
584          } // while        
585         
586         return bResult;
587     } // findChar
588     
589    
590     /**
591     *
592     *
593     */
594     protected boolean skipBytes(int nCount)
595     {
596         boolean bResult = true;
597         
598         if (refFile == null)
599         {
600             BGPError("File reference is a null pointer - skipBytes");
601             return false;
602         }
603         
604         try
605         {
606             refFile.skipBytes(nCount);    
607         }
608         
609         catch (IOException ioe)
610         {
611             BGPError("File error in skipBytes: " + ioe.toString());
612             bResult = false;
613         }
614 /*        
615         catch (EOFException eoe)
616         {
617             BGPError("File error in skipBytes: " + eoe.toString());
618             bResult = false;
619         }
620 */        
621         return bResult;
622     } // skipBytes
623     
624     /**
625     * Binary hex consists of two consequtive binary hex ascii bytes each of which
626     * when decoded represents half of a hex ascii char.
627     * For example: 33 38 -> 38 
628     * This method converts the binary hex bytes to a string character
629     */
630     protected String binaryHexToChar(byte[] abBinary)
631     {
632         // Convert the two binary hex digits to a single hex ascii char
633         String sChar = new String(abBinary);
634 //        ABNDebug(sChar);
635         
636         return sChar;    
637     } // binaryHexToChar
638 
639 
640     /**
641     *
642     *
643     */
644     protected String binaryHexToString(byte[] abBinaryData)
645     {
646         String  sResult       = "";
647         String  sIntermediate = "";
648         boolean bPercentFound = false;
649         byte[]  abTemp        = new byte[2];
650         
651         // Convert each 2 consecutive bytes to a string character
652         int nIndex = 0;
653         int nLength = abBinaryData.length;
654         while(nIndex < nLength)
655         {
656             
657             // Check for straight binary chars representing the terminator
658             // or start of the binhex record, skip them
659             if (abBinaryData[nIndex] == 0xd) // Optional Record terminator
660             {                
661                 ABNDebug("0d found");
662                 nIndex++;
663                 continue;
664             }
665                 
666             if (abBinaryData[nIndex] == 0x0a) // Optional Record terminator
667             {                
668                 ABNDebug("0a found");
669                 nIndex++;
670                 continue;
671             }
672                 
673             if (abBinaryData[nIndex] == 0x25)  // "%" record start
674             {
675                 ABNDebug("25 (%) found");
676                 bPercentFound = true;
677                 nIndex++;
678                 continue;
679             }
680                 
681             if ((bPercentFound == true) && (abBinaryData[nIndex] == 0x20))    
682             {
683                 ABNDebug("20 found immediately after %");   
684                 bPercentFound = false;
685                 nIndex++;
686                 continue;
687             }
688  
689             bPercentFound = false;
690             
691             abTemp[0] = abBinaryData[nIndex++];
692             if (nIndex < nLength)
693             {
694                 abTemp[1] = abBinaryData[nIndex++];
695                 if (abTemp[1] < 0x30)
696                 {
697                     BGPError("BinaryHex convert, Second byte not valid (<0x30h): " + abTemp[1] + " decimal.");
698                     BGPError("First byte: " + abTemp[0] + " decimal.");
699                     return null;
700                 }
701             }
702             
703             sIntermediate = binaryHexToChar(abTemp);
704             
705             // Append the converted char to the return string
706             sResult  += sIntermediate;
707         } // while
708         
709         ABNDebug("Hex to string conversion: " + sResult);
710         
711         return sResult;
712     } // binaryHexToString
713     
714     /**
715     *
716     *
717     */
718     protected boolean restorePointer(long nFilePosition)
719     {
720         boolean bResult = true;
721         if (refFile == null)
722         {
723             return false;
724         }
725         
726         try
727         {
728             refFile.seek(nFilePosition);
729         }
730         
731         catch (IOException ioe)
732         {
733             BGPError("File seek error: " + ioe.toString());
734             bResult = false;        
735         }
736         
737         return bResult;
738     } // restorePointer
739 
740     /**
741     *
742     *
743     */
744     protected long savePointer()
745     {
746         long nPos = -1;
747         if (refFile == null)
748         {
749             return -1;
750         }
751         
752         try
753         {
754             nPos = refFile.getFilePointer();
755         }
756         
757         catch (IOException ioe)
758         {
759             BGPError("File error getting position: " + ioe.toString());
760             nPos = -1;
761         }
762         
763         return nPos;
764     } // savePointer
765 
766     /**
767     *
768     *
769     */
770     protected int parseStrToInt(String sNumber)
771     {
772         return parseStrToInt(sNumber, 0);
773     } // parseStrToInt
774     
775     /**
776     *
777     *
778     */
779     protected int parseStrToInt(String sNumber, int nRadix)
780     {
781         int nNumber = -1;
782         
783         if (nRadix == 0)
784         {
785             nRadix = 10;
786         }
787         
788         try
789         {
790             nNumber = Integer.parseInt(sNumber, nRadix);
791         }
792             
793         catch(NumberFormatException nfe)
794         {
795             BGPError("Error converting string to int: " + nfe);
796         }
797             
798         return nNumber;
799     } // parseStrToInt
800 
801     
802     /** Obtain the value for the specified Dataset number.
803     * @return the Dataset number value.
804     * @param sDatasetID The IPTC Dataset ID number (A:BB)
805     */
806     public String getDatasetValue( String sDatasetID ) 
807     {
808        return (String) hDataSets.get( sDatasetID );
809     } // end of getDataset
810    
811     
812     /**
813     *  Debug code
814     *
815     */
816     protected void printTagData(byte[] anTagData)
817     {
818         // Print out the contents of the tag data buffer
819         // Use the hard way since printing as a string does'nt work well due
820         // to the various wierd characters that it may contain.
821         // Replace all chars < 0x02 (except for 0x0d) with an asterisk
822         // Note that the replaced characters are important for this application.
823         // They are the tags.
824         String sBuffer = "";
825         for (int i=0; i<anTagData.length; i++)        
826         {
827            byte nChar = anTagData[i]; 
828            if ((nChar < 0x20) && (nChar != 0x0d))
829            {
830                 nChar = 0x2a;
831            }
832            
833            sBuffer += (char)nChar; 
834         } // for i
835 
836         ABNDebug(sBuffer);
837     } // printTagData
838     
839     
840     /**
841     *
842     *
843     */
844     public void debugPrintHash()
845     {
846         System.out.println("");
847         System.out.println("****** IPTC Hashtable ******");
848         String sValue = getDatasetValue("2:120"); // Caption
849         if (sValue != null)
850         {
851           sValue = putInCRs(sValue);
852           System.out.println(sValue);
853           System.out.println("String Size: " + sValue.length());
854         }
855         sValue = getDatasetValue("2:25");         // Keyword
856         if (sValue != null)
857         {
858           sValue = putInCRs(sValue);
859           System.out.println(sValue);
860           System.out.println("String Size: " + sValue.length());
861         }
862         sValue = getDatasetValue("2:55");         // Date created
863         if (sValue != null)
864         {
865           sValue = putInCRs(sValue);
866           System.out.println(sValue);
867           System.out.println("String Size: " + sValue.length());
868         }
869     } // debugPrintHash
870     
871    
872     /**
873     *
874     *
875     */
876     public Hashtable getIptcHashtable()
877     {
878         return hDataSets;
879     }
880     
881     /**
882     *
883     *
884     */
885     protected String putInCRs(String sInput)
886     {
887         String sOutput  = "";
888         byte[] abInput  = sInput.getBytes();
889         byte[] abOutput = new byte[abInput.length + 200];
890         int j = 0;
891         
892         for (int i=0; i<abInput.length; i++)
893         {
894             if (abInput[i] == 0x0d)
895             {
896                 abOutput[j++] = 0x0a;
897             }
898             
899             abOutput[j++] = abInput[i];
900         } // for
901     
902         sOutput = new String(abOutput, 0, j);
903         return sOutput;
904     } // putInCRs
905     
906     /**
907     *
908     *
909     */
910     protected void ABNDebug(String sMsg)
911     {
912         if(bDebug == true)
913         {
914             BGPDebug(sMsg);
915         }
916     }
917     
918 } // class EPSParser