Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: joelib/io/types/cml/CMLCoreModule.java


1   ///////////////////////////////////////////////////////////////////////////////
2   //Filename: $RCSfile: CMLCoreModule.java,v $
3   //Purpose:  Chemical Markup Language.
4   //Language: Java
5   //Compiler: JDK 1.4
6   //Authors:  steinbeck@ice.mpg.de, gezelter@maul.chem.nd.edu,
7   //      egonw@sci.kun.nl, wegnerj@informatik.uni-tuebingen.de
8   //Version:  $Revision: 1.13 $
9   //      $Date: 2003/08/22 15:56:18 $
10  //      $Author: wegner $
11  //
12  //Copyright (C) 1997-2003  The Chemistry Development Kit (CDK) project
13  //Copyright (c) Dept. Computer Architecture, University of Tuebingen, Germany
14  //
15  //This program is free software; you can redistribute it and/or
16  //modify it under the terms of the GNU Lesser General Public License
17  //as published by the Free Software Foundation; either version 2.1
18  //of the License, or (at your option) any later version.
19  //All we ask is that proper credit is given for our work, which includes
20  //- but is not limited to - adding the above copyright notice to the beginning
21  //of your source code files, and to any copyright notice that you may distribute
22  //with programs based on this work.
23  //  
24  //This program is distributed in the hope that it will be useful,
25  //but WITHOUT ANY WARRANTY; without even the implied warranty of
26  //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
27  //GNU Lesser General Public License for more details.
28  //  
29  //You should have received a copy of the GNU Lesser General Public License
30  //along with this program; if not, write to the Free Software
31  //Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
32  ///////////////////////////////////////////////////////////////////////////////
33  package joelib.io.types.cml;
34  
35  import joelib.desc.result.*;
36  
37  import joelib.io.*;
38  
39  import joelib.io.types.cml.elements.*;
40  
41  import joelib.util.*;
42  
43  import joelib.util.types.*;
44  
45  import java.util.*;
46  
47  import org.apache.log4j.*;
48  
49  import org.xml.sax.*;
50  
51  
52  /**
53   * Core CML 1.x and 2.0 elements are parsed by this class.
54   *
55   * <p>Please file a bug report if this parser fails to parse
56   * a certain element or attribute value in a valid CML document.
57   *
58   * @author egonw
59   * @author c.steinbeck@uni-koeln.de
60   * @author gezelter@maul.chem.nd.edu
61   * @author wegnerj
62   * @license LGPL
63   * @cvsversion    $Revision: 1.13 $, $Date: 2003/08/22 15:56:18 $
64   * @cite rr99b
65   * @cite mr01
66   * @cite gmrw01
67   * @cite wil01
68   **/
69  public class CMLCoreModule implements ModuleInterface
70  {
71      //~ Static fields/initializers /////////////////////////////////////////////
72  
73      // Obtain a suitable logger.
74      private static Category logger = Category.getInstance(
75              "joelib.io.types.cml.CMLCoreModule");
76      public final static int UNKNOWN = -1;
77      public final static int STRING = 1;
78      public final static int LINK = 2;
79      public final static int FLOAT = 3;
80      public final static int INTEGER = 4;
81      public final static int STRINGARRAY = 5;
82      public final static int FLOATARRAY = 6;
83      public final static int INTEGERARRAY = 7;
84      public final static int FLOATMATRIX = 8;
85      public final static int COORDINATE2 = 9;
86      public final static int COORDINATE3 = 10;
87      public final static int ANGLE = 11;
88      public final static int TORSION = 12;
89      public final static int LIST = 13;
90      public final static int MOLECULE = 14;
91      public final static int ATOM = 15;
92      public final static int ATOMARRAY = 16;
93      public final static int BOND = 17;
94      public final static int BONDARRAY = 18;
95      public final static int ELECTRON = 19;
96      public final static int REACTION = 20;
97      public final static int CRYSTAL = 21;
98      public final static int SEQUENCE = 22;
99      public final static int FEATURE = 23;
100     public final static int MATRIX = 24;
101     public final static int ARRAY = 25;
102     public final static int SCALAR = 26;
103     public final static int BOND_STEREO = 27;
104     public final static int NAME = 28;
105 
106     //~ Instance fields ////////////////////////////////////////////////////////
107 
108     public Hashtable atomElements = new Hashtable(23);
109     public Hashtable elements = new Hashtable(23);
110     protected final String SYSTEMID = "CML-1999-05-15";
111 
112     // array
113     protected ArrayCML array;
114     protected CDOInterface cdo;
115 
116     // matrix
117     protected MatrixCML matrix;
118 
119     //scalar
120     protected ScalarCML scalar;
121     protected String BUILTIN;
122     protected String arrayTitle;
123     protected String currentChars;
124     protected String delimiter;
125     protected String elementTitle;
126     protected String matrixColumns;
127     protected String matrixDelimiter;
128     protected String matrixRows;
129     protected String matrixTitle;
130     protected String moleculeName;
131     protected Vector arrays;
132     protected Vector atomParities;
133     protected Vector bondARef1;
134     protected Vector bondARef2;
135     protected Vector bondStereo;
136     protected Vector bondid;
137     protected Vector elid;
138     protected Vector elsym;
139     protected Vector formalCharges;
140     protected Vector hCounts;
141     protected Vector isotopes;
142     protected Vector matrices;
143     protected Vector order;
144     protected Vector partialCharges;
145 
146     //descriptors
147     protected Vector scalars;
148     protected Vector strings;
149     protected Vector x2;
150     protected Vector x3;
151     protected Vector y2;
152     protected Vector y3;
153     protected Vector z3;
154     protected boolean stereoGiven;
155     protected int curRef;
156     protected int currentElement;
157 
158     //~ Constructors ///////////////////////////////////////////////////////////
159 
160     public CMLCoreModule(CDOInterface cdo)
161     {
162         initialize();
163         this.cdo = cdo;
164     }
165 
166     public CMLCoreModule(ModuleInterface conv)
167     {
168         initialize();
169         inherit(conv);
170     }
171 
172     //~ Methods ////////////////////////////////////////////////////////////////
173 
174     public void characterData(char[] ch, int start, int length)
175     {
176         //logger.debug("CD");
177         String s = (new String(ch, start, length));
178 
179         //System.out.println("start:"+start+" length:"+length+" s:"+s);
180         if (s.trim().length() == 0)
181         {
182             return;
183         }
184 
185         //System.out.println(BUILTIN+" "+elementTitle+"="+s);
186         switch (currentElement)
187         {
188         case STRING:
189 
190             if (logger.isDebugEnabled())
191             {
192                 logger.debug("Builtin: " + BUILTIN);
193             }
194 
195             if (BUILTIN.equals("elementType"))
196             {
197                 //logger.debug("Element: " + s.trim());
198                 elsym.addElement(s);
199             }
200             else if (BUILTIN.equals("atomRef"))
201             {
202                 curRef++;
203 
204                 //logger.debug("Bond: ref #" + curRef);
205                 if (curRef == 1)
206                 {
207                     bondARef1.addElement(s.trim());
208                 }
209                 else if (curRef == 2)
210                 {
211                     bondARef2.addElement(s.trim());
212                 }
213             }
214             else if (BUILTIN.equals("order"))
215             {
216                 //logger.debug("Bond: order " + s.trim());
217                 order.addElement(s.trim());
218             }
219             else if (BUILTIN.equals("stereo"))
220             {
221                 //logger.debug("Bond: stereo " + s.trim());
222                 //System.out.println("Bond: stereo " + s.trim());
223                 bondStereo.addElement(s.trim());
224                 stereoGiven = true;
225             }
226             else if (BUILTIN.equals("formalCharge"))
227             {
228                 // NOTE: this combination is in violation of the CML DTD!!!
229                 //logger.debug("Charge: " + s.trim());
230                 formalCharges.addElement(s.trim());
231             }
232             else if (BUILTIN.equals("isotope"))
233             {
234                 isotopes.addElement(s.trim());
235             }
236             else
237             {
238                 String tmp = s.trim();
239 
240                 if (tmp.length() != 0)
241                 {
242                     strings.add(new StringString(elementTitle, tmp));
243                 }
244             }
245 
246             break;
247 
248         case FLOAT:
249 
250             if (BUILTIN.equals("x3"))
251             {
252                 x3.addElement(s.trim());
253             }
254             else if (BUILTIN.equals("y3"))
255             {
256                 y3.addElement(s.trim());
257             }
258             else if (BUILTIN.equals("z3"))
259             {
260                 z3.addElement(s.trim());
261             }
262             else if (BUILTIN.equals("x2"))
263             {
264                 x2.addElement(s.trim());
265             }
266             else if (BUILTIN.equals("y2"))
267             {
268                 y2.addElement(s.trim());
269             }
270             else if (BUILTIN.equals("order"))
271             {
272                 // NOTE: this combination is in violation of the CML DTD!!!
273                 order.addElement(s.trim());
274             }
275             else if (BUILTIN.equals("charge") ||
276                     BUILTIN.equals("partialCharge"))
277             {
278                 partialCharges.addElement(s.trim());
279             }
280             else
281             {
282                 String tmp = s.trim();
283                 DoubleResult dr = new DoubleResult();
284 
285                 if (!dr.fromString(IOTypeHolder.instance().getIOType("CML"), tmp))
286                 {
287                     logger.error("Double entry " + elementTitle + "=" + tmp +
288                         " was not successfully parsed.");
289                 }
290                 else
291                 {
292                     scalars.add(new StringObject(elementTitle, dr));
293                 }
294             }
295 
296             break;
297 
298         case INTEGER:
299 
300             if (BUILTIN.equals("formalCharge"))
301             {
302                 formalCharges.addElement(s.trim());
303             }
304             else if (BUILTIN.equals("hydrogenCount"))
305             {
306                 hCounts.addElement(s.trim());
307             }
308             else if (BUILTIN.equals("isotope"))
309             {
310                 isotopes.addElement(s.trim());
311             }
312             else
313             {
314                 IntResult ir = new IntResult();
315 
316                 if (!ir.fromString(IOTypeHolder.instance().getIOType("CML"),
317                             s.trim()))
318                 {
319                     logger.error("Integer entry " + elementTitle + "=" +
320                         s.trim() + " was not successfully parsed.");
321                 }
322                 else
323                 {
324                     scalars.add(new StringObject(elementTitle, ir));
325                 }
326             }
327 
328             break;
329 
330         case COORDINATE2:
331 
332             if (BUILTIN.equals("xy2"))
333             {
334                 //logger.debug("New coord2 xy2 found." + s);
335                 try
336                 {
337                     StringTokenizer st = new StringTokenizer(s.trim());
338                     x2.addElement(st.nextToken());
339                     y2.addElement(st.nextToken());
340                 }
341                  catch (Exception e)
342                 {
343                     notify("CMLParsing error: " + e, SYSTEMID, 175, 1);
344                 }
345             }
346 
347             break;
348 
349         case COORDINATE3:
350             currentChars = currentChars + s;
351 
352             break;
353 
354         case STRINGARRAY:
355 
356             if (BUILTIN.equals("id") || BUILTIN.equals("atomId"))
357             {
358                 // use of "id" seems incorrect by quick look at DTD
359                 try
360                 {
361                     StringTokenizer st = new StringTokenizer(s.trim());
362 
363                     while (st.hasMoreTokens())
364                     {
365                         String token = st.nextToken();
366 
367                         //logger.debug("StringArray (Token): " + token);
368                         elid.addElement(token);
369                     }
370                 }
371                  catch (Exception e)
372                 {
373                     notify("CMLParsing error: " + e, SYSTEMID, 186, 1);
374                 }
375             }
376             else if (BUILTIN.equals("elementType"))
377             {
378                 try
379                 {
380                     StringTokenizer st = new StringTokenizer(s.trim());
381 
382                     while (st.hasMoreTokens())
383                     {
384                         elsym.addElement(st.nextToken());
385                     }
386                 }
387                  catch (Exception e)
388                 {
389                     notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
390                 }
391             }
392             else if (BUILTIN.equals("atomRefs"))
393             {
394                 curRef++;
395 
396                 //logger.debug("New atomRefs found: " + curRef);
397                 try
398                 {
399                     StringTokenizer st = new StringTokenizer(s.trim());
400 
401                     while (st.hasMoreTokens())
402                     {
403                         String token = st.nextToken();
404 
405                         //logger.debug("Token: " + token);
406                         if (curRef == 1)
407                         {
408                             bondARef1.addElement(token);
409                         }
410                         else if (curRef == 2)
411                         {
412                             bondARef2.addElement(token);
413                         }
414                     }
415                 }
416                  catch (Exception e)
417                 {
418                     notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
419                 }
420             }
421             else if (BUILTIN.equals("order"))
422             {
423                 //logger.debug("New bond order found.");
424                 try
425                 {
426                     StringTokenizer st = new StringTokenizer(s.trim());
427 
428                     while (st.hasMoreTokens())
429                     {
430                         String token = st.nextToken();
431 
432                         //logger.debug("Token: " + token);
433                         order.addElement(token);
434                     }
435                 }
436                  catch (Exception e)
437                 {
438                     notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
439                 }
440             }
441             else if (BUILTIN.equals("stereo"))
442             {
443                 //logger.debug("New bond order found.");
444                 try
445                 {
446                     StringTokenizer st = new StringTokenizer(s.trim());
447 
448                     while (st.hasMoreTokens())
449                     {
450                         String token = st.nextToken();
451 
452                         //logger.debug("Token: " + token);
453                         bondStereo.addElement(token);
454                     }
455                 }
456                  catch (Exception e)
457                 {
458                     notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
459                 }
460             }
461 
462             break;
463 
464         case INTEGERARRAY:
465 
466             //logger.debug("IntegerArray: builtin = " + BUILTIN);
467             if (BUILTIN.equals("formalCharge"))
468             {
469                 try
470                 {
471                     StringTokenizer st = new StringTokenizer(s.trim());
472 
473                     while (st.hasMoreTokens())
474                     {
475                         String token = st.nextToken();
476 
477                         //logger.debug("Charge added: " + token);
478                         formalCharges.addElement(token);
479                     }
480                 }
481                  catch (Exception e)
482                 {
483                     notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
484                 }
485             }
486             else if (BUILTIN.equals("hydrogenCount"))
487             {
488                 try
489                 {
490                     StringTokenizer st = new StringTokenizer(s);
491 
492                     while (st.hasMoreTokens())
493                     {
494                         String token = st.nextToken();
495 
496                         //logger.debug("Hydrogen count added: " + token);
497                         hCounts.addElement(token);
498                     }
499                 }
500                  catch (Exception e)
501                 {
502                     notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
503                 }
504             }
505             else if (BUILTIN.equals("isotope"))
506             {
507                 try
508                 {
509                     StringTokenizer st = new StringTokenizer(s.trim());
510 
511                     while (st.hasMoreTokens())
512                     {
513                         String token = st.nextToken();
514 
515                         //logger.debug("Isotope added: " + token);
516                         isotopes.addElement(token);
517                     }
518                 }
519                  catch (Exception e)
520                 {
521                     notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
522                 }
523             }
524             else
525             {
526                 String tmp = s.trim();
527 
528                 if (tmp.length() != 0)
529                 {
530                     //System.out.println("intArray " + elementTitle);
531                     IntArrayResult iar = new IntArrayResult();
532 
533                     if (delimiter == null)
534                     {
535                         delimiter = " \t\r\n";
536                     }
537                     else
538                     {
539                         iar.addCMLProperty(new StringString("delimiter",
540                                 delimiter));
541                     }
542 
543                     if (!iar.fromString(IOTypeHolder.instance().getIOType("CML"),
544                                 tmp))
545                     {
546                         logger.error("Integer array entry " + arrayTitle + "=" +
547                             tmp + " was not successfully parsed.");
548                     }
549 
550                     iar.setIntArray(ArrayHelper.intArrayFromSimpleString(tmp,
551                             delimiter));
552                     arrays.add(new StringObject(elementTitle, iar));
553                 }
554             }
555 
556             break;
557 
558         case FLOATARRAY:
559 
560             //System.out.println(BUILTIN+"="+s);
561             if (BUILTIN.equals("x3"))
562             {
563                 try
564                 {
565                     StringTokenizer st = new StringTokenizer(s.trim());
566 
567                     while (st.hasMoreTokens())
568                     {
569                         x3.addElement(st.nextToken());
570                     }
571                 }
572                  catch (Exception e)
573                 {
574                     notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
575                 }
576             }
577             else if (BUILTIN.equals("y3"))
578             {
579                 try
580                 {
581                     StringTokenizer st = new StringTokenizer(s.trim());
582 
583                     while (st.hasMoreTokens())
584                     {
585                         y3.addElement(st.nextToken());
586                     }
587                 }
588                  catch (Exception e)
589                 {
590                     notify("CMLParsing error: " + e, SYSTEMID, 213, 1);
591                 }
592             }
593             else if (BUILTIN.equals("z3"))
594             {
595                 try
596                 {
597                     StringTokenizer st = new StringTokenizer(s.trim());
598 
599                     while (st.hasMoreTokens())
600                     {
601                         z3.addElement(st.nextToken());
602                     }
603                 }
604                  catch (Exception e)
605                 {
606                     notify("CMLParsing error: " + e, SYSTEMID, 221, 1);
607                 }
608             }
609             else if (BUILTIN.equals("x2"))
610             {
611                 //logger.debug("New floatArray found.");
612                 try
613                 {
614                     StringTokenizer st = new StringTokenizer(s.trim());
615 
616                     while (st.hasMoreTokens())
617                     {
618                         x2.addElement(st.nextToken());
619                     }
620                 }
621                  catch (Exception e)
622                 {
623                     notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
624                 }
625             }
626             else if (BUILTIN.equals("y2"))
627             {
628                 //logger.debug("New floatArray found.");
629                 try
630                 {
631                     StringTokenizer st = new StringTokenizer(s.trim());
632 
633                     while (st.hasMoreTokens())
634                     {
635                         y2.addElement(st.nextToken());
636                     }
637                 }
638                  catch (Exception e)
639                 {
640                     notify("CMLParsing error: " + e, SYSTEMID, 454, 1);
641                 }
642             }
643             else if (BUILTIN.equals("partialCharge"))
644             {
645                 //logger.debug("New floatArray with partial charges found.");
646                 try
647                 {
648                     StringTokenizer st = new StringTokenizer(s.trim());
649 
650                     while (st.hasMoreTokens())
651                     {
652                         partialCharges.addElement(st.nextToken());
653                     }
654                 }
655                  catch (Exception e)
656                 {
657                     notify("CMLParsing error: " + e, SYSTEMID, 462, 1);
658                 }
659             }
660             else if (BUILTIN.equals("formalCharge"))
661             {
662                 //logger.debug("New floatArray with partial charges found.");
663                 try
664                 {
665                     StringTokenizer st = new StringTokenizer(s.trim());
666 
667                     while (st.hasMoreTokens())
668                     {
669                         formalCharges.addElement(st.nextToken());
670                     }
671                 }
672                  catch (Exception e)
673                 {
674                     notify("CMLParsing error: " + e, SYSTEMID, 462, 1);
675                 }
676             }
677             else
678             {
679                 String tmp = s.trim();
680 
681                 if (tmp.length() != 0)
682                 {
683                     //System.out.println("floatArray " + elementTitle);
684                     DoubleArrayResult dar = new DoubleArrayResult();
685 
686                     if (delimiter == null)
687                     {
688                         delimiter = " \t\r\n";
689                     }
690                     else
691                     {
692                         dar.addCMLProperty(new StringString("delimiter",
693                                 delimiter));
694                     }
695 
696                     if (!dar.fromString(IOTypeHolder.instance().getIOType("CML"),
697                                 tmp))
698                     {
699                         logger.error("Double array entry " + arrayTitle + "=" +
700                             tmp + " was not successfully parsed.");
701                     }
702                     else
703                     {
704                         dar.setDoubleArray(ArrayHelper.doubleArrayFromSimpleString(
705                                 tmp, delimiter));
706                         arrays.add(new StringObject(elementTitle, dar));
707                     }
708                 }
709             }
710 
711             break;
712 
713         case FLOATMATRIX:
714 
715             //logger.debug("FloatMatrix: builtin = " + BUILTIN);
716             //if (BUILTIN.equals("???"))
717             //{
718             //}
719             //else
720             //{
721             String tmp = s.trim();
722 
723             if (tmp.length() != 0)
724             {
725                 // much more efficient and more standard
726                 DoubleMatrixResult matrix = new DoubleMatrixResult();
727 
728                 if (matrixDelimiter == null)
729                 {
730                     matrixDelimiter = " \t\r\n";
731                 }
732                 else
733                 {
734                     matrix.addCMLProperty(new StringString("delimiter",
735                             matrixDelimiter));
736                 }
737 
738                 if (matrixRows == null)
739                 {
740                     logger.error("Number of rows is missing in FloatMatrix '" +
741                         matrixTitle + "'.");
742                 }
743                 else if (matrixColumns == null)
744                 {
745                     logger.error(
746                         "Number of columns is missing in FloatMatrix '" +
747                         matrixTitle + "'.");
748                 }
749                 else
750                 {
751                     int rows = Integer.parseInt(matrixRows);
752                     int columns = Integer.parseInt(matrixColumns);
753                     matrix.value = MatrixHelper.doubleMatrixFromSimpleString(tmp,
754                             rows, columns, matrixDelimiter);
755 
756                     if (matrix.value != null)
757                     {
758                         matrices.add(new StringObject(matrixTitle, matrix));
759                     }
760 
761                     //floatMatrices.add(new StringObject(matrixTitle, tmp));
762                 }
763 
764                 //}
765             }
766 
767             break;
768 
769         case MATRIX:
770 
771             if (matrix.characterData(s))
772             {
773                 //o.k., all is fine
774             }
775 
776             break;
777 
778         case ARRAY:
779 
780             if (array.characterData(s))
781             {
782                 //o.k., all is fine
783             }
784 
785             break;
786 
787         case SCALAR:
788 
789             if (scalar.characterData(s))
790             {
791                 //o.k., all is fine
792             }
793 
794             break;
795 
796         case NAME:
797 
798             //System.out.println("Molecule name :"+s.trim());
799             moleculeName = s.trim();
800 
801             break;
802         }
803     }
804 
805     public void endDocument()
806     {
807         cdo.endDocument();
808 
809         if (logger.isDebugEnabled())
810         {
811             logger.debug("End XML Doc");
812         }
813     }
814 
815     public void endElement(String uri, String name, String raw)
816     {
817         if (logger.isDebugEnabled())
818         {
819             logger.debug("EndElement: " + name);
820         }
821 
822         setCurrentElement(name);
823 
824         switch (currentElement)
825         {
826         case BOND:
827 
828             if (!stereoGiven)
829             {
830                 bondStereo.addElement("");
831 
832                 //System.out.println("Add stereo info <empty>");
833             }
834 
835             break;
836 
837         case ATOM:
838 
839             if (elsym.size() > formalCharges.size())
840             {
841                 /* while strictly undefined, assume zero
842                 charge when no number is given */
843                 formalCharges.addElement("0");
844             }
845 
846             if (elsym.size() > hCounts.size())
847             {
848                 /* while strictly undefined, assume zero
849                 implicit hydrogens when no number is given */
850                 hCounts.addElement("0");
851             }
852 
853             if (elsym.size() > isotopes.size())
854             {
855                 /* while strictly undefined, assume zero
856                 charge when no number is given */
857                 isotopes.addElement("0");
858             }
859 
860             /* It may happen that not all atoms have
861                associated 2D coordinates. accept that */
862             if ((elsym.size() > x2.size()) && (x2.size() != 0))
863             {
864                 /* apparently, the previous atoms had atomic
865                    coordinates, add 'null' for this atom */
866                 x2.add(null);
867                 y2.add(null);
868             }
869 
870             break;
871 
872         case MOLECULE:
873             storeData();
874             cdo.endObject("Molecule");
875 
876             break;
877 
878         case COORDINATE3:
879 
880             if (BUILTIN.equals("xyz3"))
881             {
882                 logger.debug("New coord3 xyz3 found: " + currentChars);
883 
884                 try
885                 {
886                     StringTokenizer st = new StringTokenizer(currentChars);
887                     x3.addElement(st.nextToken());
888                     y3.addElement(st.nextToken());
889                     z3.addElement(st.nextToken());
890 
891                     if (logger.isDebugEnabled())
892                     {
893                         //logger.debug("coord3 x3.length: " + x3.size());
894                         //logger.debug("coord3 y3.length: " + y3.size());
895                         //logger.debug("coord3 z3.length: " + z3.size());
896                     }
897                 }
898                  catch (Exception e)
899                 {
900                     logger.error("CMLParsing error while setting coordinate3!");
901                 }
902             }
903             else
904             {
905                 logger.warn("Unknown coordinate3 BUILTIN: " + BUILTIN);
906             }
907 
908             break;
909 
910         case MATRIX:
911             matrix.endElement(name);
912 
913             break;
914 
915         case ARRAY:
916             array.endElement(name);
917 
918             break;
919 
920         case SCALAR:
921             scalar.endElement(name);
922 
923             break;
924 
925         case NAME:
926             break;
927         }
928 
929         currentChars = "";
930         BUILTIN = "";
931         elementTitle = "";
932     }
933 
934     public void inherit(ModuleInterface convention)
935     {
936         if (convention instanceof CMLCoreModule)
937         {
938             CMLCoreModule conv = (CMLCoreModule) convention;
939             this.logger = conv.logger;
940             this.cdo = conv.returnCDO();
941             this.BUILTIN = conv.BUILTIN;
942             this.elsym = conv.elsym;
943             this.elid = conv.elid;
944             this.formalCharges = conv.formalCharges;
945             this.partialCharges = conv.partialCharges;
946             this.isotopes = conv.isotopes;
947             this.x3 = conv.x3;
948             this.y3 = conv.y3;
949             this.z3 = conv.z3;
950             this.x2 = conv.x2;
951             this.y2 = conv.y2;
952             this.hCounts = conv.hCounts;
953             this.atomParities = conv.atomParities;
954             this.bondid = conv.bondid;
955             this.bondARef1 = conv.bondARef1;
956             this.bondARef2 = conv.bondARef2;
957             this.order = conv.order;
958             this.bondStereo = conv.bondStereo;
959             this.curRef = conv.curRef;
960 
961             this.atomElements = conv.atomElements;
962 
963             //descriptors
964             this.scalars = conv.scalars;
965             this.strings = conv.strings;
966             this.matrices = conv.matrices;
967             this.arrays = conv.arrays;
968 
969             // parser
970             this.scalar = conv.scalar;
971             this.array = conv.array;
972             this.matrix = conv.matrix;
973         }
974     }
975 
976     public CDOInterface returnCDO()
977     {
978         return (CDOInterface) this.cdo;
979     }
980 
981     public void startDocument()
982     {
983         if (logger.isDebugEnabled())
984         {
985             logger.debug("Start XML Doc");
986         }
987 
988         cdo.startDocument();
989         newMolecule();
990         BUILTIN = "";
991         curRef = 0;
992     }
993 
994     public void startElement(String uri, String local, String raw,
995         Attributes atts)
996     {
997         String name = local;
998 
999         if (logger.isDebugEnabled())
1000        {
1001            logger.debug("StartElement");
1002        }
1003
1004        setCurrentElement(name);
1005
1006        switch (currentElement)
1007        {
1008        case ATOM:
1009
1010            for (int i = 0; i < atts.getLength(); i++)
1011            {
1012                String att = atts.getQName(i);
1013                String value = atts.getValue(i);
1014
1015                if (logger.isDebugEnabled())
1016                {
1017                    logger.debug("Atom: " + att + "=" + value);
1018                }
1019
1020                Vector vector = (Vector) atomElements.get(att);
1021
1022                if (vector != null)
1023                {
1024                    vector.addElement(value);
1025                }
1026                else
1027                {
1028                    if (logger.isDebugEnabled())
1029                    {
1030                        logger.debug("Unsupported attribute: " + att);
1031                    }
1032                }
1033            }
1034
1035            break;
1036
1037        case BOND:
1038            stereoGiven = false;
1039
1040            for (int i = 0; i < atts.getLength(); i++)
1041            {
1042                String att = atts.getQName(i);
1043
1044                if (logger.isDebugEnabled())
1045                {
1046                    logger.debug("B2 " + att + "=" + atts.getValue(i));
1047                }
1048
1049                if (att.equals("id"))
1050                {
1051                    bondid.addElement(atts.getValue(i));
1052                    logger.debug("B3 " + bondid);
1053                }
1054                else if (att.equals("atomRefs") || // this is CML 1.x support
1055                        att.equals("atomRefs2")) // this is CML 1.x support
1056                { // this is CML 2.0 support
1057
1058                    // expect exactly two references
1059                    try
1060                    {
1061                        StringTokenizer st = new StringTokenizer(atts.getValue(
1062                                    i));
1063                        bondARef1.addElement((String) st.nextElement());
1064                        bondARef2.addElement((String) st.nextElement());
1065                    }
1066                     catch (Exception e)
1067                    {
1068                        logger.error("Error in CML file: " + e.toString());
1069                    }
1070                }
1071                else if (att.equals("order"))
1072                { // this is CML 2.0 support
1073                    order.addElement(atts.getValue(i).trim());
1074                }
1075                else if (att.equals("stereo"))
1076                { // this is CML 2.0 support
1077                    bondStereo.addElement(atts.getValue(i).trim());
1078                    stereoGiven = true;
1079                    System.out.println("Add stereo info " +
1080                        atts.getValue(i).trim());
1081                }
1082            }
1083
1084            curRef = 0;
1085
1086            break;
1087
1088        case COORDINATE2:
1089
1090            for (int i = 0; i < atts.getLength(); i++)
1091            {
1092                if (atts.getQName(i).equals("builtin"))
1093                {
1094                    BUILTIN = atts.getValue(i);
1095                    logger.debug("Valid element coord found, builtin: " +
1096                        atts.getValue(i));
1097                }
1098            }
1099
1100            break;
1101
1102        case COORDINATE3:
1103
1104            for (int i = 0; i < atts.getLength(); i++)
1105            {
1106                if (atts.getQName(i).equals("builtin"))
1107                {
1108                    logger.debug("BUILTIN value set for coordinate3: " +
1109                        atts.getValue(i));
1110                    BUILTIN = atts.getValue(i);
1111                }
1112                else
1113                {
1114                    logger.warn("Unkown coordinate3 builtin value: " +
1115                        atts.getValue(i));
1116                }
1117            }
1118
1119            break;
1120
1121        case STRING:
1122
1123            for (int i = 0; i < atts.getLength(); i++)
1124            {
1125                //System.out.println("string att:"+atts.getQName(i)+" val:"+atts.getValue(i));
1126                if (atts.getQName(i).equals("builtin"))
1127                {
1128                    BUILTIN = atts.getValue(i);
1129                }
1130                else if (atts.getQName(i).equals("title"))
1131                {
1132                    elementTitle = atts.getValue(i);
1133                }
1134            }
1135
1136            break;
1137
1138        case FLOAT:
1139
1140            for (int i = 0; i < atts.getLength(); i++)
1141            {
1142                if (atts.getQName(i).equals("builtin"))
1143                {
1144                    BUILTIN = atts.getValue(i);
1145                }
1146                else if (atts.getQName(i).equals("title"))
1147                {
1148                    elementTitle = atts.getValue(i);
1149                }
1150            }
1151
1152            break;
1153
1154        case INTEGER:
1155
1156            for (int i = 0; i < atts.getLength(); i++)
1157            {
1158                if (atts.getQName(i).equals("builtin"))
1159                {
1160                    BUILTIN = atts.getValue(i);
1161                }
1162                else if (atts.getQName(i).equals("title"))
1163                {
1164                    elementTitle = atts.getValue(i);
1165                }
1166            }
1167
1168            break;
1169
1170        case ATOMARRAY:
1171            break;
1172
1173        case INTEGERARRAY:
1174            delimiter = null;
1175
1176            for (int i = 0; i < atts.getLength(); i++)
1177            {
1178                if (atts.getQName(i).equals("builtin"))
1179                {
1180                    BUILTIN = atts.getValue(i);
1181                }
1182                else if (atts.getQName(i).equals("title"))
1183                {
1184                    elementTitle = atts.getValue(i);
1185                }
1186            }
1187
1188            break;
1189
1190        case STRINGARRAY:
1191            delimiter = null;
1192
1193            for (int i = 0; i < atts.getLength(); i++)
1194            {
1195                if (atts.getQName(i).equals("builtin"))
1196                {
1197                    BUILTIN = atts.getValue(i);
1198                }
1199                else if (atts.getQName(i).equals("title"))
1200                {
1201                    elementTitle = atts.getValue(i);
1202                }
1203            }
1204
1205            break;
1206
1207        case FLOATARRAY:
1208            delimiter = null;
1209
1210            for (int i = 0; i < atts.getLength(); i++)
1211            {
1212                if (atts.getQName(i).equals("builtin"))
1213                {
1214                    BUILTIN = atts.getValue(i);
1215                }
1216
1217                if (atts.getQName(i).equals("title"))
1218                {
1219                    elementTitle = atts.getValue(i);
1220                }
1221            }
1222
1223            break;
1224
1225        case FLOATMATRIX:
1226            matrixRows = null;
1227            matrixColumns = null;
1228            matrixDelimiter = null;
1229
1230            for (int i = 0; i < atts.getLength(); i++)
1231            {
1232                if (atts.getQName(i).equals("builtin"))
1233                {
1234                    BUILTIN = atts.getValue(i);
1235                }
1236
1237                if (atts.getQName(i).equals("title"))
1238                {
1239                    matrixTitle = atts.getValue(i);
1240                }
1241                else if (atts.getQName(i).equals("rows"))
1242                {
1243                    matrixRows = atts.getValue(i);
1244                }
1245                else if (atts.getQName(i).equals("columns"))
1246                {
1247                    matrixColumns = atts.getValue(i);
1248                }
1249                else if (atts.getQName(i).equals("delimiter"))
1250                {
1251                    matrixDelimiter = atts.getValue(i);
1252                }
1253            }
1254
1255            break;
1256
1257        case MOLECULE:
1258            newMolecule();
1259            BUILTIN = "";
1260            cdo.startObject("Molecule");
1261
1262            break;
1263
1264        case CRYSTAL:
1265            cdo.startObject("Crystal");
1266
1267            break;
1268
1269        case LIST:
1270            break;
1271
1272        case MATRIX:
1273            matrix.clear();
1274
1275            for (int i = 0; i < atts.getLength(); i++)
1276            {
1277                matrix.startElement(atts.getQName(i), atts.getValue(i));
1278            }
1279
1280            break;
1281
1282        case ARRAY:
1283            array.clear();
1284
1285            for (int i = 0; i < atts.getLength(); i++)
1286            {
1287                array.startElement(atts.getQName(i), atts.getValue(i));
1288            }
1289
1290            break;
1291
1292        case SCALAR:
1293            scalar.clear();
1294
1295            for (int i = 0; i < atts.getLength(); i++)
1296            {
1297                scalar.startElement(atts.getQName(i), atts.getValue(i));
1298            }
1299
1300            break;
1301        }
1302    }
1303
1304    protected void setCurrentElement(String name)
1305    {
1306        //logger.debug("Current element: " + name);
1307        //System.out.println("Current element: " + name);
1308        Integer integer = (Integer) elements.get(name);
1309
1310        if (integer != null)
1311        {
1312            currentElement = integer.intValue();
1313        }
1314        else
1315        {
1316            currentElement = UNKNOWN;
1317        }
1318
1319        ;
1320    }
1321
1322    protected void newMolecule()
1323    {
1324        elsym = new Vector();
1325        elid = new Vector();
1326        formalCharges = new Vector();
1327        partialCharges = new Vector();
1328        isotopes = new Vector();
1329        x3 = new Vector();
1330        y3 = new Vector();
1331        z3 = new Vector();
1332        x2 = new Vector();
1333        y2 = new Vector();
1334        hCounts = new Vector();
1335        atomParities = new Vector();
1336        bondid = new Vector();
1337        bondARef1 = new Vector();
1338        bondARef2 = new Vector();
1339        order = new Vector();
1340        bondStereo = new Vector();
1341
1342        atomElements.put("id", elid);
1343        atomElements.put("elementType", elsym);
1344        atomElements.put("x2", x2);
1345        atomElements.put("y2", y2);
1346        atomElements.put("x3", x3);
1347        atomElements.put("y3", y3);
1348        atomElements.put("z3", z3);
1349        atomElements.put("formalCharge", formalCharges);
1350        atomElements.put("isotopes", isotopes);
1351        atomElements.put("hydrogenCount", hCounts);
1352
1353        //descriptors
1354        scalars = new Vector();
1355        strings = new Vector();
1356        arrays = new Vector();
1357        matrices = new Vector();
1358
1359        // parser
1360        scalar = new ScalarCML(scalars);
1361        array = new ArrayCML(arrays);
1362        matrix = new MatrixCML(matrices);
1363
1364        // no molecule name
1365        moleculeName = null;
1366    }
1367
1368    protected void notify(String message, String systemId, int line, int column)
1369    {
1370        if (logger.isDebugEnabled())
1371        {
1372            logger.debug("Message=" + message + " systemID=" + systemId +
1373                " line=" + line + " column=" + column);
1374        }
1375    }
1376
1377    protected void storeData()
1378    {
1379        int atomcount = elid.size();
1380
1381        if (logger.isDebugEnabled())
1382        {
1383            logger.debug("No atom ids: " + atomcount);
1384        }
1385
1386        boolean has3D = false;
1387        boolean has2D = false;
1388        boolean hasFormalCharge = false;
1389        boolean hasPartialCharge = false;
1390        boolean hasHCounts = false;
1391        boolean hasSymbols = false;
1392        boolean hasIsotopes = false;
1393
1394        if (elsym.size() == atomcount)
1395        {
1396            hasSymbols = true;
1397        }
1398        else
1399        {
1400            if (logger.isDebugEnabled())
1401            {
1402                logger.debug("No atom symbols: " + elsym.size() + " != " +
1403                    atomcount);
1404            }
1405        }
1406
1407        if ((x3.size() == atomcount) && (y3.size() == atomcount) &&
1408                (z3.size() == atomcount))
1409        {
1410            has3D = true;
1411        }
1412        else
1413        {
1414            if (logger.isDebugEnabled())
1415            {
1416                logger.debug("No 3D info: " + x3.size() + " " + y3.size() +
1417                    " " + z3.size() + " != " + atomcount);
1418            }
1419        }
1420
1421        if ((x2.size() == atomcount) && (y2.size() == atomcount))
1422        {
1423            has2D = true;
1424        }
1425        else
1426        {
1427            if (logger.isDebugEnabled())
1428            {
1429                logger.debug("No 2D info: " + x2.size() + " " + y2.size() +
1430                    " != " + atomcount);
1431            }
1432        }
1433
1434        if (formalCharges.size() == atomcount)
1435        {
1436            hasFormalCharge = true;
1437        }
1438        else
1439        {
1440            if (logger.isDebugEnabled())
1441            {
1442                logger.debug("No formal Charge info: " + formalCharges.size() +
1443                    " != " + atomcount);
1444            }
1445        }
1446
1447        if (isotopes.size() == atomcount)
1448        {
1449            hasIsotopes = true;
1450        }
1451        else
1452        {
1453            if (logger.isDebugEnabled())
1454            {
1455                logger.debug("No formal Charge info: " + isotopes.size() +
1456                    " != " + atomcount);
1457            }
1458        }
1459
1460        if (partialCharges.size() == atomcount)
1461        {
1462            hasPartialCharge = true;
1463        }
1464        else
1465        {
1466            if (logger.isDebugEnabled())
1467            {
1468                logger.debug("No partial Charge info: " +
1469                    partialCharges.size() + " != " + atomcount);
1470            }
1471        }
1472
1473        if (hCounts.size() == atomcount)
1474        {
1475            hasHCounts = true;
1476        }
1477        else
1478        {
1479            if (logger.isDebugEnabled())
1480            {
1481                logger.debug("No hydrogen Count info: " + hCounts.size() +
1482                    " != " + atomcount);
1483            }
1484        }
1485
1486        for (int i = 0; i < atomcount; i++)
1487        {
1488            //logger.info("Storing atom: " + i);