Source code: joelib/io/types/cml/CMLCoreModule.java
1 ///////////////////////////////////////////////////////////////////////////////
2 //Filename: $RCSfile: CMLCoreModule.java,v $
3 //Purpose: Chemical Markup Language.
4 //Language: Java
5 //Compiler: JDK 1.4
6 //Authors: steinbeck@ice.mpg.de, gezelter@maul.chem.nd.edu,
7 // egonw@sci.kun.nl, wegnerj@informatik.uni-tuebingen.de
8 //Version: $Revision: 1.13 $
9 // $Date: 2003/08/22 15:56:18 $
10 // $Author: wegner $
11 //
12 //Copyright (C) 1997-2003 The Chemistry Development Kit (CDK) project
13 //Copyright (c) Dept. Computer Architecture, University of Tuebingen, Germany
14 //
15 //This program is free software; you can redistribute it and/or
16 //modify it under the terms of the GNU Lesser General Public License
17 //as published by the Free Software Foundation; either version 2.1
18 //of the License, or (at your option) any later version.
19 //All we ask is that proper credit is given for our work, which includes
20 //- but is not limited to - adding the above copyright notice to the beginning
21 //of your source code files, and to any copyright notice that you may distribute
22 //with programs based on this work.
23 //
24 //This program is distributed in the hope that it will be useful,
25 //but WITHOUT ANY WARRANTY; without even the implied warranty of
26 //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 //GNU Lesser General Public License for more details.
28 //
29 //You should have received a copy of the GNU Lesser General Public License
30 //along with this program; if not, write to the Free Software
31 //Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
32 ///////////////////////////////////////////////////////////////////////////////
33 package joelib.io.types.cml;
34
35 import joelib.desc.result.*;
36
37 import joelib.io.*;
38
39 import joelib.io.types.cml.elements.*;
40
41 import joelib.util.*;
42
43 import joelib.util.types.*;
44
45 import java.util.*;
46
47 import org.apache.log4j.*;
48
49 import org.xml.sax.*;
50
51
52 /**
53 * Core CML 1.x and 2.0 elements are parsed by this class.
54 *
55 * <p>Please file a bug report if this parser fails to parse
56 * a certain element or attribute value in a valid CML document.
57 *
58 * @author egonw
59 * @author c.steinbeck@uni-koeln.de
60 * @author gezelter@maul.chem.nd.edu
61 * @author wegnerj
62 * @license LGPL
63 * @cvsversion $Revision: 1.13 $, $Date: 2003/08/22 15:56:18 $
64 * @cite rr99b
65 * @cite mr01
66 * @cite gmrw01
67 * @cite wil01
68 **/
69 public class CMLCoreModule implements ModuleInterface
70 {
71 //~ Static fields/initializers /////////////////////////////////////////////
72
// Obtain a suitable logger.
// Note: deliberately not final in the original; inherit() assigns to it.
private static Category logger = Category.getInstance(
        "joelib.io.types.cml.CMLCoreModule");

// Integer codes for the CML element kinds this class dispatches on.
// setCurrentElement() stores one of them in currentElement; UNKNOWN is
// used when an element name has no entry in the 'elements' table.
public final static int UNKNOWN = -1;
public final static int STRING = 1;
public final static int LINK = 2;
public final static int FLOAT = 3;
public final static int INTEGER = 4;
public final static int STRINGARRAY = 5;
public final static int FLOATARRAY = 6;
public final static int INTEGERARRAY = 7;
public final static int FLOATMATRIX = 8;
public final static int COORDINATE2 = 9;
public final static int COORDINATE3 = 10;
public final static int ANGLE = 11;
public final static int TORSION = 12;
public final static int LIST = 13;
public final static int MOLECULE = 14;
public final static int ATOM = 15;
public final static int ATOMARRAY = 16;
public final static int BOND = 17;
public final static int BONDARRAY = 18;
public final static int ELECTRON = 19;
public final static int REACTION = 20;
public final static int CRYSTAL = 21;
public final static int SEQUENCE = 22;
public final static int FEATURE = 23;
public final static int MATRIX = 24;
public final static int ARRAY = 25;
public final static int SCALAR = 26;
public final static int BOND_STEREO = 27;
public final static int NAME = 28;

//~ Instance fields ////////////////////////////////////////////////////////

// Maps an <atom> attribute name to the Vector collecting its values
// (filled in newMolecule(), looked up in startElement()).
public Hashtable atomElements = new Hashtable(23);
// Maps an element name to its Integer type code (read by setCurrentElement()).
// NOTE(review): the code that fills this table is not visible in this part of the file.
public Hashtable elements = new Hashtable(23);
// Passed as the systemId argument of notify().
protected final String SYSTEMID = "CML-1999-05-15";

// array
// Delegate receiving the events of ARRAY elements; recreated in newMolecule().
protected ArrayCML array;
// Receiver of the document/object start and end callbacks.
protected CDOInterface cdo;

// matrix
// Delegate receiving the events of MATRIX elements; recreated in newMolecule().
protected MatrixCML matrix;

//scalar
// Delegate receiving the events of SCALAR elements; recreated in newMolecule().
protected ScalarCML scalar;
// Value of the 'builtin' attribute of the current element; reset to "" in endElement().
protected String BUILTIN;
// NOTE(review): no assignment to arrayTitle is visible in this part of the file.
protected String arrayTitle;
// Character data accumulated for a coordinate3 element; reset to "" in endElement().
protected String currentChars;
// Token delimiter for generic integer/float arrays; set to null when an array element starts.
protected String delimiter;
// Value of the 'title' attribute of the current element; reset to "" in endElement().
protected String elementTitle;
// 'columns', 'delimiter', 'rows' and 'title' attributes of the current floatMatrix element.
protected String matrixColumns;
protected String matrixDelimiter;
protected String matrixRows;
protected String matrixTitle;
// Trimmed text of the NAME element; null after newMolecule().
protected String moleculeName;
// Generic array descriptors (StringObject entries) of the current molecule.
protected Vector arrays;
// Created in newMolecule(); not filled by the code visible in this part of the file.
protected Vector atomParities;
// Per-bond data, stored as strings: first/second atom reference, stereo value, bond id.
protected Vector bondARef1;
protected Vector bondARef2;
protected Vector bondStereo;
protected Vector bondid;
// Per-atom data, stored as strings: atom id, element symbol, formal charge,
// hydrogen count and isotope.
protected Vector elid;
protected Vector elsym;
protected Vector formalCharges;
protected Vector hCounts;
protected Vector isotopes;
// Matrix descriptors (StringObject entries) of the current molecule.
protected Vector matrices;
// Per-bond order values, stored as strings.
protected Vector order;
// Per-atom partial charges, stored as strings.
protected Vector partialCharges;

//descriptors
// Scalar (StringObject) and string (StringString) descriptors of the current molecule.
protected Vector scalars;
protected Vector strings;
// Per-atom 2D (x2, y2) and 3D (x3, y3, z3) coordinates, stored as strings;
// endElement() may add null entries to x2/y2 for atoms without 2D coordinates.
protected Vector x2;
protected Vector x3;
protected Vector y2;
protected Vector y3;
protected Vector z3;
// True once the current <bond> supplied stereo information.
protected boolean stereoGiven;
// Counts the atom references seen for the current bond (1 = first, 2 = second).
protected int curRef;
// Type code of the element most recently passed to setCurrentElement().
protected int currentElement;
157
158 //~ Constructors ///////////////////////////////////////////////////////////
159
/**
 * Creates a module that reports the parsed data to the given CDO.
 *
 * <p>{@code initialize()} is run first (its definition is outside the part
 * of the file shown here), then the CDO is stored.
 *
 * @param cdo receiver of the document and object callbacks
 */
public CMLCoreModule(CDOInterface cdo)
{
    initialize();

    // Assigned after initialize(), exactly as before.
    this.cdo = cdo;
}
165
/**
 * Creates a module that takes over the parsing state of another module.
 *
 * <p>{@code initialize()} is run first, then {@link #inherit(ModuleInterface)}
 * copies the state; {@code inherit} only copies from a
 * {@code CMLCoreModule} instance.
 *
 * @param conv module whose state is handed to {@code inherit}
 */
public CMLCoreModule(ModuleInterface conv)
{
    initialize();

    // State is copied after initialize(), exactly as before.
    inherit(conv);
}
171
172 //~ Methods ////////////////////////////////////////////////////////////////
173
174 public void characterData(char[] ch, int start, int length)
175 {
176 //logger.debug("CD");
177 String s = (new String(ch, start, length));
178
179 //System.out.println("start:"+start+" length:"+length+" s:"+s);
180 if (s.trim().length() == 0)
181 {
182 return;
183 }
184
185 //System.out.println(BUILTIN+" "+elementTitle+"="+s);
186 switch (currentElement)
187 {
188 case STRING:
189
190 if (logger.isDebugEnabled())
191 {
192 logger.debug("Builtin: " + BUILTIN);
193 }
194
195 if (BUILTIN.equals("elementType"))
196 {
197 //logger.debug("Element: " + s.trim());
198 elsym.addElement(s);
199 }
200 else if (BUILTIN.equals("atomRef"))
201 {
202 curRef++;
203
204 //logger.debug("Bond: ref #" + curRef);
205 if (curRef == 1)
206 {
207 bondARef1.addElement(s.trim());
208 }
209 else if (curRef == 2)
210 {
211 bondARef2.addElement(s.trim());
212 }
213 }
214 else if (BUILTIN.equals("order"))
215 {
216 //logger.debug("Bond: order " + s.trim());
217 order.addElement(s.trim());
218 }
219 else if (BUILTIN.equals("stereo"))
220 {
221 //logger.debug("Bond: stereo " + s.trim());
222 //System.out.println("Bond: stereo " + s.trim());
223 bondStereo.addElement(s.trim());
224 stereoGiven = true;
225 }
226 else if (BUILTIN.equals("formalCharge"))
227 {
228 // NOTE: this combination is in violation of the CML DTD!!!
229 //logger.debug("Charge: " + s.trim());
230 formalCharges.addElement(s.trim());
231 }
232 else if (BUILTIN.equals("isotope"))
233 {
234 isotopes.addElement(s.trim());
235 }
236 else
237 {
238 String tmp = s.trim();
239
240 if (tmp.length() != 0)
241 {
242 strings.add(new StringString(elementTitle, tmp));
243 }
244 }
245
246 break;
247
248 case FLOAT:
249
250 if (BUILTIN.equals("x3"))
251 {
252 x3.addElement(s.trim());
253 }
254 else if (BUILTIN.equals("y3"))
255 {
256 y3.addElement(s.trim());
257 }
258 else if (BUILTIN.equals("z3"))
259 {
260 z3.addElement(s.trim());
261 }
262 else if (BUILTIN.equals("x2"))
263 {
264 x2.addElement(s.trim());
265 }
266 else if (BUILTIN.equals("y2"))
267 {
268 y2.addElement(s.trim());
269 }
270 else if (BUILTIN.equals("order"))
271 {
272 // NOTE: this combination is in violation of the CML DTD!!!
273 order.addElement(s.trim());
274 }
275 else if (BUILTIN.equals("charge") ||
276 BUILTIN.equals("partialCharge"))
277 {
278 partialCharges.addElement(s.trim());
279 }
280 else
281 {
282 String tmp = s.trim();
283 DoubleResult dr = new DoubleResult();
284
285 if (!dr.fromString(IOTypeHolder.instance().getIOType("CML"), tmp))
286 {
287 logger.error("Double entry " + elementTitle + "=" + tmp +
288 " was not successfully parsed.");
289 }
290 else
291 {
292 scalars.add(new StringObject(elementTitle, dr));
293 }
294 }
295
296 break;
297
298 case INTEGER:
299
300 if (BUILTIN.equals("formalCharge"))
301 {
302 formalCharges.addElement(s.trim());
303 }
304 else if (BUILTIN.equals("hydrogenCount"))
305 {
306 hCounts.addElement(s.trim());
307 }
308 else if (BUILTIN.equals("isotope"))
309 {
310 isotopes.addElement(s.trim());
311 }
312 else
313 {
314 IntResult ir = new IntResult();
315
316 if (!ir.fromString(IOTypeHolder.instance().getIOType("CML"),
317 s.trim()))
318 {
319 logger.error("Integer entry " + elementTitle + "=" +
320 s.trim() + " was not successfully parsed.");
321 }
322 else
323 {
324 scalars.add(new StringObject(elementTitle, ir));
325 }
326 }
327
328 break;
329
330 case COORDINATE2:
331
332 if (BUILTIN.equals("xy2"))
333 {
334 //logger.debug("New coord2 xy2 found." + s);
335 try
336 {
337 StringTokenizer st = new StringTokenizer(s.trim());
338 x2.addElement(st.nextToken());
339 y2.addElement(st.nextToken());
340 }
341 catch (Exception e)
342 {
343 notify("CMLParsing error: " + e, SYSTEMID, 175, 1);
344 }
345 }
346
347 break;
348
349 case COORDINATE3:
350 currentChars = currentChars + s;
351
352 break;
353
354 case STRINGARRAY:
355
356 if (BUILTIN.equals("id") || BUILTIN.equals("atomId"))
357 {
358 // use of "id" seems incorrect by quick look at DTD
359 try
360 {
361 StringTokenizer st = new StringTokenizer(s.trim());
362
363 while (st.hasMoreTokens())
364 {
365 String token = st.nextToken();
366
367 //logger.debug("StringArray (Token): " + token);
368 elid.addElement(token);
369 }
370 }
371 catch (Exception e)
372 {
373 notify("CMLParsing error: " + e, SYSTEMID, 186, 1);
374 }
375 }
376 else if (BUILTIN.equals("elementType"))
377 {
378 try
379 {
380 StringTokenizer st = new StringTokenizer(s.trim());
381
382 while (st.hasMoreTokens())
383 {
384 elsym.addElement(st.nextToken());
385 }
386 }
387 catch (Exception e)
388 {
389 notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
390 }
391 }
392 else if (BUILTIN.equals("atomRefs"))
393 {
394 curRef++;
395
396 //logger.debug("New atomRefs found: " + curRef);
397 try
398 {
399 StringTokenizer st = new StringTokenizer(s.trim());
400
401 while (st.hasMoreTokens())
402 {
403 String token = st.nextToken();
404
405 //logger.debug("Token: " + token);
406 if (curRef == 1)
407 {
408 bondARef1.addElement(token);
409 }
410 else if (curRef == 2)
411 {
412 bondARef2.addElement(token);
413 }
414 }
415 }
416 catch (Exception e)
417 {
418 notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
419 }
420 }
421 else if (BUILTIN.equals("order"))
422 {
423 //logger.debug("New bond order found.");
424 try
425 {
426 StringTokenizer st = new StringTokenizer(s.trim());
427
428 while (st.hasMoreTokens())
429 {
430 String token = st.nextToken();
431
432 //logger.debug("Token: " + token);
433 order.addElement(token);
434 }
435 }
436 catch (Exception e)
437 {
438 notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
439 }
440 }
441 else if (BUILTIN.equals("stereo"))
442 {
443 //logger.debug("New bond order found.");
444 try
445 {
446 StringTokenizer st = new StringTokenizer(s.trim());
447
448 while (st.hasMoreTokens())
449 {
450 String token = st.nextToken();
451
452 //logger.debug("Token: " + token);
453 bondStereo.addElement(token);
454 }
455 }
456 catch (Exception e)
457 {
458 notify("CMLParsing error: " + e, SYSTEMID, 194, 1);
459 }
460 }
461
462 break;
463
464 case INTEGERARRAY:
465
466 //logger.debug("IntegerArray: builtin = " + BUILTIN);
467 if (BUILTIN.equals("formalCharge"))
468 {
469 try
470 {
471 StringTokenizer st = new StringTokenizer(s.trim());
472
473 while (st.hasMoreTokens())
474 {
475 String token = st.nextToken();
476
477 //logger.debug("Charge added: " + token);
478 formalCharges.addElement(token);
479 }
480 }
481 catch (Exception e)
482 {
483 notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
484 }
485 }
486 else if (BUILTIN.equals("hydrogenCount"))
487 {
488 try
489 {
490 StringTokenizer st = new StringTokenizer(s);
491
492 while (st.hasMoreTokens())
493 {
494 String token = st.nextToken();
495
496 //logger.debug("Hydrogen count added: " + token);
497 hCounts.addElement(token);
498 }
499 }
500 catch (Exception e)
501 {
502 notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
503 }
504 }
505 else if (BUILTIN.equals("isotope"))
506 {
507 try
508 {
509 StringTokenizer st = new StringTokenizer(s.trim());
510
511 while (st.hasMoreTokens())
512 {
513 String token = st.nextToken();
514
515 //logger.debug("Isotope added: " + token);
516 isotopes.addElement(token);
517 }
518 }
519 catch (Exception e)
520 {
521 notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
522 }
523 }
524 else
525 {
526 String tmp = s.trim();
527
528 if (tmp.length() != 0)
529 {
530 //System.out.println("intArray " + elementTitle);
531 IntArrayResult iar = new IntArrayResult();
532
533 if (delimiter == null)
534 {
535 delimiter = " \t\r\n";
536 }
537 else
538 {
539 iar.addCMLProperty(new StringString("delimiter",
540 delimiter));
541 }
542
543 if (!iar.fromString(IOTypeHolder.instance().getIOType("CML"),
544 tmp))
545 {
546 logger.error("Integer array entry " + arrayTitle + "=" +
547 tmp + " was not successfully parsed.");
548 }
549
550 iar.setIntArray(ArrayHelper.intArrayFromSimpleString(tmp,
551 delimiter));
552 arrays.add(new StringObject(elementTitle, iar));
553 }
554 }
555
556 break;
557
558 case FLOATARRAY:
559
560 //System.out.println(BUILTIN+"="+s);
561 if (BUILTIN.equals("x3"))
562 {
563 try
564 {
565 StringTokenizer st = new StringTokenizer(s.trim());
566
567 while (st.hasMoreTokens())
568 {
569 x3.addElement(st.nextToken());
570 }
571 }
572 catch (Exception e)
573 {
574 notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
575 }
576 }
577 else if (BUILTIN.equals("y3"))
578 {
579 try
580 {
581 StringTokenizer st = new StringTokenizer(s.trim());
582
583 while (st.hasMoreTokens())
584 {
585 y3.addElement(st.nextToken());
586 }
587 }
588 catch (Exception e)
589 {
590 notify("CMLParsing error: " + e, SYSTEMID, 213, 1);
591 }
592 }
593 else if (BUILTIN.equals("z3"))
594 {
595 try
596 {
597 StringTokenizer st = new StringTokenizer(s.trim());
598
599 while (st.hasMoreTokens())
600 {
601 z3.addElement(st.nextToken());
602 }
603 }
604 catch (Exception e)
605 {
606 notify("CMLParsing error: " + e, SYSTEMID, 221, 1);
607 }
608 }
609 else if (BUILTIN.equals("x2"))
610 {
611 //logger.debug("New floatArray found.");
612 try
613 {
614 StringTokenizer st = new StringTokenizer(s.trim());
615
616 while (st.hasMoreTokens())
617 {
618 x2.addElement(st.nextToken());
619 }
620 }
621 catch (Exception e)
622 {
623 notify("CMLParsing error: " + e, SYSTEMID, 205, 1);
624 }
625 }
626 else if (BUILTIN.equals("y2"))
627 {
628 //logger.debug("New floatArray found.");
629 try
630 {
631 StringTokenizer st = new StringTokenizer(s.trim());
632
633 while (st.hasMoreTokens())
634 {
635 y2.addElement(st.nextToken());
636 }
637 }
638 catch (Exception e)
639 {
640 notify("CMLParsing error: " + e, SYSTEMID, 454, 1);
641 }
642 }
643 else if (BUILTIN.equals("partialCharge"))
644 {
645 //logger.debug("New floatArray with partial charges found.");
646 try
647 {
648 StringTokenizer st = new StringTokenizer(s.trim());
649
650 while (st.hasMoreTokens())
651 {
652 partialCharges.addElement(st.nextToken());
653 }
654 }
655 catch (Exception e)
656 {
657 notify("CMLParsing error: " + e, SYSTEMID, 462, 1);
658 }
659 }
660 else if (BUILTIN.equals("formalCharge"))
661 {
662 //logger.debug("New floatArray with partial charges found.");
663 try
664 {
665 StringTokenizer st = new StringTokenizer(s.trim());
666
667 while (st.hasMoreTokens())
668 {
669 formalCharges.addElement(st.nextToken());
670 }
671 }
672 catch (Exception e)
673 {
674 notify("CMLParsing error: " + e, SYSTEMID, 462, 1);
675 }
676 }
677 else
678 {
679 String tmp = s.trim();
680
681 if (tmp.length() != 0)
682 {
683 //System.out.println("floatArray " + elementTitle);
684 DoubleArrayResult dar = new DoubleArrayResult();
685
686 if (delimiter == null)
687 {
688 delimiter = " \t\r\n";
689 }
690 else
691 {
692 dar.addCMLProperty(new StringString("delimiter",
693 delimiter));
694 }
695
696 if (!dar.fromString(IOTypeHolder.instance().getIOType("CML"),
697 tmp))
698 {
699 logger.error("Double array entry " + arrayTitle + "=" +
700 tmp + " was not successfully parsed.");
701 }
702 else
703 {
704 dar.setDoubleArray(ArrayHelper.doubleArrayFromSimpleString(
705 tmp, delimiter));
706 arrays.add(new StringObject(elementTitle, dar));
707 }
708 }
709 }
710
711 break;
712
713 case FLOATMATRIX:
714
715 //logger.debug("FloatMatrix: builtin = " + BUILTIN);
716 //if (BUILTIN.equals("???"))
717 //{
718 //}
719 //else
720 //{
721 String tmp = s.trim();
722
723 if (tmp.length() != 0)
724 {
725 // much more efficient and more standard
726 DoubleMatrixResult matrix = new DoubleMatrixResult();
727
728 if (matrixDelimiter == null)
729 {
730 matrixDelimiter = " \t\r\n";
731 }
732 else
733 {
734 matrix.addCMLProperty(new StringString("delimiter",
735 matrixDelimiter));
736 }
737
738 if (matrixRows == null)
739 {
740 logger.error("Number of rows is missing in FloatMatrix '" +
741 matrixTitle + "'.");
742 }
743 else if (matrixColumns == null)
744 {
745 logger.error(
746 "Number of columns is missing in FloatMatrix '" +
747 matrixTitle + "'.");
748 }
749 else
750 {
751 int rows = Integer.parseInt(matrixRows);
752 int columns = Integer.parseInt(matrixColumns);
753 matrix.value = MatrixHelper.doubleMatrixFromSimpleString(tmp,
754 rows, columns, matrixDelimiter);
755
756 if (matrix.value != null)
757 {
758 matrices.add(new StringObject(matrixTitle, matrix));
759 }
760
761 //floatMatrices.add(new StringObject(matrixTitle, tmp));
762 }
763
764 //}
765 }
766
767 break;
768
769 case MATRIX:
770
771 if (matrix.characterData(s))
772 {
773 //o.k., all is fine
774 }
775
776 break;
777
778 case ARRAY:
779
780 if (array.characterData(s))
781 {
782 //o.k., all is fine
783 }
784
785 break;
786
787 case SCALAR:
788
789 if (scalar.characterData(s))
790 {
791 //o.k., all is fine
792 }
793
794 break;
795
796 case NAME:
797
798 //System.out.println("Molecule name :"+s.trim());
799 moleculeName = s.trim();
800
801 break;
802 }
803 }
804
805 public void endDocument()
806 {
807 cdo.endDocument();
808
809 if (logger.isDebugEnabled())
810 {
811 logger.debug("End XML Doc");
812 }
813 }
814
815 public void endElement(String uri, String name, String raw)
816 {
817 if (logger.isDebugEnabled())
818 {
819 logger.debug("EndElement: " + name);
820 }
821
822 setCurrentElement(name);
823
824 switch (currentElement)
825 {
826 case BOND:
827
828 if (!stereoGiven)
829 {
830 bondStereo.addElement("");
831
832 //System.out.println("Add stereo info <empty>");
833 }
834
835 break;
836
837 case ATOM:
838
839 if (elsym.size() > formalCharges.size())
840 {
841 /* while strictly undefined, assume zero
842 charge when no number is given */
843 formalCharges.addElement("0");
844 }
845
846 if (elsym.size() > hCounts.size())
847 {
848 /* while strictly undefined, assume zero
849 implicit hydrogens when no number is given */
850 hCounts.addElement("0");
851 }
852
853 if (elsym.size() > isotopes.size())
854 {
855 /* while strictly undefined, assume zero
856 charge when no number is given */
857 isotopes.addElement("0");
858 }
859
860 /* It may happen that not all atoms have
861 associated 2D coordinates. accept that */
862 if ((elsym.size() > x2.size()) && (x2.size() != 0))
863 {
864 /* apparently, the previous atoms had atomic
865 coordinates, add 'null' for this atom */
866 x2.add(null);
867 y2.add(null);
868 }
869
870 break;
871
872 case MOLECULE:
873 storeData();
874 cdo.endObject("Molecule");
875
876 break;
877
878 case COORDINATE3:
879
880 if (BUILTIN.equals("xyz3"))
881 {
882 logger.debug("New coord3 xyz3 found: " + currentChars);
883
884 try
885 {
886 StringTokenizer st = new StringTokenizer(currentChars);
887 x3.addElement(st.nextToken());
888 y3.addElement(st.nextToken());
889 z3.addElement(st.nextToken());
890
891 if (logger.isDebugEnabled())
892 {
893 //logger.debug("coord3 x3.length: " + x3.size());
894 //logger.debug("coord3 y3.length: " + y3.size());
895 //logger.debug("coord3 z3.length: " + z3.size());
896 }
897 }
898 catch (Exception e)
899 {
900 logger.error("CMLParsing error while setting coordinate3!");
901 }
902 }
903 else
904 {
905 logger.warn("Unknown coordinate3 BUILTIN: " + BUILTIN);
906 }
907
908 break;
909
910 case MATRIX:
911 matrix.endElement(name);
912
913 break;
914
915 case ARRAY:
916 array.endElement(name);
917
918 break;
919
920 case SCALAR:
921 scalar.endElement(name);
922
923 break;
924
925 case NAME:
926 break;
927 }
928
929 currentChars = "";
930 BUILTIN = "";
931 elementTitle = "";
932 }
933
934 public void inherit(ModuleInterface convention)
935 {
936 if (convention instanceof CMLCoreModule)
937 {
938 CMLCoreModule conv = (CMLCoreModule) convention;
939 this.logger = conv.logger;
940 this.cdo = conv.returnCDO();
941 this.BUILTIN = conv.BUILTIN;
942 this.elsym = conv.elsym;
943 this.elid = conv.elid;
944 this.formalCharges = conv.formalCharges;
945 this.partialCharges = conv.partialCharges;
946 this.isotopes = conv.isotopes;
947 this.x3 = conv.x3;
948 this.y3 = conv.y3;
949 this.z3 = conv.z3;
950 this.x2 = conv.x2;
951 this.y2 = conv.y2;
952 this.hCounts = conv.hCounts;
953 this.atomParities = conv.atomParities;
954 this.bondid = conv.bondid;
955 this.bondARef1 = conv.bondARef1;
956 this.bondARef2 = conv.bondARef2;
957 this.order = conv.order;
958 this.bondStereo = conv.bondStereo;
959 this.curRef = conv.curRef;
960
961 this.atomElements = conv.atomElements;
962
963 //descriptors
964 this.scalars = conv.scalars;
965 this.strings = conv.strings;
966 this.matrices = conv.matrices;
967 this.arrays = conv.arrays;
968
969 // parser
970 this.scalar = conv.scalar;
971 this.array = conv.array;
972 this.matrix = conv.matrix;
973 }
974 }
975
976 public CDOInterface returnCDO()
977 {
978 return (CDOInterface) this.cdo;
979 }
980
981 public void startDocument()
982 {
983 if (logger.isDebugEnabled())
984 {
985 logger.debug("Start XML Doc");
986 }
987
988 cdo.startDocument();
989 newMolecule();
990 BUILTIN = "";
991 curRef = 0;
992 }
993
994 public void startElement(String uri, String local, String raw,
995 Attributes atts)
996 {
997 String name = local;
998
999 if (logger.isDebugEnabled())
1000 {
1001 logger.debug("StartElement");
1002 }
1003
1004 setCurrentElement(name);
1005
1006 switch (currentElement)
1007 {
1008 case ATOM:
1009
1010 for (int i = 0; i < atts.getLength(); i++)
1011 {
1012 String att = atts.getQName(i);
1013 String value = atts.getValue(i);
1014
1015 if (logger.isDebugEnabled())
1016 {
1017 logger.debug("Atom: " + att + "=" + value);
1018 }
1019
1020 Vector vector = (Vector) atomElements.get(att);
1021
1022 if (vector != null)
1023 {
1024 vector.addElement(value);
1025 }
1026 else
1027 {
1028 if (logger.isDebugEnabled())
1029 {
1030 logger.debug("Unsupported attribute: " + att);
1031 }
1032 }
1033 }
1034
1035 break;
1036
1037 case BOND:
1038 stereoGiven = false;
1039
1040 for (int i = 0; i < atts.getLength(); i++)
1041 {
1042 String att = atts.getQName(i);
1043
1044 if (logger.isDebugEnabled())
1045 {
1046 logger.debug("B2 " + att + "=" + atts.getValue(i));
1047 }
1048
1049 if (att.equals("id"))
1050 {
1051 bondid.addElement(atts.getValue(i));
1052 logger.debug("B3 " + bondid);
1053 }
1054 else if (att.equals("atomRefs") || // this is CML 1.x support
1055 att.equals("atomRefs2")) // this is CML 1.x support
1056 { // this is CML 2.0 support
1057
1058 // expect exactly two references
1059 try
1060 {
1061 StringTokenizer st = new StringTokenizer(atts.getValue(
1062 i));
1063 bondARef1.addElement((String) st.nextElement());
1064 bondARef2.addElement((String) st.nextElement());
1065 }
1066 catch (Exception e)
1067 {
1068 logger.error("Error in CML file: " + e.toString());
1069 }
1070 }
1071 else if (att.equals("order"))
1072 { // this is CML 2.0 support
1073 order.addElement(atts.getValue(i).trim());
1074 }
1075 else if (att.equals("stereo"))
1076 { // this is CML 2.0 support
1077 bondStereo.addElement(atts.getValue(i).trim());
1078 stereoGiven = true;
1079 System.out.println("Add stereo info " +
1080 atts.getValue(i).trim());
1081 }
1082 }
1083
1084 curRef = 0;
1085
1086 break;
1087
1088 case COORDINATE2:
1089
1090 for (int i = 0; i < atts.getLength(); i++)
1091 {
1092 if (atts.getQName(i).equals("builtin"))
1093 {
1094 BUILTIN = atts.getValue(i);
1095 logger.debug("Valid element coord found, builtin: " +
1096 atts.getValue(i));
1097 }
1098 }
1099
1100 break;
1101
1102 case COORDINATE3:
1103
1104 for (int i = 0; i < atts.getLength(); i++)
1105 {
1106 if (atts.getQName(i).equals("builtin"))
1107 {
1108 logger.debug("BUILTIN value set for coordinate3: " +
1109 atts.getValue(i));
1110 BUILTIN = atts.getValue(i);
1111 }
1112 else
1113 {
1114 logger.warn("Unkown coordinate3 builtin value: " +
1115 atts.getValue(i));
1116 }
1117 }
1118
1119 break;
1120
1121 case STRING:
1122
1123 for (int i = 0; i < atts.getLength(); i++)
1124 {
1125 //System.out.println("string att:"+atts.getQName(i)+" val:"+atts.getValue(i));
1126 if (atts.getQName(i).equals("builtin"))
1127 {
1128 BUILTIN = atts.getValue(i);
1129 }
1130 else if (atts.getQName(i).equals("title"))
1131 {
1132 elementTitle = atts.getValue(i);
1133 }
1134 }
1135
1136 break;
1137
1138 case FLOAT:
1139
1140 for (int i = 0; i < atts.getLength(); i++)
1141 {
1142 if (atts.getQName(i).equals("builtin"))
1143 {
1144 BUILTIN = atts.getValue(i);
1145 }
1146 else if (atts.getQName(i).equals("title"))
1147 {
1148 elementTitle = atts.getValue(i);
1149 }
1150 }
1151
1152 break;
1153
1154 case INTEGER:
1155
1156 for (int i = 0; i < atts.getLength(); i++)
1157 {
1158 if (atts.getQName(i).equals("builtin"))
1159 {
1160 BUILTIN = atts.getValue(i);
1161 }
1162 else if (atts.getQName(i).equals("title"))
1163 {
1164 elementTitle = atts.getValue(i);
1165 }
1166 }
1167
1168 break;
1169
1170 case ATOMARRAY:
1171 break;
1172
1173 case INTEGERARRAY:
1174 delimiter = null;
1175
1176 for (int i = 0; i < atts.getLength(); i++)
1177 {
1178 if (atts.getQName(i).equals("builtin"))
1179 {
1180 BUILTIN = atts.getValue(i);
1181 }
1182 else if (atts.getQName(i).equals("title"))
1183 {
1184 elementTitle = atts.getValue(i);
1185 }
1186 }
1187
1188 break;
1189
1190 case STRINGARRAY:
1191 delimiter = null;
1192
1193 for (int i = 0; i < atts.getLength(); i++)
1194 {
1195 if (atts.getQName(i).equals("builtin"))
1196 {
1197 BUILTIN = atts.getValue(i);
1198 }
1199 else if (atts.getQName(i).equals("title"))
1200 {
1201 elementTitle = atts.getValue(i);
1202 }
1203 }
1204
1205 break;
1206
1207 case FLOATARRAY:
1208 delimiter = null;
1209
1210 for (int i = 0; i < atts.getLength(); i++)
1211 {
1212 if (atts.getQName(i).equals("builtin"))
1213 {
1214 BUILTIN = atts.getValue(i);
1215 }
1216
1217 if (atts.getQName(i).equals("title"))
1218 {
1219 elementTitle = atts.getValue(i);
1220 }
1221 }
1222
1223 break;
1224
1225 case FLOATMATRIX:
1226 matrixRows = null;
1227 matrixColumns = null;
1228 matrixDelimiter = null;
1229
1230 for (int i = 0; i < atts.getLength(); i++)
1231 {
1232 if (atts.getQName(i).equals("builtin"))
1233 {
1234 BUILTIN = atts.getValue(i);
1235 }
1236
1237 if (atts.getQName(i).equals("title"))
1238 {
1239 matrixTitle = atts.getValue(i);
1240 }
1241 else if (atts.getQName(i).equals("rows"))
1242 {
1243 matrixRows = atts.getValue(i);
1244 }
1245 else if (atts.getQName(i).equals("columns"))
1246 {
1247 matrixColumns = atts.getValue(i);
1248 }
1249 else if (atts.getQName(i).equals("delimiter"))
1250 {
1251 matrixDelimiter = atts.getValue(i);
1252 }
1253 }
1254
1255 break;
1256
1257 case MOLECULE:
1258 newMolecule();
1259 BUILTIN = "";
1260 cdo.startObject("Molecule");
1261
1262 break;
1263
1264 case CRYSTAL:
1265 cdo.startObject("Crystal");
1266
1267 break;
1268
1269 case LIST:
1270 break;
1271
1272 case MATRIX:
1273 matrix.clear();
1274
1275 for (int i = 0; i < atts.getLength(); i++)
1276 {
1277 matrix.startElement(atts.getQName(i), atts.getValue(i));
1278 }
1279
1280 break;
1281
1282 case ARRAY:
1283 array.clear();
1284
1285 for (int i = 0; i < atts.getLength(); i++)
1286 {
1287 array.startElement(atts.getQName(i), atts.getValue(i));
1288 }
1289
1290 break;
1291
1292 case SCALAR:
1293 scalar.clear();
1294
1295 for (int i = 0; i < atts.getLength(); i++)
1296 {
1297 scalar.startElement(atts.getQName(i), atts.getValue(i));
1298 }
1299
1300 break;
1301 }
1302 }
1303
1304 protected void setCurrentElement(String name)
1305 {
1306 //logger.debug("Current element: " + name);
1307 //System.out.println("Current element: " + name);
1308 Integer integer = (Integer) elements.get(name);
1309
1310 if (integer != null)
1311 {
1312 currentElement = integer.intValue();
1313 }
1314 else
1315 {
1316 currentElement = UNKNOWN;
1317 }
1318
1319 ;
1320 }
1321
1322 protected void newMolecule()
1323 {
1324 elsym = new Vector();
1325 elid = new Vector();
1326 formalCharges = new Vector();
1327 partialCharges = new Vector();
1328 isotopes = new Vector();
1329 x3 = new Vector();
1330 y3 = new Vector();
1331 z3 = new Vector();
1332 x2 = new Vector();
1333 y2 = new Vector();
1334 hCounts = new Vector();
1335 atomParities = new Vector();
1336 bondid = new Vector();
1337 bondARef1 = new Vector();
1338 bondARef2 = new Vector();
1339 order = new Vector();
1340 bondStereo = new Vector();
1341
1342 atomElements.put("id", elid);
1343 atomElements.put("elementType", elsym);
1344 atomElements.put("x2", x2);
1345 atomElements.put("y2", y2);
1346 atomElements.put("x3", x3);
1347 atomElements.put("y3", y3);
1348 atomElements.put("z3", z3);
1349 atomElements.put("formalCharge", formalCharges);
1350 atomElements.put("isotopes", isotopes);
1351 atomElements.put("hydrogenCount", hCounts);
1352
1353 //descriptors
1354 scalars = new Vector();
1355 strings = new Vector();
1356 arrays = new Vector();
1357 matrices = new Vector();
1358
1359 // parser
1360 scalar = new ScalarCML(scalars);
1361 array = new ArrayCML(arrays);
1362 matrix = new MatrixCML(matrices);
1363
1364 // no molecule name
1365 moleculeName = null;
1366 }
1367
1368 protected void notify(String message, String systemId, int line, int column)
1369 {
1370 if (logger.isDebugEnabled())
1371 {
1372 logger.debug("Message=" + message + " systemID=" + systemId +
1373 " line=" + line + " column=" + column);
1374 }
1375 }
1376
1377 protected void storeData()
1378 {
1379 int atomcount = elid.size();
1380
1381 if (logger.isDebugEnabled())
1382 {
1383 logger.debug("No atom ids: " + atomcount);
1384 }
1385
1386 boolean has3D = false;
1387 boolean has2D = false;
1388 boolean hasFormalCharge = false;
1389 boolean hasPartialCharge = false;
1390 boolean hasHCounts = false;
1391 boolean hasSymbols = false;
1392 boolean hasIsotopes = false;
1393
1394 if (elsym.size() == atomcount)
1395 {
1396 hasSymbols = true;
1397 }
1398 else
1399 {
1400 if (logger.isDebugEnabled())
1401 {
1402 logger.debug("No atom symbols: " + elsym.size() + " != " +
1403 atomcount);
1404 }
1405 }
1406
1407 if ((x3.size() == atomcount) && (y3.size() == atomcount) &&
1408 (z3.size() == atomcount))
1409 {
1410 has3D = true;
1411 }
1412 else
1413 {
1414 if (logger.isDebugEnabled())
1415 {
1416 logger.debug("No 3D info: " + x3.size() + " " + y3.size() +
1417 " " + z3.size() + " != " + atomcount);
1418 }
1419 }
1420
1421 if ((x2.size() == atomcount) && (y2.size() == atomcount))
1422 {
1423 has2D = true;
1424 }
1425 else
1426 {
1427 if (logger.isDebugEnabled())
1428 {
1429 logger.debug("No 2D info: " + x2.size() + " " + y2.size() +
1430 " != " + atomcount);
1431 }
1432 }
1433
1434 if (formalCharges.size() == atomcount)
1435 {
1436 hasFormalCharge = true;
1437 }
1438 else
1439 {
1440 if (logger.isDebugEnabled())
1441 {
1442 logger.debug("No formal Charge info: " + formalCharges.size() +
1443 " != " + atomcount);
1444 }
1445 }
1446
1447 if (isotopes.size() == atomcount)
1448 {
1449 hasIsotopes = true;
1450 }
1451 else
1452 {
1453 if (logger.isDebugEnabled())
1454 {
1455 logger.debug("No formal Charge info: " + isotopes.size() +
1456 " != " + atomcount);
1457 }
1458 }
1459
1460 if (partialCharges.size() == atomcount)
1461 {
1462 hasPartialCharge = true;
1463 }
1464 else
1465 {
1466 if (logger.isDebugEnabled())
1467 {
1468 logger.debug("No partial Charge info: " +
1469 partialCharges.size() + " != " + atomcount);
1470 }
1471 }
1472
1473 if (hCounts.size() == atomcount)
1474 {
1475 hasHCounts = true;
1476 }
1477 else
1478 {
1479 if (logger.isDebugEnabled())
1480 {
1481 logger.debug("No hydrogen Count info: " + hCounts.size() +
1482 " != " + atomcount);
1483 }
1484 }
1485
1486 for (int i = 0; i < atomcount; i++)
1487 {
1488 //logger.info("Storing atom: " + i);