Source code: joelib/util/ComparisonHelper.java
1 ///////////////////////////////////////////////////////////////////////////////
2 // Filename: $RCSfile: ComparisonHelper.java,v $
// Purpose: Compares molecules with a set of target molecules using descriptor values.
4 // Language: Java
5 // Compiler: JDK 1.4
6 // Authors: Joerg K. Wegner
7 // Version: $Revision: 1.5 $
8 // $Date: 2003/08/22 15:56:21 $
9 // $Author: wegner $
10 //
11 // Copyright (c) Dept. Computer Architecture, University of Tuebingen, Germany
12 //
13 // This program is free software; you can redistribute it and/or modify
14 // it under the terms of the GNU General Public License as published by
15 // the Free Software Foundation version 2 of the License.
16 //
17 // This program is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 // GNU General Public License for more details.
21 ///////////////////////////////////////////////////////////////////////////////
22 package joelib.util;
23
24 import cformat.*;
25
26 import joelib.data.*;
27
28 import joelib.desc.*;
29
30 import joelib.desc.result.*;
31
32 import joelib.ext.*;
33
34 import joelib.io.*;
35
36 import joelib.molecule.*;
37
38 import joelib.process.*;
39
40 import joelib.test.*;
41
42 import joelib.util.*;
43
44 import joelib.util.iterator.*;
45
46 import wsi.ra.tool.*;
47
48 /*==========================================================================*
49 * IMPORTS
50 *==========================================================================*/
51 import java.io.*;
52
53 import java.util.*;
54
55 import org.apache.log4j.*;
56
57
58 /*==========================================================================*
59 * CLASS DECLARATION
60 *==========================================================================*/
61
62 /**
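 * Helper for comparing molecules with one or more target molecules, based on
 * native (double), binary (bit set) or distance metric descriptor values.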
64 *
65 * @author wegnerj
66 * @license GPL
67 * @cvsversion $Revision: 1.5 $, $Date: 2003/08/22 15:56:21 $
68 */
69 public class ComparisonHelper
70 {
71 //~ Static fields/initializers /////////////////////////////////////////////
72
73 /*-------------------------------------------------------------------------*
74 * private static member variables
75 *-------------------------------------------------------------------------*/
76
77 // Obtain a suitable logger.
78 private static Category logger = Category.getInstance(
79 "joelib.util.ComparisonHelper");
80 private final static int NOT_DEFINED = -1;
81 private final static int BINARY_COMPARISON = 0;
82 private final static int EUKLIDIAN_COMPARISON = 1;
83 private final static int DIST_METRIC_COMPARISON = 2;
84
85 //~ Instance fields ////////////////////////////////////////////////////////
86
87 private String targetFile;
88 private Vector descBinaryValue;
89 private Vector descValues;
90 private Vector distMetricValue;
91 private Vector targetMols;
92 private String[] descriptors;
93 private double[] tmpValues;
94 private int comparisonType = NOT_DEFINED;
95
96 //~ Constructors ///////////////////////////////////////////////////////////
97
98 /*-------------------------------------------------------------------------*
99 * constructor
100 *-------------------------------------------------------------------------*/
/**
 * Creates a helper whose target molecules are read from a file, using an
 * initial capacity of 10 for the internal vectors.
 *
 * @param inType       input type handed on to the three-argument constructor
 * @param _targetFile  name of the file containing the target molecules
 */
public ComparisonHelper(IOType inType, String _targetFile)
{
    this(inType, _targetFile, 10);
}
105
106 /**
107 * Constructor for the ComparisonHelper object
108 *
109 * @param _targetFile Description of the Parameter
110 * @param inType Description of the Parameter
111 */
112 public ComparisonHelper(IOType inType, String _targetFile, int _initialSize)
113 {
114 targetFile = _targetFile;
115
116 SimpleReader reader = null;
117
118 try
119 {
120 reader = new SimpleReader(new FileInputStream(_targetFile), inType);
121 }
122 catch (Exception ex)
123 {
124 ex.printStackTrace();
125
126 // throw ex;
127 }
128
129 targetMols = new Vector(_initialSize);
130 descValues = new Vector(_initialSize);
131 descBinaryValue = new Vector(_initialSize);
132
133 JOEMol targetMol = null;
134
135 for (;;)
136 {
137 targetMol = new JOEMol(inType, inType);
138
139 //load only the first molecule !
140 try
141 {
142 if (!reader.readNext(targetMol))
143 {
144 // targetMols = null;
145 break;
146 }
147
148 targetMols.add(targetMol);
149 }
150 catch (Exception ex)
151 {
152 targetMol = null;
153 ex.printStackTrace();
154
155 break;
156
157 // throw ex;
158 }
159 }
160
161 reader.close();
162 reader = null;
163 }
164
/**
 * Creates a helper with exactly one target molecule.
 *
 * @param _targetMolecule  the molecule all other molecules are compared with
 */
public ComparisonHelper(JOEMol _targetMolecule)
{
    int _initialSize = 1;
    targetMols = new Vector(_initialSize);
    descValues = new Vector(_initialSize);
    descBinaryValue = new Vector(_initialSize);

    // Must be allocated as well, otherwise storing a distance metric
    // descriptor value fails with a NullPointerException.
    distMetricValue = new Vector(_initialSize);

    targetMols.add(_targetMolecule);
}
179
180 public ComparisonHelper()
181 {
182 }
183
184 //~ Methods ////////////////////////////////////////////////////////////////
185
186 /*-------------------------------------------------------------------------*
187 * public methods
188 *-------------------------------------------------------------------------*/
189
190 /**
191 * Sets the comparisonDescriptor attribute of the ComparisonHelper object
192 *
193 * @param _descriptor The new comparisonDescriptor value
194 * @return Description of the Return Value
195 */
196 public boolean setComparisonDescriptor(String _descriptor)
197 {
198 if (_descriptor == null)
199 {
200 logger.error("No descriptors for comparison defined.");
201
202 return false;
203 }
204
205 if ((targetMols == null) || (targetMols.size() == 0))
206 {
207 logger.error("No target molecule available.");
208
209 return false;
210 }
211
212 descriptors = new String[1];
213 descriptors[0] = _descriptor;
214 tmpValues = new double[1];
215
216 DescResult result = null;
217
218 int size = targetMols.size();
219 JOEMol targetMol;
220 double[] tmpVal;
221 JOEBitVec tmpBit;
222 DistanceMetricValue tmpDistMetricValue;
223
224 for (int i = 0; i < size; i++)
225 {
226 targetMol = (JOEMol) targetMols.get(i);
227
228 // System.out.println("targetMol("+i+"):"+targetMol);
229 try
230 {
231 result = DescriptorHelper.descFromMol(targetMol, _descriptor,
232 true);
233 }
234 catch (DescriptorException ex)
235 {
236 ex.printStackTrace();
237
238 return false;
239 }
240
241 if (result == null)
242 {
243 logger.error("Descriptor '" + _descriptor +
244 "' not found in target molecule " + targetMol.getTitle() +
245 ".");
246
247 return false;
248 }
249
250 if (JOEHelper.hasInterface(result, "NativeValue"))
251 {
252 tmpVal = new double[1];
253 tmpVal[0] = ((NativeValue) result).getDoubleNV();
254 descValues.add(tmpVal);
255 comparisonType = EUKLIDIAN_COMPARISON;
256 }
257 else if (JOEHelper.hasInterface(result, "BinaryValue"))
258 {
259 tmpBit = ((BinaryValue) result).getBinaryValue();
260
261 // System.out.println("add BIT:"+tmpBit.toString());
262 descBinaryValue.add(tmpBit);
263 comparisonType = BINARY_COMPARISON;
264 }
265 else if (JOEHelper.hasInterface(result, "DistanceMetricValue"))
266 {
267 tmpDistMetricValue = (DistanceMetricValue) result;
268 distMetricValue.add(tmpDistMetricValue);
269 comparisonType = DIST_METRIC_COMPARISON;
270 }
271 else
272 {
273 logger.error("Descriptor '" + _descriptor +
274 "' must be a double, a 'bit set' value or allow a distance metric.");
275
276 return false;
277 }
278 }
279
280 return true;
281 }
282
283 /**
284 * Sets the comparisonDescriptors attribute of the ComparisonHelper object
285 *
286 * @param _descriptors The new comparisonDescriptors value
287 * @return Description of the Return Value
288 */
289 public boolean setComparisonDescriptors(String[] _descriptors)
290 {
291 if (_descriptors == null)
292 {
293 logger.error("No descriptors for comparison defined.");
294
295 return false;
296 }
297
298 if (_descriptors.length == 0)
299 {
300 logger.error("Descriptors for comparison seems to be empty.");
301
302 return false;
303 }
304
305 if ((targetMols == null) || (targetMols.size() == 0))
306 {
307 logger.error("No target molecule available.");
308
309 return false;
310 }
311
312 if (_descriptors.length == 1)
313 {
314 return setComparisonDescriptor(_descriptors[0]);
315 }
316
317 descriptors = _descriptors;
318
319 int size = _descriptors.length;
320 tmpValues = new double[size];
321
322 // descriptors= new String[size];
323 int sizeMols = targetMols.size();
324 JOEMol targetMol;
325 double[] tmpVal;
326 double value;
327
328 for (int ii = 0; ii < sizeMols; ii++)
329 {
330 targetMol = (JOEMol) targetMols.get(ii);
331 tmpVal = new double[size];
332
333 for (int i = 0; i < size; i++)
334 {
335 // descriptors[i]=_descriptors[i];
336 try
337 {
338 value = getDoubleDesc(targetMol, descriptors[i]);
339 }
340 catch (DescriptorException ex)
341 {
342 logger.error(ex.toString());
343 logger.error("Can't load double value of descriptor '" +
344 descriptors[i] + "' in target molecule.");
345
346 return false;
347 }
348
349 tmpVal[i] = value;
350 }
351
352 descValues.add(tmpVal);
353 }
354
355 comparisonType = EUKLIDIAN_COMPARISON;
356
357 return true;
358 }
359
360 public final Vector getTargetMols()
361 {
362 return targetMols;
363 }
364
365 /**
366 * Description of the Method
367 *
368 * @param mol Description of the Parameter
369 * @return Description of the Return Value
370 */
371 public double[] compare(JOEMol mol)
372 {
373 return compare(mol, null, null);
374 }
375
376 /**
377 * Description of the Method. <code>Double.NaN</code> is returned if no
378 * comparison value could be calulated
379 *
380 * @param mol Description of the Parameter
381 * @param _doubleDescName Stores the distance value at the double value
382 * descriptor with this name.
383 * @return double distance value
384 */
385 public double[] compare(JOEMol mol, String _distResultName,
386 double[] distances)
387 {
388 // get the actual descriptor values
389 int size = targetMols.size();
390
391 if (distances == null)
392 {
393 distances = new double[size];
394 }
395
396 JOEBitVec bitset = null;
397 DistanceMetricValue dMetricValue = null;
398
399 switch (comparisonType)
400 {
401 case BINARY_COMPARISON:
402 bitset = getBitset(mol);
403
404 if (bitset == null)
405 {
406 return null;
407 }
408
409 break;
410
411 case EUKLIDIAN_COMPARISON:
412 tmpValues = getDescriptors(mol, tmpValues);
413
414 if (tmpValues == null)
415 {
416 return null;
417 }
418
419 break;
420
421 case DIST_METRIC_COMPARISON:
422 dMetricValue = getDistMetricValue(mol);
423
424 if (dMetricValue == null)
425 {
426 return null;
427 }
428
429 break;
430
431 default:
432 logger.error(
433 "Descriptor value(s) does not contain valid comparison values (or are not initialized).");
434
435 return null;
436 }
437
438 // calculate distance values for all target patterns
439 for (int ii = 0; ii < size; ii++)
440 {
441 switch (comparisonType)
442 {
443 case BINARY_COMPARISON:
444 distances[ii] = distance(bitset, ii);
445
446 break;
447
448 case EUKLIDIAN_COMPARISON:
449 distances[ii] = distance(tmpValues, ii);
450
451 break;
452
453 case DIST_METRIC_COMPARISON:
454 distances[ii] = distance(dMetricValue, ii);
455
456 break;
457 }
458
459 // add distance result to molecule
460 // store as String value !!!!
461 // if you like another representation you must define
462 // a descriptor result for this case !!!
463 if (_distResultName != null)
464 {
465 JOEPairData dp = new JOEPairData();
466 dp.setAttribute(_distResultName + "_" + ii);
467 dp.setValue(Double.toString(distances[ii]));
468 mol.addData(dp);
469 }
470 }
471
472 return distances;
473 }
474
475 public double compare(JOEMol source, JOEMol target)
476 {
477 // get the actual descriptor values
478 JOEBitVec bitsetSource = null;
479 double[] descSource = new double[tmpValues.length];
480 DistanceMetricValue dMetricValueSource = null;
481 JOEBitVec bitsetTarget = null;
482 double[] descTarget = new double[tmpValues.length];
483 DistanceMetricValue dMetricValueTarget = null;
484
485 switch (comparisonType)
486 {
487 case BINARY_COMPARISON:
488 bitsetSource = getBitset(source);
489
490 if (bitsetSource == null)
491 {
492 return Double.NaN;
493 }
494
495 bitsetTarget = getBitset(target);
496
497 if (bitsetTarget == null)
498 {
499 return Double.NaN;
500 }
501
502 break;
503
504 case EUKLIDIAN_COMPARISON:
505 descSource = getDescriptors(source, descSource);
506
507 if (tmpValues == null)
508 {
509 return Double.NaN;
510 }
511
512 descTarget = getDescriptors(target, descTarget);
513
514 if (tmpValues == null)
515 {
516 return Double.NaN;
517 }
518
519 break;
520
521 case DIST_METRIC_COMPARISON:
522 dMetricValueSource = getDistMetricValue(source);
523
524 if (dMetricValueSource == null)
525 {
526 return Double.NaN;
527 }
528
529 dMetricValueTarget = getDistMetricValue(target);
530
531 if (dMetricValueTarget == null)
532 {
533 return Double.NaN;
534 }
535
536 break;
537
538 default:
539 logger.error(
540 "Descriptor value(s) does not contain valid comparison values (or are not initialized).");
541
542 return Double.NaN;
543 }
544
545 // calculate distance values for all target patterns
546 switch (comparisonType)
547 {
548 case BINARY_COMPARISON:
549 return distance(bitsetSource, bitsetSource);
550
551 case EUKLIDIAN_COMPARISON:
552 return distance(descSource, descSource);
553
554 case DIST_METRIC_COMPARISON:
555 return distance(dMetricValueSource, dMetricValueTarget);
556 }
557
558 return Double.NaN;
559 }
560
561 /*-------------------------------------------------------------------------*
562 * private methods
563 *-------------------------------------------------------------------------*/
564 private final JOEBitVec getBitset(final JOEMol mol)
565 {
566 DescResult result = null;
567
568 try
569 {
570 result = DescriptorHelper.descFromMol(mol, descriptors[0], true);
571 }
572 catch (DescriptorException ex)
573 {
574 ex.printStackTrace();
575
576 return null;
577 }
578
579 if (result == null)
580 {
581 logger.error("Descriptor '" + descriptors[0] +
582 "' not found in comparison molecule " + mol.getTitle() + ".");
583
584 return null;
585 }
586
587 JOEBitVec bitset = null;
588
589 if (JOEHelper.hasInterface(result, "BinaryValue"))
590 {
591 bitset = ((BinaryValue) result).getBinaryValue();
592 }
593 else
594 {
595 logger.error("Descriptor '" + descriptors[0] +
596 "' must be a 'bit set' value.");
597
598 return null;
599 }
600
601 return bitset;
602 }
603
604 private final double[] getDescriptors(final JOEMol mol, double[] vals)
605 {
606 DescResult result = null;
607 int size = vals.length;
608
609 for (int i = 0; i < size; i++)
610 {
611 try
612 {
613 result = DescriptorHelper.descFromMol(mol, descriptors[i], true);
614 }
615 catch (DescriptorException ex)
616 {
617 logger.error(ex.toString());
618 vals[i] = Double.NaN;
619 }
620
621 if (result == null)
622 {
623 logger.error("Descriptor '" + descriptors[i] +
624 "' not found in comparison molecule " + mol.getTitle() +
625 ".");
626 vals[i] = Double.NaN;
627 }
628
629 vals[i] = ((NativeValue) result).getDoubleNV();
630
631 if (JOEHelper.hasInterface(result, "NativeValue"))
632 {
633 }
634 else
635 {
636 logger.error("Descriptor '" + descriptors[i] +
637 "' must be a native value.");
638 vals[i] = Double.NaN;
639 }
640 }
641
642 return vals;
643 }
644
645 private final DistanceMetricValue getDistMetricValue(final JOEMol mol)
646 {
647 DescResult result = null;
648
649 try
650 {
651 result = DescriptorHelper.descFromMol(mol, descriptors[0], true);
652 }
653 catch (DescriptorException ex)
654 {
655 ex.printStackTrace();
656
657 return null;
658 }
659
660 if (result == null)
661 {
662 logger.error("Descriptor '" + descriptors[0] +
663 "' not found in comparison molecule " + mol.getTitle() + ".");
664
665 return null;
666 }
667
668 DistanceMetricValue dMetricValue = null;
669
670 if (JOEHelper.hasInterface(result, "DistanceMetricValue"))
671 {
672 dMetricValue = ((DistanceMetricValue) result);
673 }
674 else
675 {
676 logger.error("Descriptor '" + descriptors[0] +
677 "' must be a 'distance metric value' value.");
678
679 return null;
680 }
681
682 return dMetricValue;
683 }
684
685 /**
686 * Gets the doubleDesc attribute of the ComparisonHelper object
687 *
688 * @param mol Description of the Parameter
689 * @param _descName Description of the Parameter
690 * @return The doubleDesc value
691 * @exception DescriptorException Description of the Exception
692 */
693 private final double getDoubleDesc(final JOEMol mol, String _descName)
694 throws DescriptorException
695 {
696 double value = Double.NaN;
697 DescResult result = null;
698
699 // try
700 // {
701 result = DescriptorHelper.descFromMol(mol, _descName, true);
702
703 // }
704 // catch (DescriptorException ex)
705 // {
706 // throw ex;
707 // }
708 if (JOEHelper.hasInterface(result, "NativeValue"))
709 {
710 value = ((NativeValue) result).getDoubleNV();
711 }
712
713 return value;
714 }
715
716 /**
717 * Description of the Method
718 *
719 * @param mol Description of the Parameter
720 * @return Description of the Return Value
721 */
722 private final double distance(final JOEBitVec bitset, int index)
723 {
724 if (bitset == null)
725 {
726 return Double.NaN;
727 }
728
729 JOEBitVec target = (JOEBitVec) descBinaryValue.get(index);
730
731 return bitset.tanimoto(target);
732 }
733
734 private final double distance(final JOEBitVec source, final JOEBitVec target)
735 {
736 if ((source == null) || (target == null))
737 {
738 return Double.NaN;
739 }
740
741 return source.tanimoto(target);
742 }
743
744 /**
745 * Description of the Method
746 *
747 * @param mol Description of the Parameter
748 * @return Description of the Return Value
749 */
750 private final double distance(final DistanceMetricValue dMetricValue,
751 int index)
752 {
753 if (dMetricValue == null)
754 {
755 return Double.NaN;
756 }
757
758 DistanceMetricValue targetValue = (DistanceMetricValue) distMetricValue.get(index);
759
760 return dMetricValue.getDistance(targetValue);
761 }
762
763 private final double distance(final DistanceMetricValue source,
764 final DistanceMetricValue target)
765 {
766 if ((source == null) || (target == null))
767 {
768 return Double.NaN;
769 }
770
771 return source.getDistance(target);
772 }
773
774 /**
775 * Description of the Method
776 *
777 * @param mol Description of the Parameter
778 * @return Description of the Return Value
779 */
780 private final double distance(final double[] ds, int index)
781 {
782 double[] tmpVal = (double[]) descValues.get(index);
783
784 return distance(ds, tmpVal);
785 }
786
787 private final double distance(final double[] source, final double[] target)
788 {
789 if ((source == null) || (target == null))
790 {
791 return Double.NaN;
792 }
793
794 int s = source.length;
795 double val = 0.0;
796 double sum = 0.0;
797
798 for (int i = 0; i < s; i++)
799 {
800 val = target[i] - source[i];
801 sum += (val * val);
802 }
803
804 return Math.sqrt(sum);
805 }
806 }
807 ///////////////////////////////////////////////////////////////////////////////
808 // END OF FILE.
809 ///////////////////////////////////////////////////////////////////////////////