Source code: joelib/process/types/DescBinning.java
1 ///////////////////////////////////////////////////////////////////////////////
2 // Filename: $RCSfile: DescBinning.java,v $
3 // Purpose: Calculates the binning (histogram counts) of the descriptors in a molecule file.
4 // Language: Java
5 // Compiler: JDK 1.4
6 // Authors: Joerg K. Wegner
7 // Version: $Revision: 1.15 $
8 // $Date: 2003/08/22 15:56:20 $
9 // $Author: wegner $
10 //
11 // Copyright (c) Dept. Computer Architecture, University of Tuebingen, Germany
12 //
13 // This program is free software; you can redistribute it and/or modify
14 // it under the terms of the GNU General Public License as published by
15 // the Free Software Foundation version 2 of the License.
16 //
17 // This program is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 // GNU General Public License for more details.
21 ///////////////////////////////////////////////////////////////////////////////
22 package joelib.process.types;
23
24 import joelib.data.JOEDataType;
25 import joelib.data.JOEGenericData;
26 import joelib.data.JOEPairData;
27
28 import joelib.desc.NativeValue;
29
30 import joelib.io.IOType;
31 import joelib.io.SimpleReader;
32
33 import joelib.molecule.JOEMol;
34 import joelib.molecule.JOEMolVector;
35
36 import joelib.process.JOEProcessException;
37 import joelib.process.SimpleProcess;
38
39 import joelib.util.JOEHelper;
40 import joelib.util.JOEProperty;
41
42 import joelib.util.iterator.GenericDataIterator;
43
44 import wsi.ra.tool.ArrayBinning;
45 import wsi.ra.tool.ArrayStatistic;
46 import wsi.ra.tool.PropertyHolder;
47 import wsi.ra.tool.ResourceLoader;
48
49 /*==========================================================================*
50 * IMPORTS
51 *========================================================================== */
52 import java.io.FileInputStream;
53 import java.io.FileOutputStream;
54 import java.io.IOException;
55 import java.io.InputStreamReader;
56 import java.io.LineNumberReader;
57 import java.io.PrintStream;
58
59 import java.net.URL;
60
61 import java.util.Enumeration;
62 import java.util.Hashtable;
63 import java.util.Map;
64 import java.util.StringTokenizer;
65 import java.util.Vector;
66
67 import org.apache.log4j.Category;
68
69
70 /*==========================================================================*
71 * CLASS DECLARATION
72 *========================================================================== */
73
74 /**
75 * Calling processor classes if the filter rule fits.
76 *
77 * @author wegnerj
78 * @license GPL
79 * @cvsversion $Revision: 1.15 $, $Date: 2003/08/22 15:56:20 $
80 */
81 public class DescBinning extends SimpleProcess implements java.io.Serializable
82 {
83 //~ Static fields/initializers /////////////////////////////////////////////
84
85 /*-------------------------------------------------------------------------*
86 * public static member variables
87 *------------------------------------------------------------------------- */
88
89 /**
90 * Obtain a suitable logger.
91 */
92 private static Category logger = Category.getInstance(
93 "joelib.process.types.DescBinning");
94
95 // private final static JOEProperty[] ACCEPTED_PROPERTIES = new JOEProperty[]{
96 // new JOEProperty("NUMBER_OF_BINS", "java.lang.Integer", "Number of bins to create.", true),
97 // };
98 private final static String FILE_EXT = ".binning";
99
100 //~ Instance fields ////////////////////////////////////////////////////////
101
102 private DescStatistic statistic;
103 private Hashtable bins = new Hashtable();
104 private Vector desc2ignore;
105 private int numberOfBins = -1;
106
107 //~ Constructors ///////////////////////////////////////////////////////////
108
109 /*-------------------------------------------------------------------------*
110 * constructor
111 *------------------------------------------------------------------------- */
112
113 /**
114 * Constructor for the DescSelectionWriter object
115 */
116 public DescBinning()
117 {
118 statistic = new DescStatistic();
119 clear();
120
121 // load descriptors which should be ignored
122 String value;
123
124 if ((value = PropertyHolder.instance().getProperty(this,
125 "descriptors2ignore")) == null)
126 {
127 }
128 else
129 {
130 Vector tmpVec = ResourceLoader.readLines(value);
131
132 if (tmpVec == null)
133 {
134 logger.error("File with descriptor names to ignore not found.");
135 }
136
137 desc2ignore = tmpVec;
138 }
139
140 numberOfBins = PropertyHolder.instance().getInt(this, "numberOfBins",
141 1, Integer.MAX_VALUE, 20);
142 }
143
144 //~ Methods ////////////////////////////////////////////////////////////////
145
146 // public static DescBinning getDescBinning(IOType inType, String inFile, int _numberOfBins)
147 // {
148 // FileInputStream fis=null;
149 // try
150 // {
151 // fis=new FileInputStream(inFile);
152 // }
153 // catch (Exception ex)
154 // {
155 // ex.printStackTrace();
156 // return null;
157 // }
158 //
159 // return getDescBinning(inType, fis, _numberOfBins);
160 // }
161 // public static DescBinning getDescBinning(IOType inType, InputStream inStream, int _numberOfBins)
162
163 /**
164 * Gets the descBinning attribute of the DescBinning class
165 *
166 * @param inType Description of the Parameter
167 * @param inFile Description of the Parameter
168 * @param _numberOfBins Description of the Parameter
169 * @return The descBinning value
170 */
171 public static DescBinning getDescBinning(IOType inType, String inFile)
172 {
173 return getDescBinning(inType, inFile, -1);
174 }
175
176 /**
177 * Gets the descBinning attribute of the DescBinning class
178 *
179 * @param inType Description of the Parameter
180 * @param inFile Description of the Parameter
181 * @param _numberOfBins Description of the Parameter
182 * @return The descBinning value
183 */
184 public static DescBinning getDescBinning(JOEMolVector molecules)
185 {
186 DescBinning binning = new DescBinning();
187
188 binning.statistic = DescStatistic.getDescStatistic(molecules);
189
190 int size = molecules.getSize();
191 JOEMol mol;
192
193 for (int i = 0; i < size; i++)
194 {
195 mol = molecules.getMol(i);
196
197 try
198 {
199 binning.process(mol, null);
200 }
201 catch (JOEProcessException ex)
202 {
203 ex.printStackTrace();
204
205 return null;
206 }
207 }
208
209 return binning;
210 }
211
212 public static DescBinning getDescBinning(IOType inType, String inFile,
213 int _numberOfBins)
214 {
215 DescBinning binning = new DescBinning();
216
217 // load descriptor binning if file exists
218 if (existsBinningFileFor(inFile))
219 {
220 binning.fromFileFor(inFile);
221
222 return binning;
223 }
224
225 // create new descriptor binning
226 // descriptors = _descriptors;
227 // InputStream clonedIS = null;
228 SimpleReader reader = null;
229
230 try
231 {
232 if (_numberOfBins < 1)
233 {
234 binning.init(inType, inFile);
235 }
236 else
237 {
238 binning.init(inType, inFile, _numberOfBins);
239 }
240
241 // clonedIS=(InputStream)inStream.clone();
242 reader = new SimpleReader(new FileInputStream(inFile), inType);
243 }
244 catch (Exception ex)
245 {
246 ex.printStackTrace();
247 logger.error(ex.getMessage());
248
249 return null;
250 }
251
252 logger.info("Calculate descriptor binning.");
253
254 JOEMol mol = new JOEMol(inType, inType);
255
256 for (;;)
257 {
258 try
259 {
260 if (!reader.readNext(mol))
261 {
262 break;
263 }
264 }
265 catch (Exception ex)
266 {
267 ex.printStackTrace();
268 logger.error(ex.getMessage());
269
270 return null;
271 }
272
273 try
274 {
275 binning.process(mol, null);
276 }
277 catch (JOEProcessException ex)
278 {
279 ex.printStackTrace();
280 logger.error(ex.getMessage());
281
282 return null;
283 }
284 }
285
286 reader.close();
287 reader = null;
288
289 // store descriptor binning in file
290 binning.writeBinningFileFor(inFile);
291
292 return binning;
293 }
294
295 /**
296 * Gets the descStatistic attribute of the DescBinning object
297 *
298 * @return The descStatistic value
299 */
300 public DescStatistic getDescStatistic()
301 {
302 return statistic;
303 }
304
305 /**
306 * Gets the descriptorBinning attribute of the DescBinning object
307 *
308 * @param descriptor Description of the Parameter
309 * @return The descriptorBinning value
310 */
311 public ArrayBinning getDescriptorBinning(String descriptor)
312 {
313 if (statistic == null)
314 {
315 return null;
316 }
317
318 ArrayBinning arrayBinning = (ArrayBinning) bins.get(descriptor);
319
320 if (arrayBinning == null)
321 {
322 logger.error("There exist no descriptor binning for '" +
323 descriptor + "'");
324
325 return null;
326 }
327
328 arrayBinning.calculateDerived();
329
330 return arrayBinning;
331 }
332
333 /**
334 * Gets the descriptors attribute of the DescBinning object
335 *
336 * @return The descriptors value
337 */
338 public Enumeration getDescriptors()
339 {
340 if (statistic == null)
341 {
342 return null;
343 }
344
345 return bins.keys();
346 }
347
348 /**
349 * Description of the Method
350 *
351 * @return Description of the Return Value
352 */
353 public boolean clear()
354 {
355 if (statistic == null)
356 {
357 return false;
358 }
359
360 /*if (!statistic.clear())
361 *{
362 *return false;
363 *} */
364
365 //bins.clear();
366 return true;
367 }
368
369 /**
370 * Description of the Method
371 *
372 * @param fileName Description of the Parameter
373 * @return Description of the Return Value
374 */
375 public static boolean existsBinningFileFor(String fileName)
376 {
377 FileInputStream fis = null;
378
379 // try to open file
380 try
381 {
382 fis = new FileInputStream(fileName + FILE_EXT);
383 }
384 catch (Exception ex)
385 {
386 return false;
387 }
388
389 return true;
390 }
391
392 /**
393 * Description of the Method
394 *
395 * @param fileName Description of the Parameter
396 * @return Description of the Return Value
397 */
398 public boolean fromFile(String fileName)
399 {
400 LineNumberReader lnr = null;
401 String line;
402 boolean ok = true;
403 int VARS = 10;
404 URL location = this.getClass().getClassLoader().getSystemResource(fileName);
405 String fName;
406
407 if (location != null)
408 {
409 fName = location.getFile();
410 }
411 else
412 {
413 fName = fileName;
414 }
415
416 // try to open file
417 try
418 {
419 lnr = new LineNumberReader(new InputStreamReader(
420 new FileInputStream(fName)));
421
422 StringTokenizer st;
423 int tokens = 0;
424 String token;
425 int i;
426
427 // get binning structure from first line
428 if ((line = lnr.readLine()) == null)
429 {
430 return (false);
431 }
432
433 // System.out.println("line:"+line);
434 st = new StringTokenizer(line, " \r\n\t");
435 tokens = st.countTokens();
436 numberOfBins = tokens - VARS;
437
438 if (numberOfBins <= 0)
439 {
440 logger.error("Negative number of bins.");
441
442 return false;
443 }
444
445 // read data
446 ArrayStatistic arrayStat = null;
447 ArrayBinning arrayBinning = null;
448 String descriptor = null;
449
450 // define array statistic data types
451 int count = 0;
452 double sum = Double.NaN;
453 double sumSq = Double.NaN;
454 double stdDev = Double.NaN;
455 double mean = Double.NaN;
456 double min = Double.NaN;
457 double max = Double.NaN;
458 double shannonEntropy = Double.NaN;
459 double entropy = Double.NaN;
460 boolean containsNaN = false;
461 int[] tmpA = new int[numberOfBins];
462
463 // read statistic data
464 while ((line = lnr.readLine()) != null)
465 {
466 if (line.length() == 0 /*|| line.charAt(0)=='#' */ )
467 {
468 continue;
469 }
470
471 st = new StringTokenizer(line, " \r\n\t");
472 tokens = st.countTokens();
473
474 // System.out.println("line ("+tokens+", b="+numberOfBins+"): "+line);
475 i = 0;
476
477 if (tokens == (VARS + numberOfBins))
478 {
479 while (st.hasMoreTokens())
480 {
481 i++;
482 token = st.nextToken();
483
484 try
485 {
486 switch (i)
487 {
488 case 1:
489 descriptor = token;
490
491 break;
492
493 case 2:
494 count = (int) Double.parseDouble(token);
495
496 break;
497
498 case 3:
499 shannonEntropy = Double.parseDouble(token);
500
501 break;
502
503 case 4:
504 entropy = (int) Double.parseDouble(token);
505
506 break;
507
508 case 5:
509 min = Double.parseDouble(token);
510
511 break;
512
513 case 6:
514 max = Double.parseDouble(token);
515
516 break;
517
518 case 7:
519 sum = Double.parseDouble(token);
520
521 break;
522
523 case 8:
524 sumSq = Double.parseDouble(token);
525
526 break;
527
528 case 9:
529 mean = Double.parseDouble(token);
530
531 break;
532
533 case 10:
534 stdDev = Double.parseDouble(token);
535
536 break;
537
538 case 11:
539 containsNaN = Boolean.valueOf(token)
540 .booleanValue();
541
542 break;
543
544 default:
545
546 // System.out.print(" "+(i-VARS-1)+"="+token);
547 tmpA[i - VARS - 1] = Integer.parseInt(token);
548
549 break;
550 }
551 }
552 catch (NumberFormatException ex)
553 {
554 ok = false;
555 ex.printStackTrace();
556 logger.error(ex.toString());
557 }
558 }
559
560 arrayStat = new ArrayStatistic(count, min, max, sum, sumSq,
561 mean, stdDev);
562 statistic.putArrayStatistic(descriptor, arrayStat);
563 arrayBinning = new ArrayBinning(numberOfBins, arrayStat);
564 arrayBinning.shannonEntropy = shannonEntropy;
565 arrayBinning.entropy = entropy;
566 arrayBinning.binning = new int[numberOfBins];
567 arrayBinning.containsNaN = containsNaN;
568 System.arraycopy(tmpA, 0, arrayBinning.binning, 0,
569 numberOfBins);
570 bins.put(descriptor, arrayBinning);
571
572 // System.out.print(""+descriptor+" "+arrayBinning.toString());
573 }
574 else
575 {
576 logger.error("Wrong format in line " + lnr.getLineNumber());
577 ok = false;
578 }
579 }
580 }
581 catch (IOException ex)
582 {
583 ex.printStackTrace();
584 logger.error(ex.toString());
585 ok = false;
586 }
587
588 return ok;
589 }
590
591 public boolean fromFileFor(String fileName)
592 {
593 String fn = fileName + FILE_EXT;
594 logger.info("Load descriptor binning from " + fn);
595
596 return fromFile(fn);
597 }
598
599 /*-------------------------------------------------------------------------*
600 * public methods
601 *------------------------------------------------------------------------- */
602
603 /**
604 * Description of the Method
605 *
606 * @param _statistic Description of the Parameter
607 * @param _numberOfBins Description of the Parameter
608 */
609 public void init(DescStatistic _statistic)
610 {
611 statistic = _statistic;
612 }
613
614 /**
615 * Description of the Method
616 *
617 * @param _statistic Description of the Parameter
618 * @param _numberOfBins Description of the Parameter
619 */
620 public void init(DescStatistic _statistic, int _numberOfBins)
621 {
622 statistic = _statistic;
623 numberOfBins = _numberOfBins;
624 }
625
626 /**
627 * Description of the Method
628 *
629 * @param inType Description of the Parameter
630 * @param _numberOfBins Description of the Parameter
631 * @param inFile Description of the Parameter
632 * @exception Exception Description of the Exception
633 */
634 public void init(IOType inType, String inFile) throws Exception
635 {
636 statistic = DescStatistic.getDescStatistic(inType, inFile);
637 }
638
639 /**
640 * Description of the Method
641 *
642 * @param inType Description of the Parameter
643 * @param _numberOfBins Description of the Parameter
644 * @param inFile Description of the Parameter
645 * @exception Exception Description of the Exception
646 */
647 public void init(IOType inType, String inFile, int _numberOfBins)
648 throws Exception
649 {
650 numberOfBins = _numberOfBins;
651 statistic = DescStatistic.getDescStatistic(inType, inFile);
652
653 // System.out.println("Descriptor statistic for binning:\n " + statistic.toString());
654 }
655
656 /**
657 * Description of the Method
658 *
659 * @return Description of the Return Value
660 */
661 public JOEProperty[] neededProperties()
662 {
663 // return ACCEPTED_PROPERTIES;
664 return null;
665 }
666
667 /**
668 * Description of the Method
669 *
670 * @return Description of the Return Value
671 */
672 public int numberOfDescriptors()
673 {
674 if (statistic == null)
675 {
676 return -1;
677 }
678
679 return bins.size();
680 }
681
682 /**
683 * Description of the Method
684 *
685 * @param mol Description of the Parameter
686 * @param properties Description of the Parameter
687 * @return Description of the Return Value
688 * @exception JOEProcessException Description of the Exception
689 */
690 public boolean process(JOEMol mol, Map properties)
691 throws JOEProcessException
692 {
693 if (statistic == null)
694 {
695 return false;
696 }
697
698 try
699 {
700 super.process(mol, properties);
701 }
702 catch (JOEProcessException e)
703 {
704 throw new JOEProcessException("Properties for " +
705 this.getClass().getName() + " not correct.");
706 }
707
708 JOEGenericData genericData;
709 GenericDataIterator gdit = mol.genericDataIterator();
710 ArrayStatistic arrayStat;
711 String descriptor;
712 double value = 0.0;
713 ArrayBinning arrayBinning;
714 boolean ignoreDesc = false;
715
716 while (gdit.hasNext())
717 {
718 genericData = gdit.nextGenericData();
719 descriptor = genericData.getAttribute();
720
721 // ignore descriptors in list
722 if (desc2ignore != null)
723 {
724 ignoreDesc = false;
725
726 for (int i = 0; i < desc2ignore.size(); i++)
727 {
728 if (descriptor.equals((String) desc2ignore.get(i)))
729 {
730 ignoreDesc = true;
731
732 break;
733 }
734 }
735
736 if (ignoreDesc)
737 {
738 continue;
739 }
740 }
741
742 if (genericData.getDataType() == JOEDataType.JOE_PAIR_DATA)
743 {
744 // parse data, if possible
745 genericData = mol.getData(descriptor, true);
746
747 // check descriptor binning entry
748 if (bins.containsKey(descriptor))
749 {
750 arrayBinning = (ArrayBinning) bins.get(descriptor);
751 }
752 else
753 {
754 // check descriptor statistic entry
755 arrayStat = statistic.getDescriptorStatistic(descriptor);
756
757 if (arrayStat == null)
758 {
759 logger.error("Statistic for " + descriptor +
760 " does not exist.");
761
762 return false;
763 }
764
765 arrayBinning = new ArrayBinning(numberOfBins, arrayStat);
766 bins.put(descriptor, arrayBinning);
767 }
768
769 JOEPairData data = (JOEPairData) genericData;
770
771 if (JOEHelper.hasInterface(data, "NativeValue"))
772 {
773 value = ((NativeValue) data).getDoubleNV();
774
775 if (arrayBinning.add(value) == -1)
776 {
777 logger.error("Out of range (" + value + ") in " +
778 descriptor);
779
780 return false;
781 }
782 }
783
784 // else
785 // {
786 // }
787 }
788 }
789
790 return true;
791 }
792
793 /**
794 * Description of the Method
795 *
796 * @return Description of the Return Value
797 */
798 public String toString()
799 {
800 if (statistic == null)
801 {
802 return null;
803 }
804
805 StringBuffer sb = new StringBuffer(10000);
806
807 //sb.append("Descriptor Binning\n");
808 sb.append(
809 "#Descriptor Count ShannonEntropy Entropy Min Max Sum SumSq Mean StdDev NaN");
810
811 for (int i = 1; i <= numberOfBins; i++)
812 {
813 sb.append(" bin");
814 sb.append(i);
815 }
816
817 sb.append("\n");
818
819 ArrayBinning arrayBinning;
820 String descriptor;
821
822 for (Enumeration e = getDescriptors(); e.hasMoreElements();)
823 {
824 descriptor = (String) e.nextElement();
825
826 // sb.append(showDescriptorStatistic((String)e.nextElement()));
827 arrayBinning = (ArrayBinning) bins.get(descriptor);
828 sb.append(descriptor);
829 sb.append(' ');
830 sb.append(arrayBinning.toString());
831 }
832
833 return sb.toString();
834 }
835
836 public void writeBinningFileFor(String _inFile)
837 {
838 String filename = _inFile + FILE_EXT;
839 PrintStream ps = null;
840
841 try
842 {
843 ps = new PrintStream(new FileOutputStream(filename));
844 ps.println(this.toString());
845 logger.info("Binning for " + _inFile);
846 logger.info(" written to " + filename);
847 }
848 catch (Exception ex)
849 {
850 logger.warn(ex.toString());
851 logger.warn("Binning not written for " + _inFile);
852 }
853 }
854
855 /*-------------------------------------------------------------------------*
856 * private methods
857 *------------------------------------------------------------------------- */
858 }
859 ///////////////////////////////////////////////////////////////////////////////
860 // END OF FILE.
861 ///////////////////////////////////////////////////////////////////////////////