Source code: joelib/process/types/DescStatistic.java
1 ///////////////////////////////////////////////////////////////////////////////
2 // Filename: $RCSfile: DescStatistic.java,v $
3 // Purpose: Calculates per-descriptor statistics (count, min, max, sum, sum of squares, mean, standard deviation) for the molecules of a molecule file.
4 // Language: Java
5 // Compiler: JDK 1.4
6 // Authors: Joerg K. Wegner
7 // Version: $Revision: 1.9 $
8 // $Date: 2003/08/22 15:56:20 $
9 // $Author: wegner $
10 //
11 // Copyright (c) Dept. Computer Architecture, University of Tuebingen, Germany
12 //
13 // This program is free software; you can redistribute it and/or modify
14 // it under the terms of the GNU General Public License as published by
15 // the Free Software Foundation version 2 of the License.
16 //
17 // This program is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 // GNU General Public License for more details.
21 ///////////////////////////////////////////////////////////////////////////////
22 package joelib.process.types;
23
24 import joelib.data.JOEDataType;
25 import joelib.data.JOEGenericData;
26 import joelib.data.JOEPairData;
27
28 import joelib.desc.NativeValue;
29
30 import joelib.io.IOType;
31 import joelib.io.SimpleReader;
32
33 import joelib.molecule.JOEMol;
34 import joelib.molecule.JOEMolVector;
35
36 import joelib.process.JOEProcessException;
37 import joelib.process.SimpleProcess;
38
39 import joelib.util.JOEHelper;
40 import joelib.util.JOEProperty;
41
42 import joelib.util.iterator.GenericDataIterator;
43
44 import wsi.ra.tool.ArrayStatistic;
45 import wsi.ra.tool.PropertyHolder;
46 import wsi.ra.tool.ResourceLoader;
47
48 /*==========================================================================*
49 * IMPORTS
50 *==========================================================================*/
51 import java.io.FileInputStream;
52 import java.io.FileOutputStream;
53 import java.io.IOException;
54 import java.io.InputStreamReader;
55 import java.io.LineNumberReader;
56 import java.io.PrintStream;
57
58 import java.net.URL;
59
60 import java.util.Enumeration;
61 import java.util.Hashtable;
62 import java.util.Map;
63 import java.util.StringTokenizer;
64 import java.util.Vector;
65
66 import org.apache.log4j.Category;
67
68
69 /*==========================================================================*
70 * CLASS DECLARATION
71 *==========================================================================*/
72
73 /**
74 * Calling processor classes if the filter rule fits.
75 *
76 * @author wegnerj
77 * @license GPL
78 * @cvsversion $Revision: 1.9 $, $Date: 2003/08/22 15:56:20 $
79 */
80 public class DescStatistic extends SimpleProcess implements java.io.Serializable
81 {
82 //~ Static fields/initializers /////////////////////////////////////////////
83
84 /*-------------------------------------------------------------------------*
85 * public static member variables
86 *-------------------------------------------------------------------------*/
87
88 /**
89 * Obtain a suitable logger.
90 */
91 private static Category logger = Category.getInstance(
92 "joelib.process.types.DescStatistic");
93
94 // private final static JOEProperty[] ACCEPTED_PROPERTIES = new JOEProperty[]{
95 // new JOEProperty("SKIP_WRITER", "joelib.io.MoleculeFileType", "Writer for skipped molecule entries.", true),
96 // new JOEProperty("DELIMITER", "java.lang.String", "Delimiter between descriptors in flat mode.", true),
97 // new JOEProperty("COMMENT", "java.lang.String", "Comment character of the first line in flat mode.", true)
98 // };
99 private final static String FILE_EXT = ".statistic";
100
101 //~ Instance fields ////////////////////////////////////////////////////////
102
103 private Hashtable notNative = new Hashtable(50);
104 private Hashtable statistic = new Hashtable(50);
105 private Vector desc2ignore;
106
107 //~ Constructors ///////////////////////////////////////////////////////////
108
109 /*-------------------------------------------------------------------------*
110 * constructor
111 *-------------------------------------------------------------------------*/
112
113 /**
114 * Constructor for the DescSelectionWriter object
115 */
116 public DescStatistic()
117 {
118 clear();
119
120 // load descriptors which should be ignored
121 String value;
122
123 if ((value = PropertyHolder.instance().getProperty(this,
124 "descriptors2ignore")) == null)
125 {
126 }
127 else
128 {
129 Vector tmpVec = ResourceLoader.readLines(value);
130
131 if (tmpVec == null)
132 {
133 logger.error("File with descriptor names to ignore not found.");
134 }
135
136 desc2ignore = tmpVec;
137 }
138 }
139
140 //~ Methods ////////////////////////////////////////////////////////////////
141
142 // public static DescStatistic getDescStatistic(IOType inType, String inFile)
143 // {
144 // FileInputStream fis=null;
145 // try
146 // {
147 // fis=new FileInputStream(inFile);
148 // }
149 // catch (Exception ex)
150 // {
151 // ex.printStackTrace();
152 // return null;
153 // }
154 //
155 // return getDescStatistic(inType, fis);
156 // }
157 public static DescStatistic getDescStatistic(JOEMolVector molecules)
158 {
159 DescStatistic statistic = new DescStatistic();
160 int size = molecules.getSize();
161
162 JOEMol mol;
163
164 for (int i = 0; i < size; i++)
165 {
166 mol = molecules.getMol(i);
167
168 try
169 {
170 statistic.process(mol, null);
171 }
172 catch (JOEProcessException ex)
173 {
174 logger.error(ex.toString());
175 statistic = null;
176
177 return null;
178 }
179 }
180
181 return statistic;
182 }
183
184 /**
185 * Gets the descStatistic attribute of the DescStatistic class
186 *
187 * @param inType Description of the Parameter
188 * @param inFile Description of the Parameter
189 * @return The descStatistic value
190 */
191 public static DescStatistic getDescStatistic(IOType inType, String inFile)
192 {
193 DescStatistic statistic = new DescStatistic();
194
195 // load descriptor statistic if file exists
196 if (existsStatisticFileFor(inFile))
197 {
198 statistic.fromFileFor(inFile);
199
200 return statistic;
201 }
202
203 // create new descriptor statistic
204 SimpleReader reader = null;
205
206 try
207 {
208 reader = new SimpleReader(new FileInputStream(inFile), inType);
209 }
210 catch (Exception ex)
211 {
212 logger.error(ex.getMessage());
213
214 return null;
215 }
216
217 logger.info("Calculate descriptor statistic.");
218
219 JOEMol mol = new JOEMol(inType, inType);
220
221 for (;;)
222 {
223 try
224 {
225 if (!reader.readNext(mol))
226 {
227 break;
228 }
229 }
230 catch (Exception ex)
231 {
232 logger.error(ex.getMessage());
233 statistic = null;
234
235 return null;
236 }
237
238 try
239 {
240 statistic.process(mol, null);
241 }
242 catch (JOEProcessException ex)
243 {
244 logger.error(ex.getMessage());
245 statistic = null;
246
247 return null;
248 }
249 }
250
251 //reader.close();
252 // store descriptor statistic in file
253 statistic.writeStatisticFileFor(inFile);
254
255 return statistic;
256 }
257
258 /**
259 * Gets the descriptorStatistic attribute of the DescStatistic object
260 *
261 * @param descriptor Description of the Parameter
262 * @return The descriptorStatistic value
263 */
264 public ArrayStatistic getDescriptorStatistic(String descriptor)
265 {
266 // if(statistic==null)return null;
267 ArrayStatistic arrayStat = (ArrayStatistic) statistic.get(descriptor);
268
269 if (arrayStat == null)
270 {
271 logger.error("There exist no descriptor statistic for '" +
272 descriptor + "'");
273
274 return null;
275 }
276
277 arrayStat.calculateDerived();
278
279 return arrayStat;
280 }
281
282 /**
283 * Gets the descriptors attribute of the DescStatistic object
284 *
285 * @return The descriptors value
286 */
287 public Enumeration getDescriptors()
288 {
289 // if(statistic==null)return null;
290 return statistic.keys();
291 }
292
293 /**
294 * Gets the descriptorStatistic attribute of the DescStatistic object
295 *
296 * @param descriptor Description of the Parameter
297 * @return The descriptorStatistic value
298 */
299 public boolean isNative(String descriptor)
300 {
301 return !notNative.containsKey(descriptor);
302 }
303
304 /**
305 * Description of the Method
306 *
307 * @return Description of the Return Value
308 */
309 public boolean clear()
310 {
311 // if(statistic==null)return false;
312 statistic.clear();
313
314 return true;
315 }
316
317 /**
318 * Description of the Method
319 *
320 * @param fileName Description of the Parameter
321 * @return Description of the Return Value
322 */
323 public static boolean existsStatisticFileFor(String fileName)
324 {
325 FileInputStream fis = null;
326
327 // try to open file
328 try
329 {
330 fis = new FileInputStream(fileName + FILE_EXT);
331 }
332 catch (Exception ex)
333 {
334 return false;
335 }
336
337 return true;
338 }
339
340 /**
341 * Description of the Method
342 *
343 * @param fileName Description of the Parameter
344 * @return Description of the Return Value
345 */
346 public boolean fromFile(String fileName)
347 {
348 LineNumberReader lnr = null;
349 String line;
350 boolean ok = true;
351 URL location = this.getClass().getClassLoader().getSystemResource(fileName);
352 String fName;
353
354 if (location != null)
355 {
356 fName = location.getFile();
357 }
358 else
359 {
360 fName = fileName;
361 }
362
363 // try to open file
364 try
365 {
366 lnr = new LineNumberReader(new InputStreamReader(
367 new FileInputStream(fName)));
368
369 if ((line = lnr.readLine()) == null)
370 {
371 return (false);
372 }
373
374 StringTokenizer st;
375 int i;
376 ArrayStatistic arrayStat = null;
377 String descriptor = null;
378 String noNativeName = null;
379 int tokens;
380
381 // define array statistic data types
382 int count = 0;
383 double sum = Double.NaN;
384 double sumSq = Double.NaN;
385 double stdDev = Double.NaN;
386 double mean = Double.NaN;
387 double min = Double.NaN;
388 double max = Double.NaN;
389
390 // read statistic data
391 String token;
392
393 while ((line = lnr.readLine()) != null)
394 {
395 if (line.length() == 0 /*|| line.charAt(0)=='#' */ )
396 {
397 continue;
398 }
399
400 st = new StringTokenizer(line, " \r\n\t");
401 tokens = st.countTokens();
402
403 // System.out.println("line ("+tokens+"): "+line);
404 i = 0;
405
406 if (tokens == 8)
407 {
408 while (st.hasMoreTokens())
409 {
410 i++;
411 token = st.nextToken();
412
413 try
414 {
415 switch (i)
416 {
417 case 1:
418 descriptor = token;
419
420 break;
421
422 case 2:
423 count = (int) Double.parseDouble(token);
424
425 break;
426
427 case 3:
428 min = Double.parseDouble(token);
429
430 break;
431
432 case 4:
433 max = Double.parseDouble(token);
434
435 break;
436
437 case 5:
438 sum = Double.parseDouble(token);
439
440 break;
441
442 case 6:
443 sumSq = Double.parseDouble(token);
444
445 break;
446
447 case 7:
448 mean = Double.parseDouble(token);
449
450 break;
451
452 case 8:
453 stdDev = Double.parseDouble(token);
454
455 break;
456 }
457 }
458 catch (NumberFormatException ex)
459 {
460 ok = false;
461 logger.error(ex.toString());
462 }
463 }
464
465 arrayStat = new ArrayStatistic(count, min, max, sum, sumSq,
466 mean, stdDev);
467
468 statistic.put(descriptor, arrayStat);
469
470 // System.out.println(""+descriptor+" "+arrayStat.toString());
471 }
472 else if (tokens == 3)
473 {
474 while (st.hasMoreTokens())
475 {
476 i++;
477 token = st.nextToken();
478
479 switch (i)
480 {
481 case 1:
482 descriptor = token;
483
484 break;
485
486 case 2:
487 count = (int) Double.parseDouble(token);
488
489 break;
490
491 case 3:
492 noNativeName = token;
493
494 break;
495 }
496 }
497
498 arrayStat = new ArrayStatistic();
499 arrayStat.count = count;
500 statistic.put(descriptor, arrayStat);
501 notNative.put(descriptor, noNativeName);
502 }
503 else
504 {
505 logger.error("Wrong format in line " + lnr.getLineNumber());
506 ok = false;
507 }
508 }
509 }
510 catch (IOException ex)
511 {
512 logger.error(ex.toString());
513 ok = false;
514 }
515
516 return ok;
517 }
518
519 public boolean fromFileFor(String fileName)
520 {
521 String fn = fileName + FILE_EXT;
522 logger.info("Load descriptor statistic from " + fn);
523
524 return fromFile(fn);
525 }
526
527 /**
528 * Description of the Method
529 *
530 * @param descriptor Description of the Parameter
531 * @return Description of the Return Value
532 */
533 public boolean hasDescriptorStatistic(String descriptor)
534 {
535 // if(statistic==null)return false;
536 return statistic.containsKey(descriptor);
537 }
538
539 /*-------------------------------------------------------------------------*
540 * public methods
541 *-------------------------------------------------------------------------*/
542
543 /**
544 * Description of the Method
545 *
546 * @return Description of the Return Value
547 */
548 public JOEProperty[] neededProperties()
549 {
550 // return ACCEPTED_PROPERTIES;
551 return null;
552 }
553
554 /**
555 * Description of the Method
556 *
557 * @param mol Description of the Parameter
558 * @param properties Description of the Parameter
559 * @return Description of the Return Value
560 * @exception JOEProcessException Description of the Exception
561 */
562 public boolean process(JOEMol mol, Map properties)
563 throws JOEProcessException
564 {
565 try
566 {
567 super.process(mol, properties);
568 }
569 catch (JOEProcessException e)
570 {
571 throw new JOEProcessException("Properties for " +
572 this.getClass().getName() + " not correct.");
573 }
574
575 // System.out.println("processing:::"+mol.getTitle());
576 JOEGenericData genericData;
577 GenericDataIterator gdit = mol.genericDataIterator();
578 ArrayStatistic arrayStat;
579 String descriptor;
580
581 // String ignoreDesc = PropertyHolder.instance().getProperties().getProperty("jcompchem.joelib.process.DescStatistic.ignoreDescriptor", "Entry_Number"); while (gdit.hasNext())
582 boolean ignoreDesc = false;
583
584 while (gdit.hasNext())
585 {
586 genericData = gdit.nextGenericData();
587 descriptor = genericData.getAttribute();
588
589 // ignore descriptors in list
590 if (desc2ignore != null)
591 {
592 ignoreDesc = false;
593
594 for (int i = 0; i < desc2ignore.size(); i++)
595 {
596 if (descriptor.equals((String) desc2ignore.get(i)))
597 {
598 // System.out.println("ignore " + desc2ignore.get(i));
599 ignoreDesc = true;
600
601 break;
602 }
603 }
604
605 if (ignoreDesc)
606 {
607 continue;
608 }
609 }
610
611 // parse data, if possible
612 genericData = mol.getData(descriptor, true);
613
614 // check descriptor statistic entry
615 if (statistic.containsKey(descriptor))
616 {
617 arrayStat = (ArrayStatistic) statistic.get(descriptor);
618 }
619 else
620 {
621 arrayStat = new ArrayStatistic();
622 statistic.put(descriptor, arrayStat);
623 }
624
625 if (genericData.getDataType() == JOEDataType.JOE_PAIR_DATA)
626 {
627 JOEPairData data = (JOEPairData) genericData;
628
629 if (JOEHelper.hasInterface(data, "NativeValue"))
630 {
631 arrayStat.add(((NativeValue) data).getDoubleNV());
632 }
633 else
634 {
635 arrayStat.count += 1;
636
637 String notNativeName = data.getValue().getClass().getName();
638
639 if (!notNative.containsKey(descriptor))
640 {
641 notNative.put(descriptor, notNativeName);
642 }
643 }
644 }
645 }
646
647 return true;
648 }
649
650 /**
651 * Description of the Method
652 *
653 * @param _desc Description of the Parameter
654 * @param as Description of the Parameter
655 * @return Description of the Return Value
656 */
657 public Object putArrayStatistic(String _desc, ArrayStatistic as)
658 {
659 return statistic.put(_desc, as);
660 }
661
662 /**
663 * Description of the Method
664 *
665 * @param descriptor Description of the Parameter
666 * @return Description of the Return Value
667 */
668 public String showDescriptorStatistic(String descriptor)
669 {
670 // if(statistic==null)return null;
671 ArrayStatistic arrayStat = (ArrayStatistic) statistic.get(descriptor);
672
673 if (arrayStat == null)
674 {
675 logger.error("There exist no descriptor statistic for '" +
676 descriptor + "'");
677
678 return null;
679 }
680
681 arrayStat.calculateDerived();
682
683 StringBuffer sb = new StringBuffer(100);
684 sb.append(descriptor);
685 sb.append('\n');
686 sb.append(arrayStat.toString());
687 sb.append('\n');
688
689 return sb.toString();
690 }
691
692 /**
693 * Description of the Method
694 *
695 * @return Description of the Return Value
696 */
697 public String toString()
698 {
699 // if(statistic==null)return null;
700 StringBuffer sb = new StringBuffer(10000);
701
702 sb.append("#Descriptor Count Min Max Sum SumSq Mean StdDev\n");
703
704 ArrayStatistic arrayStat;
705 String descriptor;
706 String noNativeName;
707
708 for (Enumeration e = getDescriptors(); e.hasMoreElements();)
709 {
710 descriptor = (String) e.nextElement();
711
712 // sb.append(showDescriptorStatistic((String)e.nextElement()));
713 if (notNative.containsKey(descriptor))
714 {
715 arrayStat = getDescriptorStatistic(descriptor);
716 noNativeName = (String) notNative.get(descriptor);
717 sb.append(descriptor);
718 sb.append(' ');
719 sb.append((int) arrayStat.count);
720 sb.append(' ');
721 sb.append(noNativeName);
722 }
723 else
724 {
725 arrayStat = getDescriptorStatistic(descriptor);
726 arrayStat.calculateDerived();
727 sb.append(descriptor);
728 sb.append(' ');
729 sb.append((int) arrayStat.count);
730 sb.append(' ');
731 sb.append(arrayStat.min);
732 sb.append(' ');
733 sb.append(arrayStat.max);
734 sb.append(' ');
735 sb.append(arrayStat.sum);
736 sb.append(' ');
737 sb.append(arrayStat.sumSq);
738 sb.append(' ');
739 sb.append(arrayStat.mean);
740 sb.append(' ');
741 sb.append(arrayStat.stdDev);
742 }
743
744 sb.append('\n');
745 }
746
747 return sb.toString();
748 }
749
750 public void writeStatisticFileFor(String _inFile)
751 {
752 String filename = _inFile + FILE_EXT;
753 PrintStream ps = null;
754
755 try
756 {
757 ps = new PrintStream(new FileOutputStream(filename));
758 ps.println(this.toString());
759 logger.info("Statistic for " + _inFile);
760 logger.info(" written to " + filename);
761 }
762 catch (Exception ex)
763 {
764 logger.warn(ex.toString());
765 logger.warn("Statistic not written for " + _inFile);
766 }
767 }
768
769 /*-------------------------------------------------------------------------*
770 * protected methods
771 *-------------------------------------------------------------------------*/
772 }
773 ///////////////////////////////////////////////////////////////////////////////
774 // END OF FILE.
775 ///////////////////////////////////////////////////////////////////////////////