Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: joelib/process/types/DescStatistic.java


1   ///////////////////////////////////////////////////////////////////////////////
2   //  Filename: $RCSfile: DescStatistic.java,v $
3   //  Purpose:  Counts the number of descriptors and molecules in a molecule file.
4   //  Language: Java
5   //  Compiler: JDK 1.4
6   //  Authors:  Joerg K. Wegner
7   //  Version:  $Revision: 1.9 $
8   //            $Date: 2003/08/22 15:56:20 $
9   //            $Author: wegner $
10  //
11  //  Copyright (c) Dept. Computer Architecture, University of Tuebingen, Germany
12  //
13  //  This program is free software; you can redistribute it and/or modify
14  //  it under the terms of the GNU General Public License as published by
15  //  the Free Software Foundation version 2 of the License.
16  //
17  //  This program is distributed in the hope that it will be useful,
18  //  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  //  GNU General Public License for more details.
21  ///////////////////////////////////////////////////////////////////////////////
22  package joelib.process.types;
23  
24  import joelib.data.JOEDataType;
25  import joelib.data.JOEGenericData;
26  import joelib.data.JOEPairData;
27  
28  import joelib.desc.NativeValue;
29  
30  import joelib.io.IOType;
31  import joelib.io.SimpleReader;
32  
33  import joelib.molecule.JOEMol;
34  import joelib.molecule.JOEMolVector;
35  
36  import joelib.process.JOEProcessException;
37  import joelib.process.SimpleProcess;
38  
39  import joelib.util.JOEHelper;
40  import joelib.util.JOEProperty;
41  
42  import joelib.util.iterator.GenericDataIterator;
43  
44  import wsi.ra.tool.ArrayStatistic;
45  import wsi.ra.tool.PropertyHolder;
46  import wsi.ra.tool.ResourceLoader;
47  
48  /*==========================================================================*
49   * IMPORTS
50   *==========================================================================*/
51  import java.io.FileInputStream;
52  import java.io.FileOutputStream;
53  import java.io.IOException;
54  import java.io.InputStreamReader;
55  import java.io.LineNumberReader;
56  import java.io.PrintStream;
57  
58  import java.net.URL;
59  
60  import java.util.Enumeration;
61  import java.util.Hashtable;
62  import java.util.Map;
63  import java.util.StringTokenizer;
64  import java.util.Vector;
65  
66  import org.apache.log4j.Category;
67  
68  
69  /*==========================================================================*
70   * CLASS DECLARATION
71   *==========================================================================*/
72  
73  /**
74   *  Collects statistics (count, min, max, sum, sum of squares, mean, standard deviation) for the descriptors stored in molecules.
75   *
76   * @author     wegnerj
77   * @license    GPL
78   * @cvsversion    $Revision: 1.9 $, $Date: 2003/08/22 15:56:20 $
79   */
80  public class DescStatistic extends SimpleProcess implements java.io.Serializable
81  {
82      //~ Static fields/initializers /////////////////////////////////////////////
83  
84      /*-------------------------------------------------------------------------*
85       *  public static member variables
86       *-------------------------------------------------------------------------*/
87  
88      /**
89       *  Obtain a suitable logger.
90       */
91      private static Category logger = Category.getInstance(
92              "joelib.process.types.DescStatistic");
93  
94      //  private final static  JOEProperty[]  ACCEPTED_PROPERTIES    = new JOEProperty[]{
95      //      new JOEProperty("SKIP_WRITER", "joelib.io.MoleculeFileType", "Writer for skipped molecule entries.", true),
96      //      new JOEProperty("DELIMITER", "java.lang.String", "Delimiter between descriptors in flat mode.", true),
97      //      new JOEProperty("COMMENT", "java.lang.String", "Comment character of the first line in flat mode.", true)
98      //      };
99      private final static String FILE_EXT = ".statistic";
100 
101     //~ Instance fields ////////////////////////////////////////////////////////
102 
103     private Hashtable notNative = new Hashtable(50);
104     private Hashtable statistic = new Hashtable(50);
105     private Vector desc2ignore;
106 
107     //~ Constructors ///////////////////////////////////////////////////////////
108 
109     /*-------------------------------------------------------------------------*
110      *  constructor
111      *-------------------------------------------------------------------------*/
112 
113     /**
114      *  Constructor for the DescSelectionWriter object
115      */
116     public DescStatistic()
117     {
118         clear();
119 
120         // load descriptors which should be ignored
121         String value;
122 
123         if ((value = PropertyHolder.instance().getProperty(this,
124                         "descriptors2ignore")) == null)
125         {
126         }
127         else
128         {
129             Vector tmpVec = ResourceLoader.readLines(value);
130 
131             if (tmpVec == null)
132             {
133                 logger.error("File with descriptor names to ignore not found.");
134             }
135 
136             desc2ignore = tmpVec;
137         }
138     }
139 
140     //~ Methods ////////////////////////////////////////////////////////////////
141 
142     //  public static DescStatistic getDescStatistic(IOType inType, String inFile)
143     //  {
144     //    FileInputStream fis=null;
145     //    try
146     //    {
147     //      fis=new FileInputStream(inFile);
148     //    }
149     //    catch (Exception ex)
150     //    {
151     //      ex.printStackTrace();
152     //      return null;
153     //    }
154     //
155     //    return getDescStatistic(inType, fis);
156     //  }
157     public static DescStatistic getDescStatistic(JOEMolVector molecules)
158     {
159         DescStatistic statistic = new DescStatistic();
160         int size = molecules.getSize();
161 
162         JOEMol mol;
163 
164         for (int i = 0; i < size; i++)
165         {
166             mol = molecules.getMol(i);
167 
168             try
169             {
170                 statistic.process(mol, null);
171             }
172              catch (JOEProcessException ex)
173             {
174                 logger.error(ex.toString());
175                 statistic = null;
176 
177                 return null;
178             }
179         }
180 
181         return statistic;
182     }
183 
184     /**
185      * Gets the descStatistic attribute of the DescStatistic class
186      *
187      * @param inType    Description of the Parameter
188      * @param inFile    Description of the Parameter
189      * @return          The descStatistic value
190      */
191     public static DescStatistic getDescStatistic(IOType inType, String inFile)
192     {
193         DescStatistic statistic = new DescStatistic();
194 
195         // load descriptor statistic if file exists
196         if (existsStatisticFileFor(inFile))
197         {
198             statistic.fromFileFor(inFile);
199 
200             return statistic;
201         }
202 
203         // create new descriptor statistic
204         SimpleReader reader = null;
205 
206         try
207         {
208             reader = new SimpleReader(new FileInputStream(inFile), inType);
209         }
210          catch (Exception ex)
211         {
212             logger.error(ex.getMessage());
213 
214             return null;
215         }
216 
217         logger.info("Calculate descriptor statistic.");
218 
219         JOEMol mol = new JOEMol(inType, inType);
220 
221         for (;;)
222         {
223             try
224             {
225                 if (!reader.readNext(mol))
226                 {
227                     break;
228                 }
229             }
230              catch (Exception ex)
231             {
232                 logger.error(ex.getMessage());
233                 statistic = null;
234 
235                 return null;
236             }
237 
238             try
239             {
240                 statistic.process(mol, null);
241             }
242              catch (JOEProcessException ex)
243             {
244                 logger.error(ex.getMessage());
245                 statistic = null;
246 
247                 return null;
248             }
249         }
250 
251         //reader.close();
252         // store descriptor statistic in file
253         statistic.writeStatisticFileFor(inFile);
254 
255         return statistic;
256     }
257 
258     /**
259      *  Gets the descriptorStatistic attribute of the DescStatistic object
260      *
261      * @param descriptor  Description of the Parameter
262      * @return            The descriptorStatistic value
263      */
264     public ArrayStatistic getDescriptorStatistic(String descriptor)
265     {
266         //        if(statistic==null)return null;
267         ArrayStatistic arrayStat = (ArrayStatistic) statistic.get(descriptor);
268 
269         if (arrayStat == null)
270         {
271             logger.error("There exist no descriptor statistic for '" +
272                 descriptor + "'");
273 
274             return null;
275         }
276 
277         arrayStat.calculateDerived();
278 
279         return arrayStat;
280     }
281 
282     /**
283      *  Gets the descriptors attribute of the DescStatistic object
284      *
285      * @return   The descriptors value
286      */
287     public Enumeration getDescriptors()
288     {
289         //      if(statistic==null)return null;
290         return statistic.keys();
291     }
292 
293     /**
294      *  Gets the descriptorStatistic attribute of the DescStatistic object
295      *
296      * @param descriptor  Description of the Parameter
297      * @return            The descriptorStatistic value
298      */
299     public boolean isNative(String descriptor)
300     {
301         return !notNative.containsKey(descriptor);
302     }
303 
304     /**
305      *  Description of the Method
306      *
307      * @return   Description of the Return Value
308      */
309     public boolean clear()
310     {
311         //        if(statistic==null)return false;
312         statistic.clear();
313 
314         return true;
315     }
316 
317     /**
318      * Description of the Method
319      *
320      * @param fileName  Description of the Parameter
321      * @return          Description of the Return Value
322      */
323     public static boolean existsStatisticFileFor(String fileName)
324     {
325         FileInputStream fis = null;
326 
327         // try to open file
328         try
329         {
330             fis = new FileInputStream(fileName + FILE_EXT);
331         }
332          catch (Exception ex)
333         {
334             return false;
335         }
336 
337         return true;
338     }
339 
340     /**
341      * Description of the Method
342      *
343      * @param fileName  Description of the Parameter
344      * @return          Description of the Return Value
345      */
346     public boolean fromFile(String fileName)
347     {
348         LineNumberReader lnr = null;
349         String line;
350         boolean ok = true;
351         URL location = this.getClass().getClassLoader().getSystemResource(fileName);
352         String fName;
353 
354         if (location != null)
355         {
356             fName = location.getFile();
357         }
358         else
359         {
360             fName = fileName;
361         }
362 
363         // try to open file
364         try
365         {
366             lnr = new LineNumberReader(new InputStreamReader(
367                         new FileInputStream(fName)));
368 
369             if ((line = lnr.readLine()) == null)
370             {
371                 return (false);
372             }
373 
374             StringTokenizer st;
375             int i;
376             ArrayStatistic arrayStat = null;
377             String descriptor = null;
378             String noNativeName = null;
379             int tokens;
380 
381             // define array statistic data types
382             int count = 0;
383             double sum = Double.NaN;
384             double sumSq = Double.NaN;
385             double stdDev = Double.NaN;
386             double mean = Double.NaN;
387             double min = Double.NaN;
388             double max = Double.NaN;
389 
390             // read statistic data
391             String token;
392 
393             while ((line = lnr.readLine()) != null)
394             {
395                 if (line.length() == 0 /*|| line.charAt(0)=='#' */    )
396                 {
397                     continue;
398                 }
399 
400                 st = new StringTokenizer(line, " \r\n\t");
401                 tokens = st.countTokens();
402 
403                 //                System.out.println("line ("+tokens+"): "+line);
404                 i = 0;
405 
406                 if (tokens == 8)
407                 {
408                     while (st.hasMoreTokens())
409                     {
410                         i++;
411                         token = st.nextToken();
412 
413                         try
414                         {
415                             switch (i)
416                             {
417                             case 1:
418                                 descriptor = token;
419 
420                                 break;
421 
422                             case 2:
423                                 count = (int) Double.parseDouble(token);
424 
425                                 break;
426 
427                             case 3:
428                                 min = Double.parseDouble(token);
429 
430                                 break;
431 
432                             case 4:
433                                 max = Double.parseDouble(token);
434 
435                                 break;
436 
437                             case 5:
438                                 sum = Double.parseDouble(token);
439 
440                                 break;
441 
442                             case 6:
443                                 sumSq = Double.parseDouble(token);
444 
445                                 break;
446 
447                             case 7:
448                                 mean = Double.parseDouble(token);
449 
450                                 break;
451 
452                             case 8:
453                                 stdDev = Double.parseDouble(token);
454 
455                                 break;
456                             }
457                         }
458                          catch (NumberFormatException ex)
459                         {
460                             ok = false;
461                             logger.error(ex.toString());
462                         }
463                     }
464 
465                     arrayStat = new ArrayStatistic(count, min, max, sum, sumSq,
466                             mean, stdDev);
467 
468                     statistic.put(descriptor, arrayStat);
469 
470                     //                    System.out.println(""+descriptor+" "+arrayStat.toString());
471                 }
472                 else if (tokens == 3)
473                 {
474                     while (st.hasMoreTokens())
475                     {
476                         i++;
477                         token = st.nextToken();
478 
479                         switch (i)
480                         {
481                         case 1:
482                             descriptor = token;
483 
484                             break;
485 
486                         case 2:
487                             count = (int) Double.parseDouble(token);
488 
489                             break;
490 
491                         case 3:
492                             noNativeName = token;
493 
494                             break;
495                         }
496                     }
497 
498                     arrayStat = new ArrayStatistic();
499                     arrayStat.count = count;
500                     statistic.put(descriptor, arrayStat);
501                     notNative.put(descriptor, noNativeName);
502                 }
503                 else
504                 {
505                     logger.error("Wrong format in line " + lnr.getLineNumber());
506                     ok = false;
507                 }
508             }
509         }
510          catch (IOException ex)
511         {
512             logger.error(ex.toString());
513             ok = false;
514         }
515 
516         return ok;
517     }
518 
519     public boolean fromFileFor(String fileName)
520     {
521         String fn = fileName + FILE_EXT;
522         logger.info("Load descriptor statistic from " + fn);
523 
524         return fromFile(fn);
525     }
526 
527     /**
528      *  Description of the Method
529      *
530      * @param descriptor  Description of the Parameter
531      * @return            Description of the Return Value
532      */
533     public boolean hasDescriptorStatistic(String descriptor)
534     {
535         //        if(statistic==null)return false;
536         return statistic.containsKey(descriptor);
537     }
538 
539     /*-------------------------------------------------------------------------*
540      * public  methods
541      *-------------------------------------------------------------------------*/
542 
543     /**
544      *  Description of the Method
545      *
546      * @return   Description of the Return Value
547      */
548     public JOEProperty[] neededProperties()
549     {
550         //    return ACCEPTED_PROPERTIES;
551         return null;
552     }
553 
554     /**
555      *  Description of the Method
556      *
557      * @param mol                      Description of the Parameter
558      * @param properties               Description of the Parameter
559      * @return                         Description of the Return Value
560      * @exception JOEProcessException  Description of the Exception
561      */
562     public boolean process(JOEMol mol, Map properties)
563         throws JOEProcessException
564     {
565         try
566         {
567             super.process(mol, properties);
568         }
569          catch (JOEProcessException e)
570         {
571             throw new JOEProcessException("Properties for " +
572                 this.getClass().getName() + " not correct.");
573         }
574 
575         //    System.out.println("processing:::"+mol.getTitle());
576         JOEGenericData genericData;
577         GenericDataIterator gdit = mol.genericDataIterator();
578         ArrayStatistic arrayStat;
579         String descriptor;
580 
581         //        String ignoreDesc = PropertyHolder.instance().getProperties().getProperty("jcompchem.joelib.process.DescStatistic.ignoreDescriptor", "Entry_Number");        while (gdit.hasNext())
582         boolean ignoreDesc = false;
583 
584         while (gdit.hasNext())
585         {
586             genericData = gdit.nextGenericData();
587             descriptor = genericData.getAttribute();
588 
589             // ignore descriptors in list
590             if (desc2ignore != null)
591             {
592                 ignoreDesc = false;
593 
594                 for (int i = 0; i < desc2ignore.size(); i++)
595                 {
596                     if (descriptor.equals((String) desc2ignore.get(i)))
597                     {
598                         //            System.out.println("ignore " + desc2ignore.get(i));
599                         ignoreDesc = true;
600 
601                         break;
602                     }
603                 }
604 
605                 if (ignoreDesc)
606                 {
607                     continue;
608                 }
609             }
610 
611             // parse data, if possible
612             genericData = mol.getData(descriptor, true);
613 
614             // check descriptor statistic entry
615             if (statistic.containsKey(descriptor))
616             {
617                 arrayStat = (ArrayStatistic) statistic.get(descriptor);
618             }
619             else
620             {
621                 arrayStat = new ArrayStatistic();
622                 statistic.put(descriptor, arrayStat);
623             }
624 
625             if (genericData.getDataType() == JOEDataType.JOE_PAIR_DATA)
626             {
627                 JOEPairData data = (JOEPairData) genericData;
628 
629                 if (JOEHelper.hasInterface(data, "NativeValue"))
630                 {
631                     arrayStat.add(((NativeValue) data).getDoubleNV());
632                 }
633                 else
634                 {
635                     arrayStat.count += 1;
636 
637                     String notNativeName = data.getValue().getClass().getName();
638 
639                     if (!notNative.containsKey(descriptor))
640                     {
641                         notNative.put(descriptor, notNativeName);
642                     }
643                 }
644             }
645         }
646 
647         return true;
648     }
649 
650     /**
651      * Description of the Method
652      *
653      * @param _desc  Description of the Parameter
654      * @param as     Description of the Parameter
655      * @return       Description of the Return Value
656      */
657     public Object putArrayStatistic(String _desc, ArrayStatistic as)
658     {
659         return statistic.put(_desc, as);
660     }
661 
662     /**
663      *  Description of the Method
664      *
665      * @param descriptor  Description of the Parameter
666      * @return            Description of the Return Value
667      */
668     public String showDescriptorStatistic(String descriptor)
669     {
670         //        if(statistic==null)return null;
671         ArrayStatistic arrayStat = (ArrayStatistic) statistic.get(descriptor);
672 
673         if (arrayStat == null)
674         {
675             logger.error("There exist no descriptor statistic for '" +
676                 descriptor + "'");
677 
678             return null;
679         }
680 
681         arrayStat.calculateDerived();
682 
683         StringBuffer sb = new StringBuffer(100);
684         sb.append(descriptor);
685         sb.append('\n');
686         sb.append(arrayStat.toString());
687         sb.append('\n');
688 
689         return sb.toString();
690     }
691 
692     /**
693      *  Description of the Method
694      *
695      * @return   Description of the Return Value
696      */
697     public String toString()
698     {
699         //        if(statistic==null)return null;
700         StringBuffer sb = new StringBuffer(10000);
701 
702         sb.append("#Descriptor Count Min Max Sum SumSq Mean StdDev\n");
703 
704         ArrayStatistic arrayStat;
705         String descriptor;
706         String noNativeName;
707 
708         for (Enumeration e = getDescriptors(); e.hasMoreElements();)
709         {
710             descriptor = (String) e.nextElement();
711 
712             //            sb.append(showDescriptorStatistic((String)e.nextElement()));
713             if (notNative.containsKey(descriptor))
714             {
715                 arrayStat = getDescriptorStatistic(descriptor);
716                 noNativeName = (String) notNative.get(descriptor);
717                 sb.append(descriptor);
718                 sb.append(' ');
719                 sb.append((int) arrayStat.count);
720                 sb.append(' ');
721                 sb.append(noNativeName);
722             }
723             else
724             {
725                 arrayStat = getDescriptorStatistic(descriptor);
726                 arrayStat.calculateDerived();
727                 sb.append(descriptor);
728                 sb.append(' ');
729                 sb.append((int) arrayStat.count);
730                 sb.append(' ');
731                 sb.append(arrayStat.min);
732                 sb.append(' ');
733                 sb.append(arrayStat.max);
734                 sb.append(' ');
735                 sb.append(arrayStat.sum);
736                 sb.append(' ');
737                 sb.append(arrayStat.sumSq);
738                 sb.append(' ');
739                 sb.append(arrayStat.mean);
740                 sb.append(' ');
741                 sb.append(arrayStat.stdDev);
742             }
743 
744             sb.append('\n');
745         }
746 
747         return sb.toString();
748     }
749 
750     public void writeStatisticFileFor(String _inFile)
751     {
752         String filename = _inFile + FILE_EXT;
753         PrintStream ps = null;
754 
755         try
756         {
757             ps = new PrintStream(new FileOutputStream(filename));
758             ps.println(this.toString());
759             logger.info("Statistic for " + _inFile);
760             logger.info("  written to " + filename);
761         }
762          catch (Exception ex)
763         {
764             logger.warn(ex.toString());
765             logger.warn("Statistic not written for " + _inFile);
766         }
767     }
768 
769     /*-------------------------------------------------------------------------*
770      * protected  methods
771      *-------------------------------------------------------------------------*/
772 }
773 ///////////////////////////////////////////////////////////////////////////////
774 //  END OF FILE.
775 ///////////////////////////////////////////////////////////////////////////////