Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: joelib/process/types/DescBinning.java


1   ///////////////////////////////////////////////////////////////////////////////
2   //  Filename: $RCSfile: DescBinning.java,v $
3   //  Purpose:  Calculates the binning (histograms) of descriptor values in a molecule file.
4   //  Language: Java
5   //  Compiler: JDK 1.4
6   //  Authors:  Joerg K. Wegner
7   //  Version:  $Revision: 1.15 $
8   //            $Date: 2003/08/22 15:56:20 $
9   //            $Author: wegner $
10  //
11  //  Copyright (c) Dept. Computer Architecture, University of Tuebingen, Germany
12  //
13  //  This program is free software; you can redistribute it and/or modify
14  //  it under the terms of the GNU General Public License as published by
15  //  the Free Software Foundation version 2 of the License.
16  //
17  //  This program is distributed in the hope that it will be useful,
18  //  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  //  GNU General Public License for more details.
21  ///////////////////////////////////////////////////////////////////////////////
22  package joelib.process.types;
23  
24  import joelib.data.JOEDataType;
25  import joelib.data.JOEGenericData;
26  import joelib.data.JOEPairData;
27  
28  import joelib.desc.NativeValue;
29  
30  import joelib.io.IOType;
31  import joelib.io.SimpleReader;
32  
33  import joelib.molecule.JOEMol;
34  import joelib.molecule.JOEMolVector;
35  
36  import joelib.process.JOEProcessException;
37  import joelib.process.SimpleProcess;
38  
39  import joelib.util.JOEHelper;
40  import joelib.util.JOEProperty;
41  
42  import joelib.util.iterator.GenericDataIterator;
43  
44  import wsi.ra.tool.ArrayBinning;
45  import wsi.ra.tool.ArrayStatistic;
46  import wsi.ra.tool.PropertyHolder;
47  import wsi.ra.tool.ResourceLoader;
48  
49  /*==========================================================================*
50   * IMPORTS
51   *==========================================================================      */
52  import java.io.FileInputStream;
53  import java.io.FileOutputStream;
54  import java.io.IOException;
55  import java.io.InputStreamReader;
56  import java.io.LineNumberReader;
57  import java.io.PrintStream;
58  
59  import java.net.URL;
60  
61  import java.util.Enumeration;
62  import java.util.Hashtable;
63  import java.util.Map;
64  import java.util.StringTokenizer;
65  import java.util.Vector;
66  
67  import org.apache.log4j.Category;
68  
69  
70  /*==========================================================================*
71   * CLASS DECLARATION
72   *==========================================================================      */
73  
74  /**
75   *  Calling processor classes if the filter rule fits.
76   *
77   * @author     wegnerj
78   * @license    GPL
79   * @cvsversion    $Revision: 1.15 $, $Date: 2003/08/22 15:56:20 $
80   */
81  public class DescBinning extends SimpleProcess implements java.io.Serializable
82  {
83      //~ Static fields/initializers /////////////////////////////////////////////
84  
85      /*-------------------------------------------------------------------------*
86       *  public static member variables
87       *-------------------------------------------------------------------------      */
88  
89      /**
90       *  Obtain a suitable logger.
91       */
92      private static Category logger = Category.getInstance(
93              "joelib.process.types.DescBinning");
94  
95      //  private final static  JOEProperty[]  ACCEPTED_PROPERTIES    = new JOEProperty[]{
96      //      new JOEProperty("NUMBER_OF_BINS", "java.lang.Integer", "Number of bins to create.", true),
97      //      };
98      private final static String FILE_EXT = ".binning";
99  
100     //~ Instance fields ////////////////////////////////////////////////////////
101 
102     private DescStatistic statistic;
103     private Hashtable bins = new Hashtable();
104     private Vector desc2ignore;
105     private int numberOfBins = -1;
106 
107     //~ Constructors ///////////////////////////////////////////////////////////
108 
109     /*-------------------------------------------------------------------------*
110      *  constructor
111      *-------------------------------------------------------------------------      */
112 
113     /**
114      *  Constructor for the DescSelectionWriter object
115      */
116     public DescBinning()
117     {
118         statistic = new DescStatistic();
119         clear();
120 
121         // load descriptors which should be ignored
122         String value;
123 
124         if ((value = PropertyHolder.instance().getProperty(this,
125                         "descriptors2ignore")) == null)
126         {
127         }
128         else
129         {
130             Vector tmpVec = ResourceLoader.readLines(value);
131 
132             if (tmpVec == null)
133             {
134                 logger.error("File with descriptor names to ignore not found.");
135             }
136 
137             desc2ignore = tmpVec;
138         }
139 
140         numberOfBins = PropertyHolder.instance().getInt(this, "numberOfBins",
141                 1, Integer.MAX_VALUE, 20);
142     }
143 
144     //~ Methods ////////////////////////////////////////////////////////////////
145 
146     //  public static DescBinning getDescBinning(IOType inType, String inFile, int _numberOfBins)
147     //  {
148     //    FileInputStream fis=null;
149     //    try
150     //    {
151     //      fis=new FileInputStream(inFile);
152     //    }
153     //    catch (Exception ex)
154     //    {
155     //      ex.printStackTrace();
156     //      return null;
157     //    }
158     //
159     //    return getDescBinning(inType, fis, _numberOfBins);
160     //  }
161     //  public static DescBinning getDescBinning(IOType inType, InputStream inStream, int _numberOfBins)
162 
163     /**
164      * Gets the descBinning attribute of the DescBinning class
165      *
166      * @param inType         Description of the Parameter
167      * @param inFile         Description of the Parameter
168      * @param _numberOfBins  Description of the Parameter
169      * @return               The descBinning value
170      */
171     public static DescBinning getDescBinning(IOType inType, String inFile)
172     {
173         return getDescBinning(inType, inFile, -1);
174     }
175 
176     /**
177      * Gets the descBinning attribute of the DescBinning class
178      *
179      * @param inType         Description of the Parameter
180      * @param inFile         Description of the Parameter
181      * @param _numberOfBins  Description of the Parameter
182      * @return               The descBinning value
183      */
184     public static DescBinning getDescBinning(JOEMolVector molecules)
185     {
186         DescBinning binning = new DescBinning();
187 
188         binning.statistic = DescStatistic.getDescStatistic(molecules);
189 
190         int size = molecules.getSize();
191         JOEMol mol;
192 
193         for (int i = 0; i < size; i++)
194         {
195             mol = molecules.getMol(i);
196 
197             try
198             {
199                 binning.process(mol, null);
200             }
201              catch (JOEProcessException ex)
202             {
203                 ex.printStackTrace();
204 
205                 return null;
206             }
207         }
208 
209         return binning;
210     }
211 
212     public static DescBinning getDescBinning(IOType inType, String inFile,
213         int _numberOfBins)
214     {
215         DescBinning binning = new DescBinning();
216 
217         // load descriptor binning if file exists
218         if (existsBinningFileFor(inFile))
219         {
220             binning.fromFileFor(inFile);
221 
222             return binning;
223         }
224 
225         // create new descriptor binning
226         //    descriptors = _descriptors;
227         //        InputStream clonedIS = null;
228         SimpleReader reader = null;
229 
230         try
231         {
232             if (_numberOfBins < 1)
233             {
234                 binning.init(inType, inFile);
235             }
236             else
237             {
238                 binning.init(inType, inFile, _numberOfBins);
239             }
240 
241             //      clonedIS=(InputStream)inStream.clone();
242             reader = new SimpleReader(new FileInputStream(inFile), inType);
243         }
244          catch (Exception ex)
245         {
246             ex.printStackTrace();
247             logger.error(ex.getMessage());
248 
249             return null;
250         }
251 
252         logger.info("Calculate descriptor binning.");
253 
254         JOEMol mol = new JOEMol(inType, inType);
255 
256         for (;;)
257         {
258             try
259             {
260                 if (!reader.readNext(mol))
261                 {
262                     break;
263                 }
264             }
265              catch (Exception ex)
266             {
267                 ex.printStackTrace();
268                 logger.error(ex.getMessage());
269 
270                 return null;
271             }
272 
273             try
274             {
275                 binning.process(mol, null);
276             }
277              catch (JOEProcessException ex)
278             {
279                 ex.printStackTrace();
280                 logger.error(ex.getMessage());
281 
282                 return null;
283             }
284         }
285 
286         reader.close();
287         reader = null;
288 
289         // store descriptor binning in file
290         binning.writeBinningFileFor(inFile);
291 
292         return binning;
293     }
294 
295     /**
296      * Gets the descStatistic attribute of the DescBinning object
297      *
298      * @return   The descStatistic value
299      */
300     public DescStatistic getDescStatistic()
301     {
302         return statistic;
303     }
304 
305     /**
306      *  Gets the descriptorBinning attribute of the DescBinning object
307      *
308      * @param descriptor  Description of the Parameter
309      * @return            The descriptorBinning value
310      */
311     public ArrayBinning getDescriptorBinning(String descriptor)
312     {
313         if (statistic == null)
314         {
315             return null;
316         }
317 
318         ArrayBinning arrayBinning = (ArrayBinning) bins.get(descriptor);
319 
320         if (arrayBinning == null)
321         {
322             logger.error("There exist no descriptor binning for '" +
323                 descriptor + "'");
324 
325             return null;
326         }
327 
328         arrayBinning.calculateDerived();
329 
330         return arrayBinning;
331     }
332 
333     /**
334      *  Gets the descriptors attribute of the DescBinning object
335      *
336      * @return   The descriptors value
337      */
338     public Enumeration getDescriptors()
339     {
340         if (statistic == null)
341         {
342             return null;
343         }
344 
345         return bins.keys();
346     }
347 
348     /**
349      *  Description of the Method
350      *
351      * @return   Description of the Return Value
352      */
353     public boolean clear()
354     {
355         if (statistic == null)
356         {
357             return false;
358         }
359 
360         /*if (!statistic.clear())
361          *{
362          *return false;
363          *}   */
364 
365         //bins.clear();
366         return true;
367     }
368 
369     /**
370      * Description of the Method
371      *
372      * @param fileName  Description of the Parameter
373      * @return          Description of the Return Value
374      */
375     public static boolean existsBinningFileFor(String fileName)
376     {
377         FileInputStream fis = null;
378 
379         // try to open file
380         try
381         {
382             fis = new FileInputStream(fileName + FILE_EXT);
383         }
384          catch (Exception ex)
385         {
386             return false;
387         }
388 
389         return true;
390     }
391 
392     /**
393      * Description of the Method
394      *
395      * @param fileName  Description of the Parameter
396      * @return          Description of the Return Value
397      */
398     public boolean fromFile(String fileName)
399     {
400         LineNumberReader lnr = null;
401         String line;
402         boolean ok = true;
403         int VARS = 10;
404         URL location = this.getClass().getClassLoader().getSystemResource(fileName);
405         String fName;
406 
407         if (location != null)
408         {
409             fName = location.getFile();
410         }
411         else
412         {
413             fName = fileName;
414         }
415 
416         // try to open file
417         try
418         {
419             lnr = new LineNumberReader(new InputStreamReader(
420                         new FileInputStream(fName)));
421 
422             StringTokenizer st;
423             int tokens = 0;
424             String token;
425             int i;
426 
427             // get binning structure from first line
428             if ((line = lnr.readLine()) == null)
429             {
430                 return (false);
431             }
432 
433             //            System.out.println("line:"+line);
434             st = new StringTokenizer(line, " \r\n\t");
435             tokens = st.countTokens();
436             numberOfBins = tokens - VARS;
437 
438             if (numberOfBins <= 0)
439             {
440                 logger.error("Negative number of bins.");
441 
442                 return false;
443             }
444 
445             // read data
446             ArrayStatistic arrayStat = null;
447             ArrayBinning arrayBinning = null;
448             String descriptor = null;
449 
450             // define array statistic data types
451             int count = 0;
452             double sum = Double.NaN;
453             double sumSq = Double.NaN;
454             double stdDev = Double.NaN;
455             double mean = Double.NaN;
456             double min = Double.NaN;
457             double max = Double.NaN;
458             double shannonEntropy = Double.NaN;
459             double entropy = Double.NaN;
460             boolean containsNaN = false;
461             int[] tmpA = new int[numberOfBins];
462 
463             // read statistic data
464             while ((line = lnr.readLine()) != null)
465             {
466                 if (line.length() == 0 /*|| line.charAt(0)=='#' */    )
467                 {
468                     continue;
469                 }
470 
471                 st = new StringTokenizer(line, " \r\n\t");
472                 tokens = st.countTokens();
473 
474                 //                System.out.println("line ("+tokens+", b="+numberOfBins+"): "+line);
475                 i = 0;
476 
477                 if (tokens == (VARS + numberOfBins))
478                 {
479                     while (st.hasMoreTokens())
480                     {
481                         i++;
482                         token = st.nextToken();
483 
484                         try
485                         {
486                             switch (i)
487                             {
488                             case 1:
489                                 descriptor = token;
490 
491                                 break;
492 
493                             case 2:
494                                 count = (int) Double.parseDouble(token);
495 
496                                 break;
497 
498                             case 3:
499                                 shannonEntropy = Double.parseDouble(token);
500 
501                                 break;
502 
503                             case 4:
504                                 entropy = (int) Double.parseDouble(token);
505 
506                                 break;
507 
508                             case 5:
509                                 min = Double.parseDouble(token);
510 
511                                 break;
512 
513                             case 6:
514                                 max = Double.parseDouble(token);
515 
516                                 break;
517 
518                             case 7:
519                                 sum = Double.parseDouble(token);
520 
521                                 break;
522 
523                             case 8:
524                                 sumSq = Double.parseDouble(token);
525 
526                                 break;
527 
528                             case 9:
529                                 mean = Double.parseDouble(token);
530 
531                                 break;
532 
533                             case 10:
534                                 stdDev = Double.parseDouble(token);
535 
536                                 break;
537 
538                             case 11:
539                                 containsNaN = Boolean.valueOf(token)
540                                                      .booleanValue();
541 
542                                 break;
543 
544                             default:
545 
546                                 //                                    System.out.print(" "+(i-VARS-1)+"="+token);
547                                 tmpA[i - VARS - 1] = Integer.parseInt(token);
548 
549                                 break;
550                             }
551                         }
552                          catch (NumberFormatException ex)
553                         {
554                             ok = false;
555                             ex.printStackTrace();
556                             logger.error(ex.toString());
557                         }
558                     }
559 
560                     arrayStat = new ArrayStatistic(count, min, max, sum, sumSq,
561                             mean, stdDev);
562                     statistic.putArrayStatistic(descriptor, arrayStat);
563                     arrayBinning = new ArrayBinning(numberOfBins, arrayStat);
564                     arrayBinning.shannonEntropy = shannonEntropy;
565                     arrayBinning.entropy = entropy;
566                     arrayBinning.binning = new int[numberOfBins];
567                     arrayBinning.containsNaN = containsNaN;
568                     System.arraycopy(tmpA, 0, arrayBinning.binning, 0,
569                         numberOfBins);
570                     bins.put(descriptor, arrayBinning);
571 
572                     //                    System.out.print(""+descriptor+" "+arrayBinning.toString());
573                 }
574                 else
575                 {
576                     logger.error("Wrong format in line " + lnr.getLineNumber());
577                     ok = false;
578                 }
579             }
580         }
581          catch (IOException ex)
582         {
583             ex.printStackTrace();
584             logger.error(ex.toString());
585             ok = false;
586         }
587 
588         return ok;
589     }
590 
591     public boolean fromFileFor(String fileName)
592     {
593         String fn = fileName + FILE_EXT;
594         logger.info("Load descriptor binning from " + fn);
595 
596         return fromFile(fn);
597     }
598 
599     /*-------------------------------------------------------------------------*
600      * public  methods
601      *-------------------------------------------------------------------------      */
602 
603     /**
604      *  Description of the Method
605      *
606      * @param _statistic     Description of the Parameter
607      * @param _numberOfBins  Description of the Parameter
608      */
609     public void init(DescStatistic _statistic)
610     {
611         statistic = _statistic;
612     }
613 
614     /**
615      *  Description of the Method
616      *
617      * @param _statistic     Description of the Parameter
618      * @param _numberOfBins  Description of the Parameter
619      */
620     public void init(DescStatistic _statistic, int _numberOfBins)
621     {
622         statistic = _statistic;
623         numberOfBins = _numberOfBins;
624     }
625 
626     /**
627      *  Description of the Method
628      *
629      * @param inType         Description of the Parameter
630      * @param _numberOfBins  Description of the Parameter
631      * @param inFile         Description of the Parameter
632      * @exception Exception  Description of the Exception
633      */
634     public void init(IOType inType, String inFile) throws Exception
635     {
636         statistic = DescStatistic.getDescStatistic(inType, inFile);
637     }
638 
639     /**
640      *  Description of the Method
641      *
642      * @param inType         Description of the Parameter
643      * @param _numberOfBins  Description of the Parameter
644      * @param inFile         Description of the Parameter
645      * @exception Exception  Description of the Exception
646      */
647     public void init(IOType inType, String inFile, int _numberOfBins)
648         throws Exception
649     {
650         numberOfBins = _numberOfBins;
651         statistic = DescStatistic.getDescStatistic(inType, inFile);
652 
653         //        System.out.println("Descriptor statistic for binning:\n " + statistic.toString());
654     }
655 
656     /**
657      *  Description of the Method
658      *
659      * @return   Description of the Return Value
660      */
661     public JOEProperty[] neededProperties()
662     {
663         //    return ACCEPTED_PROPERTIES;
664         return null;
665     }
666 
667     /**
668      *  Description of the Method
669      *
670      * @return   Description of the Return Value
671      */
672     public int numberOfDescriptors()
673     {
674         if (statistic == null)
675         {
676             return -1;
677         }
678 
679         return bins.size();
680     }
681 
682     /**
683      *  Description of the Method
684      *
685      * @param mol                      Description of the Parameter
686      * @param properties               Description of the Parameter
687      * @return                         Description of the Return Value
688      * @exception JOEProcessException  Description of the Exception
689      */
690     public boolean process(JOEMol mol, Map properties)
691         throws JOEProcessException
692     {
693         if (statistic == null)
694         {
695             return false;
696         }
697 
698         try
699         {
700             super.process(mol, properties);
701         }
702          catch (JOEProcessException e)
703         {
704             throw new JOEProcessException("Properties for " +
705                 this.getClass().getName() + " not correct.");
706         }
707 
708         JOEGenericData genericData;
709         GenericDataIterator gdit = mol.genericDataIterator();
710         ArrayStatistic arrayStat;
711         String descriptor;
712         double value = 0.0;
713         ArrayBinning arrayBinning;
714         boolean ignoreDesc = false;
715 
716         while (gdit.hasNext())
717         {
718             genericData = gdit.nextGenericData();
719             descriptor = genericData.getAttribute();
720 
721             // ignore descriptors in list
722             if (desc2ignore != null)
723             {
724                 ignoreDesc = false;
725 
726                 for (int i = 0; i < desc2ignore.size(); i++)
727                 {
728                     if (descriptor.equals((String) desc2ignore.get(i)))
729                     {
730                         ignoreDesc = true;
731 
732                         break;
733                     }
734                 }
735 
736                 if (ignoreDesc)
737                 {
738                     continue;
739                 }
740             }
741 
742             if (genericData.getDataType() == JOEDataType.JOE_PAIR_DATA)
743             {
744                 // parse data, if possible
745                 genericData = mol.getData(descriptor, true);
746 
747                 // check descriptor binning entry
748                 if (bins.containsKey(descriptor))
749                 {
750                     arrayBinning = (ArrayBinning) bins.get(descriptor);
751                 }
752                 else
753                 {
754                     // check descriptor statistic entry
755                     arrayStat = statistic.getDescriptorStatistic(descriptor);
756 
757                     if (arrayStat == null)
758                     {
759                         logger.error("Statistic for " + descriptor +
760                             " does not exist.");
761 
762                         return false;
763                     }
764 
765                     arrayBinning = new ArrayBinning(numberOfBins, arrayStat);
766                     bins.put(descriptor, arrayBinning);
767                 }
768 
769                 JOEPairData data = (JOEPairData) genericData;
770 
771                 if (JOEHelper.hasInterface(data, "NativeValue"))
772                 {
773                     value = ((NativeValue) data).getDoubleNV();
774 
775                     if (arrayBinning.add(value) == -1)
776                     {
777                         logger.error("Out of range (" + value + ") in " +
778                             descriptor);
779 
780                         return false;
781                     }
782                 }
783 
784                 //                else
785                 //                {
786                 //                }
787             }
788         }
789 
790         return true;
791     }
792 
793     /**
794      *  Description of the Method
795      *
796      * @return   Description of the Return Value
797      */
798     public String toString()
799     {
800         if (statistic == null)
801         {
802             return null;
803         }
804 
805         StringBuffer sb = new StringBuffer(10000);
806 
807         //sb.append("Descriptor Binning\n");
808         sb.append(
809             "#Descriptor Count ShannonEntropy Entropy Min Max Sum SumSq Mean StdDev NaN");
810 
811         for (int i = 1; i <= numberOfBins; i++)
812         {
813             sb.append(" bin");
814             sb.append(i);
815         }
816 
817         sb.append("\n");
818 
819         ArrayBinning arrayBinning;
820         String descriptor;
821 
822         for (Enumeration e = getDescriptors(); e.hasMoreElements();)
823         {
824             descriptor = (String) e.nextElement();
825 
826             //            sb.append(showDescriptorStatistic((String)e.nextElement()));
827             arrayBinning = (ArrayBinning) bins.get(descriptor);
828             sb.append(descriptor);
829             sb.append(' ');
830             sb.append(arrayBinning.toString());
831         }
832 
833         return sb.toString();
834     }
835 
836     public void writeBinningFileFor(String _inFile)
837     {
838         String filename = _inFile + FILE_EXT;
839         PrintStream ps = null;
840 
841         try
842         {
843             ps = new PrintStream(new FileOutputStream(filename));
844             ps.println(this.toString());
845             logger.info("Binning for " + _inFile);
846             logger.info("  written to " + filename);
847         }
848          catch (Exception ex)
849         {
850             logger.warn(ex.toString());
851             logger.warn("Binning not written for " + _inFile);
852         }
853     }
854 
855     /*-------------------------------------------------------------------------*
856      * private  methods
857      *-------------------------------------------------------------------------      */
858 }
859 ///////////////////////////////////////////////////////////////////////////////
860 //  END OF FILE.
861 ///////////////////////////////////////////////////////////////////////////////