Source code: joelib/process/types/DescVarianceNorm.java
1 ///////////////////////////////////////////////////////////////////////////////
2 // Filename: $RCSfile: DescVarianceNorm.java,v $
3 // Purpose: Scales descriptor values by variance normalization so that they have similar magnitudes.
4 // Language: Java
5 // Compiler: JDK 1.4
6 // Authors: Joerg K. Wegner
7 // Version: $Revision: 1.7 $
8 // $Date: 2003/12/03 18:15:34 $
9 // $Author: wegner $
10 //
11 // Copyright (c) Dept. Computer Architecture, University of Tuebingen, Germany
12 //
13 // This program is free software; you can redistribute it and/or modify
14 // it under the terms of the GNU General Public License as published by
15 // the Free Software Foundation version 2 of the License.
16 //
17 // This program is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 // GNU General Public License for more details.
21 ///////////////////////////////////////////////////////////////////////////////
22 package joelib.process.types;
23
24 import joelib.data.JOEDataType;
25 import joelib.data.JOEGenericData;
26 import joelib.data.JOEPairData;
27
28 import joelib.desc.NativeValue;
29 import joelib.desc.result.DoubleResult;
30 import joelib.desc.result.IntResult;
31
32 import joelib.io.IOType;
33
34 import joelib.molecule.JOEMol;
35
36 import joelib.process.JOEProcessException;
37 import joelib.process.SimpleProcess;
38
39 import joelib.util.JOEHelper;
40 import joelib.util.JOEProperty;
41
42 import joelib.util.iterator.GenericDataIterator;
43
44 import wsi.ra.tool.ArrayStatistic;
45 import wsi.ra.tool.PropertyHolder;
46 import wsi.ra.tool.ResourceLoader;
47
48 /*==========================================================================*
49 * IMPORTS
50 *==========================================================================*/
51 import java.util.Map;
52 import java.util.Vector;
53
54 import org.apache.log4j.Category;
55
56
57 /*==========================================================================*
58 * CLASS DECLARATION
59 *==========================================================================*/
60
61 /**
62 * Scales the values in one descriptor so that they have similar magnitudes.
63 *
64 * TeX: $x_i^n$ = \frac{x_i-\overline{x}}{\sigma _i}
65 *
66 * @author wegnerj
67 * @license GPL
68 * @cvsversion $Revision: 1.7 $, $Date: 2003/12/03 18:15:34 $
69 */
70 public class DescVarianceNorm extends SimpleProcess
71 {
72 //~ Static fields/initializers /////////////////////////////////////////////
73
74 /*-------------------------------------------------------------------------*
75 * public static member variables
76 *-------------------------------------------------------------------------*/
77
78 /**
79 * Obtain a suitable logger.
80 */
81 private static Category logger = Category.getInstance(
82 "joelib.process.types.DescVarianceNorm");
83
84 //~ Instance fields ////////////////////////////////////////////////////////
85
86 // private final static JOEProperty[] ACCEPTED_PROPERTIES = new JOEProperty[]{
87 // new JOEProperty("NUMBER_OF_BINS", "java.lang.Integer", "Number of bins to create.", true),
88 // };
89 private DescStatistic statistic;
90 private Vector desc2ignore;
91
92 //~ Constructors ///////////////////////////////////////////////////////////
93
94 /*-------------------------------------------------------------------------*
95 * constructor
96 *-------------------------------------------------------------------------*/
97
98 /**
99 * Constructor for the DescSelectionWriter object
100 */
101 public DescVarianceNorm()
102 {
103 statistic = new DescStatistic();
104 clear();
105
106 // load descriptors which should be ignored
107 String value;
108
109 if ((value = PropertyHolder.instance().getProperty(this,
110 "descriptors2ignore")) == null)
111 {
112 }
113 else
114 {
115 Vector tmpVec = ResourceLoader.readLines(value);
116
117 if (tmpVec == null)
118 {
119 logger.error("File with descriptor names to ignore not found.");
120 }
121
122 desc2ignore = tmpVec;
123 }
124 }
125
126 //~ Methods ////////////////////////////////////////////////////////////////
127
128 /**
129 * Description of the Method
130 *
131 * @return Description of the Return Value
132 */
133 public boolean clear()
134 {
135 if (statistic == null)
136 {
137 return false;
138 }
139
140 return true;
141 }
142
143 public Vector descriptors2ignore()
144 {
145 return desc2ignore;
146 }
147
148 /*-------------------------------------------------------------------------*
149 * public methods
150 *-------------------------------------------------------------------------*/
151
152 /**
153 * Description of the Method
154 *
155 * @param _statistic Description of the Parameter
156 */
157 public void init(DescStatistic _statistic)
158 {
159 statistic = _statistic;
160 }
161
162 /**
163 * Description of the Method
164 *
165 * @param inType Description of the Parameter
166 * @param inStream Description of the Parameter
167 * @param _numberOfBins Description of the Parameter
168 * @exception Exception Description of the Exception
169 */
170 public void init(IOType inType, String inFile) throws Exception
171 {
172 logger.info(
173 "Creating statistical data for descriptor variance normalisation.");
174 statistic = DescStatistic.getDescStatistic(inType, inFile);
175 }
176
177 /**
178 * Description of the Method
179 *
180 * @return Description of the Return Value
181 */
182 public JOEProperty[] neededProperties()
183 {
184 // return ACCEPTED_PROPERTIES;
185 return null;
186 }
187
188 /**
189 * Description of the Method
190 *
191 * @param mol Description of the Parameter
192 * @param properties Description of the Parameter
193 * @return Description of the Return Value
194 * @exception JOEProcessException Description of the Exception
195 */
196 public boolean process(JOEMol mol, Map properties)
197 throws JOEProcessException
198 {
199 if (statistic == null)
200 {
201 return false;
202 }
203
204 try
205 {
206 super.process(mol, properties);
207 }
208 catch (JOEProcessException e)
209 {
210 throw new JOEProcessException("Properties for " +
211 this.getClass().getName() + " not correct.");
212 }
213
214 JOEGenericData genericData;
215 GenericDataIterator gdit = mol.genericDataIterator();
216 ArrayStatistic arrayStat;
217 String descriptor;
218 double value = 0.0;
219 double newValue;
220 boolean ignoreDesc = false;
221
222 while (gdit.hasNext())
223 {
224 genericData = gdit.nextGenericData();
225 descriptor = genericData.getAttribute();
226
227 // ignore descriptors in list
228 if (desc2ignore != null)
229 {
230 ignoreDesc = false;
231
232 for (int i = 0; i < desc2ignore.size(); i++)
233 {
234 if (descriptor.equals((String) desc2ignore.get(i)))
235 {
236 ignoreDesc = true;
237
238 break;
239 }
240 }
241
242 if (ignoreDesc)
243 {
244 continue;
245 }
246 }
247
248 if (genericData.getDataType() == JOEDataType.JOE_PAIR_DATA)
249 {
250 // parse data, if possible
251 genericData = mol.getData(descriptor, true);
252
253 // check descriptor statistic entry
254 arrayStat = statistic.getDescriptorStatistic(descriptor);
255
256 if (arrayStat == null)
257 {
258 logger.error("Statistic for " + descriptor +
259 " don't exist");
260
261 return false;
262 }
263
264 JOEPairData data = (JOEPairData) genericData;
265
266 if (JOEHelper.hasInterface(data, "NativeValue"))
267 {
268 value = ((NativeValue) data).getDoubleNV();
269 newValue = arrayStat.varianceNormalization(value);
270
271 // to avoid precision loss all normalized values will now be internally
272 // stored as double values.
273 ((NativeValue) data).setDoubleNV(newValue);
274 }
275 }
276 }
277
278 return true;
279 }
280
281 /**
282 * Description of the Method
283 *
284 * @return Description of the Return Value
285 */
286 public String toString()
287 {
288 if (statistic == null)
289 {
290 return null;
291 }
292
293 // StringBuffer sb = new StringBuffer(100);
294 // return sb.toString();
295 return null;
296 }
297
298 /*-------------------------------------------------------------------------*
299 * private methods
300 *-------------------------------------------------------------------------*/
301 }
302 ///////////////////////////////////////////////////////////////////////////////
303 // END OF FILE.
304 ///////////////////////////////////////////////////////////////////////////////