Source code: mrsoft/util/MRData.java
1 /**
2 // mrsoft.util.MRData: General wrapper class to load, store and save numeric floating-point data
3 // Copyright (C) 2001 Michiel de Roo
4 //
5 // java version:
6 // date: 03/01/01
7 // author: Michiel de Roo
8 // e-mail: michiel@belangrijk.nl
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Library General Public
12 // License as published by the Free Software Foundation; either
13 // version 2 of the License, or (at your option) any later version.
14 //
15 // You should have received a copy of the GNU Library General Public
16 // License along with this library; if not, write to the
17 // Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 // Boston, MA 02111-1307, USA.
19 */
20 package mrsoft.util;
21 import mrsoft.util.*;
22
23 import java.lang.*;
24 import java.lang.reflect.Array;
25 import java.io.*;
26 import java.util.*;
27 import java.awt.TextArea;
28 import java.awt.FontMetrics;
29 import java.awt.Font;
30 import java.awt.Toolkit;
31 import java.text.NumberFormat;
32 import java.sql.*;
33 import xbase.*;
34
35 public class MRData implements Cloneable
36 {
37 boolean loaded=false; //true if data set is loaded from file
38
39 double[][] data; //containing the data
40 boolean[][] dl; //flag whether an entry is below detection limit
41 boolean[][] missing; //flag whether value is 'unreported'
42 public String[] varnames=null; //names of the variables
43 boolean transposed; //indicates whether this set is in transposed state
44 int rows=-1, cols=-1; //number of rows and colums (or fields and records, variables and samples)
45 double missing_val=-9999; //value to indicate whether an entry is missing (unknown)
46
47 NumberFormat nf;
48
49 public MRData()
50 {
51 nf = NumberFormat.getInstance();
52 nf.setMaximumFractionDigits(0);
53 nf.setMinimumFractionDigits(0);
54 }
55
56 /** Returns number of columns */
57 public int ncols() { return cols; }
58
59 /** Returns number of rows */
60 public int nrows() { return rows; }
61
62 /** Returns the (default?) variable name of a column */
63 public String getVarname(int col)
64 {
65 if(varnames==null | col>=cols) return null;
66 else return varnames[col];
67 }
68
69 /** Sets the value that indicates a value is unreported */
70 public void setMissingValue(double value) { missing_val = value; }
71
72 /** Returns the value that indicates a value is unreported */
73 public double getMissingValue() { return missing_val; }
74
75 /** Returns true if the value at row, col is unreported */
76 public boolean isMissing(int row, int col) { return (data[row][col]==missing_val); }
77
78 /** Returns whther the data is stored in transposed form as array[col][row] */
79 public boolean isTransposed() { return transposed; }
80
81 /** Loads data from a file */
82 public void load(String file)
83 {
84 try {
85 BufferedReader reader = new BufferedReader(new FileReader(file));
86 String input = null;
87
88 //read header if it exists
89 while(true) {
90 input = reader.readLine();
91 String[] t = StringUtil.parseString(input);
92
93 try {
94 //if a double value is encountered, this is assumed to be the start of the data table
95 double d = Double.valueOf(t[0]).doubleValue();
96 break;
97 }
98 catch (NumberFormatException e) {
99 //else it is part of the file header
100 if(t[0].equalsIgnoreCase("ndat") & Array.getLength(t)>1) rows = Integer.valueOf(t[1]).intValue();
101 else if(t[0].equalsIgnoreCase("ndim") & Array.getLength(t)>1) cols = Integer.valueOf(t[1]).intValue();
102 else if((t[0].equalsIgnoreCase("missing_val") | t[0].equalsIgnoreCase("missing_value")) & Array.getLength(t)>1) missing_val = Double.valueOf(t[1]).doubleValue();
103 else if(t[0].equalsIgnoreCase("varnames") & Array.getLength(t)==cols+1) {
104 varnames = new String[cols];
105 for (int i=0; i<cols; i++) varnames[i] = t[i+1];
106 }
107 else {
108 cols=Array.getLength(t);
109 varnames = new String[cols];
110 for (int i=0; i<cols; i++) varnames[i] = t[i];
111 }
112 }
113 }
114
115 //create default varnames if none were given
116 if(varnames==null) {
117 varnames = new String[cols];
118 nf.setMinimumIntegerDigits((int) ((Math.log((double)cols+1)/Math.log(10.0))+1) );
119 for (int i=0; i<cols; i++) {
120 varnames[i] = "var_" + nf.format(i+1);
121 }
122 }
123
124 //read lines from file to Vector lines
125 Vector lines = new Vector();
126 while(input!=null){
127 lines.addElement(input);
128 input = reader.readLine();
129 }
130
131 //Get data dimensions
132 StringTokenizer st = new StringTokenizer((String)lines.elementAt(0));
133 if(cols==-1) cols = st.countTokens();
134 if(rows==-1) rows = lines.size();
135
136 //Set arrays
137 data = new double[rows][cols];
138 for(int i=0;i<rows;i++){
139 st = new StringTokenizer((String)lines.elementAt(i));
140 for(int j=0;j<cols;j++){
141 try { data[i][j] = Double.valueOf(st.nextToken()).doubleValue(); }
142 catch(Exception e) { data[i][j] = missing_val; System.out.println("parse error: row " + i + " col " + j + " ,set to " + missing_val); }
143 }
144 }
145 transposed = false;
146 loaded = true;
147 }
148 catch(FileNotFoundException e) {loaded = false; System.out.println("FNF Exception on MRData.load(..)"); }
149 catch(IOException e) {loaded = false; System.out.println("I/O Exception on MRData.load(..)"); }
150 }
151
152 /** Loads data from a file in dbf format version III or IV. Uses package xbase. */
153 public void loadDBF(String file)
154 {
155 MRdbf dbf = new MRdbf();
156 try { dbf.open(file); }
157 catch (IOException e) { System.out.println("MRData I/O error: " + e.toString() + ": " + e.getMessage()); }
158 data = dbf.getDoubleArray();
159 varnames = dbf.getDoubleArrayNames();
160 rows = data.length;
161 cols = data[0].length;
162 }
163
164 /** Loads the numeric columns in a resultset */
165 public void loadResultSet(ResultSet results) throws SQLException
166 {
167 Vector records = new Vector();
168 ResultSetMetaData meta = results.getMetaData();
169 int ncols = meta.getColumnCount();
170 int nrows=0;
171 boolean[] seq = new boolean[ncols];
172 int vcols=0;
173 for(int i=0;i<ncols;i++) {
174 int type = meta.getColumnType(i+1);
175 if(type==Types.INTEGER |
176 type==Types.FLOAT |
177 type==Types.DOUBLE |
178 type==Types.TINYINT |
179 type==Types.SMALLINT |
180 type==Types.DECIMAL |
181 type==Types.NUMERIC )
182 {
183 seq[i] = true;
184 vcols++;
185 }
186 else System.out.println("Type " + meta.getColumnTypeName(i+1) + " rejected on column" + (i+1)); //DEBUG
187 }
188
189 varnames = new String[vcols];
190 for(int i=0, j=0; i<ncols; i++) {
191 if(seq[i]) varnames[j++] = meta.getColumnName(i+1);
192 }
193
194 while (results.next()) {
195 double[] dd = new double[vcols];
196 for(int i=0, j=0; i<ncols; i++) {
197 if(seq[i]==true) {
198 String s = results.getString(i+1);
199 double d;
200 try {
201 d = Double.valueOf(s).doubleValue();
202 dd[j++] = d;
203 }
204 catch(Exception e) { dd[j++]=missing_val; }
205 }
206 }
207 records.addElement(dd);
208 nrows++;
209 recordLoaded(nrows);
210 }
211 lastRecordLoaded(nrows);
212
213 data = new double[nrows][];
214 for(int i=0; i<records.size(); i++) {
215 data[i] = (double[]) records.elementAt(i);
216 }
217 System.out.println("");
218 rows = nrows;
219 cols = vcols;
220 }
221
222 /** Hook into the loading procedure, called when a new record has been loaded. Writes to standard out by default. */
223 public void recordLoaded(int row)
224 {
225 System.out.print("getting record " + row + "\r");
226 }
227
228 /** Hook into the loading procedure, called when the last record has been loaded. Writes to standard out by default. */
229 public void lastRecordLoaded(int row)
230 {
231 System.out.print("");
232 }
233
234 /** swaps rows-columns */
235 public void transpose()
236 {
237 //switch flag
238 if (transposed) transposed = false;
239 else transposed = true;
240
241 //swap rows/cols
242 int tmp1 = cols;
243 cols = rows;
244 rows = tmp1;
245
246 //create temporary array
247 double[][] tmp = new double[rows][cols];
248
249 //transpose
250 for(int i=0;i<cols;i++)
251 for (int j=0;j<rows;j++)
252 tmp[j][i] = data[i][j];
253
254 data = tmp;
255 }
256
257 /** Finds all values below detection limit (negative values) and converts them to positive values weight times detection limit*/
258 public void convertDetectionLimits(double weight)
259 {
260 if(dl==null){
261 dl = new boolean[cols][rows];
262 missing = new boolean[cols][rows];
263
264 for(int j=0;j<cols;j++){
265 for(int i=0;i<rows;i++){
266 if(data[j][i]==missing_val) {
267 dl[j][i] = false;
268 missing[j][i] =true;
269 }
270 else if(data[j][i]<0){
271 data[j][i] *= -weight;
272 dl[j][i] = true;
273 }
274 else { dl[j][i] = false; missing[j][i] = false; }
275 }
276 }
277 }
278 }
279
280
281 public MRData getClone()
282 {
283 MRData data2;
284 try{
285 data2 = (MRData) clone();
286 }
287 catch(CloneNotSupportedException e) { return null; }
288
289 //data
290 data2.data = new double[cols][rows];
291 if(!transposed)
292 for(int j=0;j<cols;j++)
293 System.arraycopy(data[j],0,data2.data[j],0,rows);
294 else
295 for(int i=0;i<rows;i++)
296 System.arraycopy(data[i],0,data2.data[i],0,cols);
297 //varnames
298 data2.varnames = new String[cols];
299 System.arraycopy(varnames,0,data2.varnames,0,cols);
300
301 return data2;
302
303 }
304 /** Returns a single subscripted array with the elements of the data array stacked as [i*cols+j], with i being the row and j the column and cols is the number of columns in the set*/
305 public double[] getStackArray()
306 {
307 double[] stack = new double[rows*cols];
308 for(int i=0;i<rows;i++) {
309 for(int j=0;j<cols;j++) {
310 if(transposed) stack[i*cols+j] = data[j][i];
311 else stack[i*cols+j] = data[i][j];
312 }
313 }
314 return stack;
315 }
316
317 /** Saves the data to a tab-delimited file with the variable names on the first line. */
318 public boolean save(String file)
319 {
320 FileOutputStream stream ;
321 PrintWriter out;
322 String s = "";
323
324 try {
325 stream = new FileOutputStream(file);
326 out = new PrintWriter(stream);
327
328 for(int i=0;i<cols;i++) s += varnames[i] + " ";
329 out.println(s);
330
331 for(int i=0;i<rows;i++) {
332 s = "";
333 for(int j=0;j<cols;j++)
334 s += data[i][j] + " ";
335 out.println(s);
336 }
337 out.flush();
338 out.close();
339 return true;
340 }
341 catch(IOException e) {return false;}
342 }
343
344 /** Rescales all the columns to the range 0 to 1 */
345 public void rescaleAll()
346 {
347 for(int j=0;j<cols;j++)
348 stat.rescale(data[j]);
349 }
350
351 /** Rescales column j to the range 0 to 1 */
352 public void rescale(int j)
353 {
354 stat.rescale(data[j]);
355
356 }
357 }