Source code: marf/Classification/Stochastic/MaxProbabilityClassifier.java
1 package marf.Classification.Stochastic;
2
3 import java.io.StreamTokenizer;
4 import java.util.Vector;
5
6 import marf.MARF;
7 import marf.Classification.ClassificationException;
8 import marf.FeatureExtraction.IFeatureExtraction;
9 import marf.MARF.NLP;
10 import marf.Stats.StatisticalEstimators.StatisticalEstimator;
11 import marf.Storage.Result;
12 import marf.Storage.StorageException;
13
14
15 /**
16 * <p>Maximum Probability Classification Module.</p>
17 *
18 * Originally came with the LangIdentApp NLP application
19 * of Serguei Mokhov.
20 *
21 * $Id: MaxProbabilityClassifier.java,v 1.19 2005/08/13 23:09:37 susan_fan Exp $
22 *
23 * @author Serguei Mokhov
24 * @version $Revision: 1.19 $
25 * @since 0.3.0
26 */
27 public class MaxProbabilityClassifier
28 extends Stochastic
29 {
30 /**
31 * For serialization versioning.
32 * When adding new members or make other structural
33 * changes regenerate this number with the
34 * <code>serialver</code> tool that comes with JDK.
35 * @since 0.3.0.4
36 */
37 private static final long serialVersionUID = 8665926058819588355L;
38
39 /**
40 * Local reference to some instance of a statistical
41 * estimator for probability computation.
42 */
43 protected StatisticalEstimator oStatisticalEstimator = null;
44
45 /**
46 * A collection of available natural languages.
47 */
48 protected Vector oAvailLanguages = null;
49
50 /**
51 * NLP constructor that takes directly a statistical estimator.
52 * @param poStatisticalEstimator statistical estimator to use
53 */
54 public MaxProbabilityClassifier(StatisticalEstimator poStatisticalEstimator)
55 {
56 super(null);
57 init(poStatisticalEstimator);
58 }
59
60 /**
61 * Implements Classification API.
62 * TODO: fix setting of data memvers as in MaxProbabilityClassifier(StatisticalEstimator).
63 * @param poFeatureExtraction FeatureExtraction module reference
64 */
65 public MaxProbabilityClassifier(IFeatureExtraction poFeatureExtraction)
66 {
67 super(poFeatureExtraction);
68
69 // See if there is a request for a specific
70 // statistical estimator.
71 if(MARF.getModuleParams() != null)
72 {
73 Vector oParams = MARF.getModuleParams().getClassificationParams();
74
75 if(oParams != null && oParams.size() > 1)
76 this.oStatisticalEstimator = (StatisticalEstimator)oParams.elementAt(1);
77 }
78
79 init(this.oStatisticalEstimator);
80 }
81
82 /**
83 * Initializes the classifier with all member variables.
84 * @param poStatisticalEstimator statistical estimator to use
85 * @throws IllegalArgumentException if poStatisticalEstimator is null
86 */
87 public void init(StatisticalEstimator poStatisticalEstimator)
88 {
89 if(poStatisticalEstimator == null)
90 throw new IllegalArgumentException("MaxProbabilityClassifier: StatisticalEstimator is null!");
91
92 this.oStatisticalEstimator = poStatisticalEstimator;
93
94 this.oAvailLanguages = new Vector();
95
96 this.oObjectToSerialize = this.oAvailLanguages;
97 this.strFilename = this.getClass().getName() + ".gzbin";
98 }
99
100 /**
101 * Performs training of underlying statistical estimator
102 * and goes through restore/dump cycle to save the available
103 * languages. Implements Classification API.
104 * @return <code>true</code>
105 * @throws ClassificationException should there be a problem with dump/restore
106 */
107 public boolean train()
108 throws ClassificationException
109 {
110 try
111 {
112 this.oStatisticalEstimator.train();
113
114 restore();
115 /*
116 if(!this.oObjectToSerialize.equals(this.oAvailLanguages))
117 {
118 System.out.println("FULL STOP!");
119 System.exit(666);
120 }
121 */
122
123 this.oAvailLanguages = (Vector)this.oObjectToSerialize;
124
125 System.out.println("tr.before.oAvailLanguages="+oAvailLanguages);
126 System.out.println("Adding language ["+NLP.getLanguage()+"] ---- ");
127
128 if(oAvailLanguages.contains(NLP.getLanguage()) == false)
129 {
130 oAvailLanguages.add(NLP.getLanguage());
131 System.out.println("tr.after.oAvailLanguages="+oAvailLanguages);
132
133 dump();
134 }
135 }
136 catch(StorageException e)
137 {
138 throw new ClassificationException(e);
139 }
140
141 return true;
142 }
143
144 /**
145 * Performs language classification.
146 * Implements Classification API.
147 * @return <code>true</code> if classification was successful
148 * @throws ClassificationException if there was a problem with I/O
149 * or if there are no available languages
150 */
151 public boolean classify()
152 throws ClassificationException
153 {
154 try
155 {
156 restore();
157
158 this.oAvailLanguages = (Vector)this.oObjectToSerialize;
159
160 System.out.println("oAvailLanguages="+oAvailLanguages);
161
162 if(oAvailLanguages.size() == 0)
163 throw new ClassificationException("MaxProbabilityClassifier: there are no languages available.");
164
165 StreamTokenizer oTokenizerBackup = null;
166
167 for(int i = 0; i < oAvailLanguages.size(); i++)
168 {
169 String strLang = (String)oAvailLanguages.elementAt(i);
170 //oStatisticalEstimator.setLang(strLang);
171
172 NLP.setLanguage(strLang);
173 this.oStatisticalEstimator.resetFilename();
174
175 // TO prevent stream exhausture on the 1st lang
176 // oTokenizerBackup = oStatisticalEstimator.getStreamTokenizer().clone();
177
178 double dProbability = this.oStatisticalEstimator.p();
179
180 // this.oStatisticalEstimator.setStreamTokenizer(oTokenizerBackup);
181
182 this.oStatisticalEstimator.getStreamTokenizer().reset();
183
184 System.out.println("lang=" + strLang + ", P=" + dProbability);
185
186 // oResultSet.addResult(new Result(oStatisticalEstimator.P(), strLang));
187 this.oResultSet.addResult(new Result(dProbability, strLang));
188 }
189 }
190 catch(Exception e)
191 {
192 throw new ClassificationException(e);
193 }
194
195 return true;
196 }
197
198 /**
199 * Retrieves class' revision.
200 * @return revision string
201 */
202 public static String getMARFSourceCodeRevision()
203 {
204 return "$Revision: 1.19 $";
205 }
206 }
207
208 // EOF