Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: marf/Classification/Stochastic/MaxProbabilityClassifier.java


1   package marf.Classification.Stochastic;
2   
3   import java.io.StreamTokenizer;
4   import java.util.Vector;
5   
6   import marf.MARF;
7   import marf.Classification.ClassificationException;
8   import marf.FeatureExtraction.IFeatureExtraction;
9   import marf.MARF.NLP;
10  import marf.Stats.StatisticalEstimators.StatisticalEstimator;
11  import marf.Storage.Result;
12  import marf.Storage.StorageException;
13  
14  
15  /**
16   * <p>Maximum Probability Classification Module.</p>
17   *
18   * Originally came with the LangIdentApp NLP application
19   * of Serguei Mokhov.
20   *
21   * $Id: MaxProbabilityClassifier.java,v 1.19 2005/08/13 23:09:37 susan_fan Exp $
22   *
23   * @author Serguei Mokhov
24   * @version $Revision: 1.19 $
25   * @since 0.3.0
26   */
27  public class MaxProbabilityClassifier
28  extends Stochastic
29  {
30      /**
31     * For serialization versioning.
32     * When adding new members or make other structural
33     * changes regenerate this number with the
34     * <code>serialver</code> tool that comes with JDK.
35     * @since 0.3.0.4
36     */
37    private static final long serialVersionUID = 8665926058819588355L;
38  
39    /**
40     * Local reference to some instance of a statistical
41     * estimator for probability computation.
42     */
43    protected StatisticalEstimator oStatisticalEstimator = null;
44  
45    /**
46     * A collection of available natural languages.
47     */
48    protected Vector oAvailLanguages = null;
49  
50    /**
51     * NLP constructor that takes directly a statistical estimator.
52     * @param poStatisticalEstimator statistical estimator to use
53     */
54    public MaxProbabilityClassifier(StatisticalEstimator poStatisticalEstimator)
55    {
56      super(null);
57      init(poStatisticalEstimator);
58    }
59  
60    /**
61     * Implements Classification API.
62     * TODO: fix setting of data memvers as in MaxProbabilityClassifier(StatisticalEstimator).
63     * @param poFeatureExtraction FeatureExtraction module reference
64     */
65    public MaxProbabilityClassifier(IFeatureExtraction poFeatureExtraction)
66    {
67      super(poFeatureExtraction);
68  
69      // See if there is a request for a specific
70      // statistical estimator.
71      if(MARF.getModuleParams() != null)
72      {
73        Vector oParams = MARF.getModuleParams().getClassificationParams();
74  
75        if(oParams != null && oParams.size() > 1)
76          this.oStatisticalEstimator = (StatisticalEstimator)oParams.elementAt(1);
77      }
78  
79      init(this.oStatisticalEstimator);
80    }
81  
82    /**
83     * Initializes the classifier with all member variables.
84     * @param poStatisticalEstimator statistical estimator to use
85     * @throws IllegalArgumentException if poStatisticalEstimator is null
86     */
87    public void init(StatisticalEstimator poStatisticalEstimator)
88    {
89      if(poStatisticalEstimator == null)
90        throw new IllegalArgumentException("MaxProbabilityClassifier: StatisticalEstimator is null!");
91  
92      this.oStatisticalEstimator = poStatisticalEstimator;
93  
94      this.oAvailLanguages = new Vector();
95  
96      this.oObjectToSerialize = this.oAvailLanguages;
97      this.strFilename = this.getClass().getName() + ".gzbin";
98    }
99  
100   /**
101    * Performs training of underlying statistical estimator
102    * and goes through restore/dump cycle to save the available
103    * languages. Implements Classification API.
104    * @return <code>true</code>
105    * @throws ClassificationException should there be a problem with dump/restore
106    */
107   public boolean train()
108   throws ClassificationException
109   {
110     try
111     {
112       this.oStatisticalEstimator.train();
113 
114       restore();
115 /*
116       if(!this.oObjectToSerialize.equals(this.oAvailLanguages))
117       {
118         System.out.println("FULL STOP!");
119         System.exit(666);
120       }
121 */
122 
123       this.oAvailLanguages = (Vector)this.oObjectToSerialize;
124 
125       System.out.println("tr.before.oAvailLanguages="+oAvailLanguages);
126       System.out.println("Adding language ["+NLP.getLanguage()+"] ---- ");
127 
128       if(oAvailLanguages.contains(NLP.getLanguage()) == false)
129       {
130         oAvailLanguages.add(NLP.getLanguage());
131         System.out.println("tr.after.oAvailLanguages="+oAvailLanguages);
132 
133         dump();
134       }
135     }
136     catch(StorageException e)
137     {
138       throw new ClassificationException(e);
139     }
140 
141     return true;
142   }
143 
144   /**
145    * Performs language classification.
146    * Implements Classification API.
147    * @return <code>true</code> if classification was successful
148    * @throws ClassificationException if there was a problem with I/O
149    * or if there are no available languages
150    */
151   public boolean classify()
152   throws ClassificationException
153   {
154     try
155     {
156       restore();
157 
158       this.oAvailLanguages = (Vector)this.oObjectToSerialize;
159 
160       System.out.println("oAvailLanguages="+oAvailLanguages);
161 
162       if(oAvailLanguages.size() == 0)
163         throw new ClassificationException("MaxProbabilityClassifier: there are no languages available.");
164 
165       StreamTokenizer oTokenizerBackup = null;
166 
167       for(int i = 0; i < oAvailLanguages.size(); i++)
168       {
169         String strLang = (String)oAvailLanguages.elementAt(i);
170         //oStatisticalEstimator.setLang(strLang);
171 
172         NLP.setLanguage(strLang);
173         this.oStatisticalEstimator.resetFilename();
174 
175         // TO prevent stream exhausture on the 1st lang
176 //        oTokenizerBackup = oStatisticalEstimator.getStreamTokenizer().clone();
177 
178         double dProbability = this.oStatisticalEstimator.p();
179 
180 //        this.oStatisticalEstimator.setStreamTokenizer(oTokenizerBackup);
181 
182         this.oStatisticalEstimator.getStreamTokenizer().reset();
183 
184         System.out.println("lang=" + strLang + ", P=" + dProbability);
185 
186 //        oResultSet.addResult(new Result(oStatisticalEstimator.P(), strLang));
187         this.oResultSet.addResult(new Result(dProbability, strLang));
188       }
189     }
190     catch(Exception e)
191     {
192       throw new ClassificationException(e);
193     }
194 
195     return true;
196   }
197 
198   /**
199    * Retrieves class' revision.
200    * @return revision string
201    */
202   public static String getMARFSourceCodeRevision()
203   {
204     return "$Revision: 1.19 $";
205   }
206 }
207 
208 // EOF