Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: recoinx/clef/CLEFLanguageMerger.java


1   
2   package recoinx.clef;
3   
4   import java.util.Enumeration;
5   import java.util.Hashtable;
6   import java.util.Vector;
7   
8   import org.apache.log4j.Logger;
9   
10  import recoin.container.RetrievalContainer;
11  import recoin.exception.ComponentRunnableException;
12  import recoin.exception.InitiationException;
13  
14  import recoin.group.Component;
15  import recoin.group.ComponentRunnable;
16  import recoin.group.ComponentSupport;
17  import recoin.group.ComponentWorker;
18  
19  /**
20   * Merges CLEFResultLists that share the same language. The CLEFResults are
21   * merged according to weights assigned for the different databases.
22   * @author Jan H. Scheufen
23   * @version 0.2.9
24   */
25  public class CLEFLanguageMerger extends ComponentWorker
26  {
27    /**
28     * The weights for the CLEFResultLists from different components.<br>
29     * The chain ID of the component is used as the key in this Hashtable.
30     * The values are extracted from the attributes by combining the information in the
31     * value field of the attribute. The name of the attribute has to start with 'weight'
32     * to be recognized.
33     * <br><br>
34     * Example: Attributename: weightMySQL1 Value: 3[7]:1.45
35     * <br><br>
36     * This would merge all CLEFResultLists that were created by the Component with
37     * the chain ID '3[7]' by multiplying their scores with 1.45. 
38     */
39    protected Hashtable componentWeights;
40    /**
41     * The logger of this class.
42     */
43    static Logger logger;
44    
45    /**
46     * Creates a new CLEFLanguageMerger.
47     */
48    public CLEFLanguageMerger() 
49    {
50      // Initialize the logger for this class.
51      logger = Logger.getLogger( CLEFLanguageMerger.class.getName() );
52    }
53    
54    /**
55     * Initiates this CLEFLanguageMerger by passing the specified Component to the
56     * superclass's initiate method. Furthermore the Component is examined to see if
57     * any attributes beginning with 'weight...' have been specified.
58     */
59    public void initiate(Component c) throws InitiationException
60    {
61      super.initiate(c);
62      
63      componentWeights = new Hashtable();
64      Hashtable attributes = component.getAttributes();
65      for( Enumeration enum = attributes.keys(); enum.hasMoreElements(); )
66      {
67        String key = (String)enum.nextElement();
68        String[] valuePair = new String[2];
69        if( key.startsWith("weight") )
70        {
71          String value = (String)attributes.get( key );
72          valuePair = value.split(":");
73          componentWeights.put( valuePair[0], Float.valueOf(valuePair[1]) );
74        }
75      }
76    }
77    
78    /**
79     * @see recoin.group.ComponentWorker#createComponentRunnable(recoin.container.RetrievalContainer)
80     */
81    public ComponentRunnable createComponentRunnable(RetrievalContainer container)
82    {
83      if( !component.isInitiated() )
84      {
85        try
86        {
87          // TODO shouldn't component.initiate be called? otherwise, isInitiated() will not change!
88          initiate(component);
89        }
90        catch (InitiationException e)
91        {
92          logger.error("Unable to initiate ComponentWorker.");
93          return null;
94        }
95      }
96      return new LanguageMergerRunnable(container, this);
97    }
98  
99    /**
100    * @see recoin.group.ComponentWorker#createComponentRunnable(RetrievalContainer, Vector)
101    */
102   public ComponentRunnable createComponentRunnable( RetrievalContainer container, Vector supports) 
103   {
104     if( !component.isInitiated() )
105     {
106       try
107       {
108         initiate(component);
109       }
110       catch (InitiationException e)
111       {
112         logger.error("Unable to initiate ComponentWorker.");
113         return null;
114       }
115     }
116     return new LanguageMergerRunnable( container, (ComponentSupport)supports.firstElement(), this );
117   }
118   
119   /**
120    * @see recoin.group.ComponentWorker#createComponentRunnable(recoin.container.RetrievalContainer, Vector, recoin.group.ComponentRunnable)
121    */
122   public ComponentRunnable createComponentRunnable( RetrievalContainer container, Vector supports, ComponentRunnable runnable)
123   {
124     if( !component.isInitiated() )
125     {
126       try
127       {
128         initiate(component);
129       }
130       catch (InitiationException e)
131       {
132         logger.error("Unable to initiate ComponentWorker.");
133         return null;
134       }
135     }
136     return new LanguageMergerRunnable( container, (ComponentSupport)supports.firstElement(), runnable, this );
137   }
138 
139 
140   /**
141    * The LanguageMergerRunnable class implements the behavior of a ComponentRunnable
142    * that merges CLEFResultLists of the same language into a MergedList.
143    */
144   protected class LanguageMergerRunnable extends ComponentRunnable 
145   {
146     /**
147      * The languages.
148      */
149     private int[] languages;
150     /**
151      * Will be set to the first CLEFQuery that is found in the RetrievalContainer.
152      */
153     private CLEFQuery query;
154     
155     /**
156      * Creates a new LanguageMergerRunnable with the specified RetrievalContainer
157      * and ComponentWorker.
158      * @param container the RetrievalContainer
159      * @param worker the ComponentWorker
160      */
161     public LanguageMergerRunnable(RetrievalContainer container, ComponentWorker worker)
162     {
163       super(container, worker);
164     }
165 
166     /**
167      * Creates a new LanguageMergerRunnable with the specified RetrievalContainer,
168      * ComponentSupport and ComponentWorker.
169      * @param container the RetrievalContainer
170      * @param support the ComponentSupport
171      * @param worker the ComponentWorker
172      */
173     public LanguageMergerRunnable(RetrievalContainer container, ComponentSupport support, ComponentWorker worker) 
174     {
175       super(container, support, worker);
176     }
177     
178     /**
179      * Creates a new LanguageMergerRunnable with the specified RetrievalContainer,
180      * ComponentSupport, ComponentRunnable and ComponentWorker.
181      * @param container the RetrievalContainer
182      * @param support the ComponentSupport
183      * @param runnable the ComponentRunnable
184      * @param worker the ComponentWorker
185      */
186     public LanguageMergerRunnable( RetrievalContainer container, ComponentSupport support, ComponentRunnable runnable, ComponentWorker worker )
187     {
188       super(container, support, runnable, worker);
189     }
190 
191     /**
192      * Uses all CLEFResultLists and the first CLEFQuery it finds in the
193      * RetrievalContainer to merge the lists into one MergedList. The CLEFQuery
194      * is only used to provide additional information, e.g. about the topic.
195      * The CLEFResultLists are first grouped by their languages and then
196      * merged according to the weights specified for their chain ID. If no
197      * weight had been specified for a list, the list is merged with the others
198      * using a weight of 1.0.
199      */
200     public void run() 
201     {
202       logger.debug(this.getClass().getName()+" started.");
203       // Verarbeitung eines bestimmten ComponentSupport
204       if( container != null && container instanceof RetrievalContainer )
205       {
206         Vector resultLists = new Vector();
207         logger.info("Processing RetrievalContainer.");
208         // find all CLEFResultList objects and one CLEFQuery
209         for( Enumeration enum = container.getComponentSupport().elements(); enum.hasMoreElements(); )
210         {
211           ComponentSupport support = (ComponentSupport) enum.nextElement();
212           if( support instanceof CLEFResultList )
213           {
214             resultLists.add( support );
215           }
216           else if( query == null && support instanceof CLEFQuery )
217           {
218             query = (CLEFQuery)support;
219           }
220         }
221         
222         if( resultLists.size() > 0 )
223         {
224           logger.debug("Merging "+resultLists.size()+" CLEFResultLists into one CLEFMergedResultList.");
225           CLEFResultListMerger merger;
226           CLEFResultList[] lists;
227           float[] weights;
228           if( query != null && query.getCutOff() > 0 )
229           {
230             // create CLEFResultListMerger that uses cut off if necessary
231             merger = new CLEFResultListMerger( query.getCutOff() );
232           }
233           else
234             merger = new CLEFResultListMerger();
235           
236           // Iterate through languages
237           languages = CLEFConstants.getLanguages();
238           for( int i = 0; i < languages.length; i++ )
239           {
240             logger.debug("Trying to find CLEFResultLists objects for language "+languages[i]);
241             Vector langLists = new Vector();
242             // find all ResultLists for the current language
243             for( Enumeration enum = resultLists.elements(); enum.hasMoreElements(); )
244             {
245               CLEFResultList list = (CLEFResultList) enum.nextElement();
246               if( list.getLanguage() == languages[i] )
247               {
248                 langLists.add(list);
249               }
250             }
251             
252             // process the lists of the current language
253             if( langLists.size() > 0 )
254             {
255               // create arrays for lists and weights
256               lists = new CLEFResultList[langLists.size()];
257               weights = new float[langLists.size()];
258               int index = 0;
259               for( Enumeration enum = langLists.elements(); enum.hasMoreElements(); index++ )
260               {
261                 CLEFResultList list = (CLEFResultList) enum.nextElement();
262                 String chainID = list.getChainID();
263                 lists[index] = list;
264                 // if a weight had been specified it is used. otherwise a multiplicator of 1.0 is used.
265                 if( componentWeights.get( chainID ) != null )
266                   weights[index] = ((Float)componentWeights.get( chainID )).floatValue();
267                 else
268                   weights[index] = 1.0f;
269               }
270               logger.debug("Merging ResultLists");
271               CLEFMergedResultList mergedList = merger.createMergedResultList( lists, weights );
272               // set the topic of the mergedList to the one of the first list in array
273               mergedList.setTopic( lists[0].getTopic() );
274               // set the language of the MergedResultList
275               mergedList.setLanguage( languages[i] );
276               // use ComponentImpl's component reference, not this.component (has same result)
277               component.markComponentSupport( mergedList );
278               this.container.addComponentSupport( mergedList );
279             }
280             else
281               logger.debug("No CLEFResultsList objects found for language "+languages[i]);
282           }
283         }
284         else
285           logger.debug("No CLEFResultLists found for merging.");
286       }
287       else
288       {
289         logger.warn("Cannot process ComponentSupport "+support.getClass().getName());
290         container.addException( new ComponentRunnableException("The provided ComponentSupport cannot be processed.") );
291       }
292       
293       setFinished(true);
294     }
295   }
296   
297 }