Source code: recoinx/clef/CLEFLanguageMerger.java
1
2 package recoinx.clef;
3
4 import java.util.Enumeration;
5 import java.util.Hashtable;
6 import java.util.Vector;
7
8 import org.apache.log4j.Logger;
9
10 import recoin.container.RetrievalContainer;
11 import recoin.exception.ComponentRunnableException;
12 import recoin.exception.InitiationException;
13
14 import recoin.group.Component;
15 import recoin.group.ComponentRunnable;
16 import recoin.group.ComponentSupport;
17 import recoin.group.ComponentWorker;
18
19 /**
20 * Merges CLEFResultLists that share the same language. The CLEFResults are
21 * merged according to weights assigned for the different databases.
22 * @author Jan H. Scheufen
23 * @version 0.2.9
24 */
25 public class CLEFLanguageMerger extends ComponentWorker
26 {
27 /**
28 * The weights for the CLEFResultLists from different components.<br>
29 * The chain ID of the component is used as the key in this Hashtable.
30 * The values are extracted from the attributes by combining the information in the
31 * value field of the attribute. The name of the attribute has to start with 'weight'
32 * to be recognized.
33 * <br><br>
34 * Example: Attributename: weightMySQL1 Value: 3[7]:1.45
35 * <br><br>
36 * This would merge all CLEFResultLists that were created by the Component with
37 * the chain ID '3[7]' by multiplying their scores with 1.45.
38 */
39 protected Hashtable componentWeights;
40 /**
41 * The logger of this class.
42 */
43 static Logger logger;
44
45 /**
46 * Creates a new CLEFLanguageMerger.
47 */
48 public CLEFLanguageMerger()
49 {
50 // Initialize the logger for this class.
51 logger = Logger.getLogger( CLEFLanguageMerger.class.getName() );
52 }
53
54 /**
55 * Initiates this CLEFLanguageMerger by passing the specified Component to the
56 * superclass's initiate method. Furthermore the Component is examined to see if
57 * any attributes beginning with 'weight...' have been specified.
58 */
59 public void initiate(Component c) throws InitiationException
60 {
61 super.initiate(c);
62
63 componentWeights = new Hashtable();
64 Hashtable attributes = component.getAttributes();
65 for( Enumeration enum = attributes.keys(); enum.hasMoreElements(); )
66 {
67 String key = (String)enum.nextElement();
68 String[] valuePair = new String[2];
69 if( key.startsWith("weight") )
70 {
71 String value = (String)attributes.get( key );
72 valuePair = value.split(":");
73 componentWeights.put( valuePair[0], Float.valueOf(valuePair[1]) );
74 }
75 }
76 }
77
78 /**
79 * @see recoin.group.ComponentWorker#createComponentRunnable(recoin.container.RetrievalContainer)
80 */
81 public ComponentRunnable createComponentRunnable(RetrievalContainer container)
82 {
83 if( !component.isInitiated() )
84 {
85 try
86 {
87 // TODO shouldn't component.initiate be called? otherwise, isInitiated() will not change!
88 initiate(component);
89 }
90 catch (InitiationException e)
91 {
92 logger.error("Unable to initiate ComponentWorker.");
93 return null;
94 }
95 }
96 return new LanguageMergerRunnable(container, this);
97 }
98
99 /**
100 * @see recoin.group.ComponentWorker#createComponentRunnable(RetrievalContainer, Vector)
101 */
102 public ComponentRunnable createComponentRunnable( RetrievalContainer container, Vector supports)
103 {
104 if( !component.isInitiated() )
105 {
106 try
107 {
108 initiate(component);
109 }
110 catch (InitiationException e)
111 {
112 logger.error("Unable to initiate ComponentWorker.");
113 return null;
114 }
115 }
116 return new LanguageMergerRunnable( container, (ComponentSupport)supports.firstElement(), this );
117 }
118
119 /**
120 * @see recoin.group.ComponentWorker#createComponentRunnable(recoin.container.RetrievalContainer, Vector, recoin.group.ComponentRunnable)
121 */
122 public ComponentRunnable createComponentRunnable( RetrievalContainer container, Vector supports, ComponentRunnable runnable)
123 {
124 if( !component.isInitiated() )
125 {
126 try
127 {
128 initiate(component);
129 }
130 catch (InitiationException e)
131 {
132 logger.error("Unable to initiate ComponentWorker.");
133 return null;
134 }
135 }
136 return new LanguageMergerRunnable( container, (ComponentSupport)supports.firstElement(), runnable, this );
137 }
138
139
140 /**
141 * The LanguageMergerRunnable class implements the behavior of a ComponentRunnable
142 * that merges CLEFResultLists of the same language into a MergedList.
143 */
144 protected class LanguageMergerRunnable extends ComponentRunnable
145 {
146 /**
147 * The languages.
148 */
149 private int[] languages;
150 /**
151 * Will be set to the first CLEFQuery that is found in the RetrievalContainer.
152 */
153 private CLEFQuery query;
154
155 /**
156 * Creates a new LanguageMergerRunnable with the specified RetrievalContainer
157 * and ComponentWorker.
158 * @param container the RetrievalContainer
159 * @param worker the ComponentWorker
160 */
161 public LanguageMergerRunnable(RetrievalContainer container, ComponentWorker worker)
162 {
163 super(container, worker);
164 }
165
166 /**
167 * Creates a new LanguageMergerRunnable with the specified RetrievalContainer,
168 * ComponentSupport and ComponentWorker.
169 * @param container the RetrievalContainer
170 * @param support the ComponentSupport
171 * @param worker the ComponentWorker
172 */
173 public LanguageMergerRunnable(RetrievalContainer container, ComponentSupport support, ComponentWorker worker)
174 {
175 super(container, support, worker);
176 }
177
178 /**
179 * Creates a new LanguageMergerRunnable with the specified RetrievalContainer,
180 * ComponentSupport, ComponentRunnable and ComponentWorker.
181 * @param container the RetrievalContainer
182 * @param support the ComponentSupport
183 * @param runnable the ComponentRunnable
184 * @param worker the ComponentWorker
185 */
186 public LanguageMergerRunnable( RetrievalContainer container, ComponentSupport support, ComponentRunnable runnable, ComponentWorker worker )
187 {
188 super(container, support, runnable, worker);
189 }
190
191 /**
192 * Uses all CLEFResultLists and the first CLEFQuery it finds in the
193 * RetrievalContainer to merge the lists into one MergedList. The CLEFQuery
194 * is only used to provide additional information, e.g. about the topic.
195 * The CLEFResultLists are first grouped by their languages and then
196 * merged according to the weights specified for their chain ID. If no
197 * weight had been specified for a list, the list is merged with the others
198 * using a weight of 1.0.
199 */
200 public void run()
201 {
202 logger.debug(this.getClass().getName()+" started.");
203 // Verarbeitung eines bestimmten ComponentSupport
204 if( container != null && container instanceof RetrievalContainer )
205 {
206 Vector resultLists = new Vector();
207 logger.info("Processing RetrievalContainer.");
208 // find all CLEFResultList objects and one CLEFQuery
209 for( Enumeration enum = container.getComponentSupport().elements(); enum.hasMoreElements(); )
210 {
211 ComponentSupport support = (ComponentSupport) enum.nextElement();
212 if( support instanceof CLEFResultList )
213 {
214 resultLists.add( support );
215 }
216 else if( query == null && support instanceof CLEFQuery )
217 {
218 query = (CLEFQuery)support;
219 }
220 }
221
222 if( resultLists.size() > 0 )
223 {
224 logger.debug("Merging "+resultLists.size()+" CLEFResultLists into one CLEFMergedResultList.");
225 CLEFResultListMerger merger;
226 CLEFResultList[] lists;
227 float[] weights;
228 if( query != null && query.getCutOff() > 0 )
229 {
230 // create CLEFResultListMerger that uses cut off if necessary
231 merger = new CLEFResultListMerger( query.getCutOff() );
232 }
233 else
234 merger = new CLEFResultListMerger();
235
236 // Iterate through languages
237 languages = CLEFConstants.getLanguages();
238 for( int i = 0; i < languages.length; i++ )
239 {
240 logger.debug("Trying to find CLEFResultLists objects for language "+languages[i]);
241 Vector langLists = new Vector();
242 // find all ResultLists for the current language
243 for( Enumeration enum = resultLists.elements(); enum.hasMoreElements(); )
244 {
245 CLEFResultList list = (CLEFResultList) enum.nextElement();
246 if( list.getLanguage() == languages[i] )
247 {
248 langLists.add(list);
249 }
250 }
251
252 // process the lists of the current language
253 if( langLists.size() > 0 )
254 {
255 // create arrays for lists and weights
256 lists = new CLEFResultList[langLists.size()];
257 weights = new float[langLists.size()];
258 int index = 0;
259 for( Enumeration enum = langLists.elements(); enum.hasMoreElements(); index++ )
260 {
261 CLEFResultList list = (CLEFResultList) enum.nextElement();
262 String chainID = list.getChainID();
263 lists[index] = list;
264 // if a weight had been specified it is used. otherwise a multiplicator of 1.0 is used.
265 if( componentWeights.get( chainID ) != null )
266 weights[index] = ((Float)componentWeights.get( chainID )).floatValue();
267 else
268 weights[index] = 1.0f;
269 }
270 logger.debug("Merging ResultLists");
271 CLEFMergedResultList mergedList = merger.createMergedResultList( lists, weights );
272 // set the topic of the mergedList to the one of the first list in array
273 mergedList.setTopic( lists[0].getTopic() );
274 // set the language of the MergedResultList
275 mergedList.setLanguage( languages[i] );
276 // use ComponentImpl's component reference, not this.component (has same result)
277 component.markComponentSupport( mergedList );
278 this.container.addComponentSupport( mergedList );
279 }
280 else
281 logger.debug("No CLEFResultsList objects found for language "+languages[i]);
282 }
283 }
284 else
285 logger.debug("No CLEFResultLists found for merging.");
286 }
287 else
288 {
289 logger.warn("Cannot process ComponentSupport "+support.getClass().getName());
290 container.addException( new ComponentRunnableException("The provided ComponentSupport cannot be processed.") );
291 }
292
293 setFinished(true);
294 }
295 }
296
297 }