Docjar: A Java Source and Docuemnt Enginecom.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: diffxml/fmes/Match.java


1   /*
2   Class to generate Matchings for FMES algorithm
3    
4   Copyright (C) 2002  Adrian Mouat
5    
6   This program is free software; you can redistribute it and/or
7   modify it under the terms of the GNU General Public License
8   as published by the Free Software Foundation; either version 2
9   of the License, or (at your option) any later version.
10   
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  GNU General Public License for more details.
15  
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, write to the Free Software
18  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19   
20  Author: Adrian Mouat
21  email: amouat@postmaster.co.uk
22  */
23  
24  package diffxml.fmes;
25  
26  //Code to solve "Good Matchings Problem"
27  //Algorithm Fast Match
28  //21/02/02
29  
30  import diffxml.*; 
31  import java.io.File;
32  import java.io.FileWriter;
33  import java.io.IOException;
34  import java.io.OutputStreamWriter;
35  //import java.io.Writer;
36  import java.util.TreeSet;
37  import java.util.Comparator;
38  import java.util.Iterator;
39  //import java.io.*;
40  import java.util.StringTokenizer;
41  
42  import org.apache.xerces.dom.NodeImpl;
43  import org.apache.xerces.parsers.DOMParser;
44  import org.apache.xerces.dom3.Node3;
45  
46  import org.w3c.dom.Attr;
47  import org.w3c.dom.Document;
48  import org.w3c.dom.NamedNodeMap;
49  import org.w3c.dom.Node;
50  import org.w3c.dom.NodeList;
51  import org.w3c.dom.NamedNodeMap;
52  
53  import org.w3c.dom.traversal.*; 
54  import org.xml.sax.InputSource;
55  import org.xml.sax.Locator;
56  import org.xml.sax.SAXException;
57  import org.xml.sax.helpers.*;
58  import org.xml.sax.SAXParseException;
59  import java.io.UnsupportedEncodingException;
60  
61  public class Match
62  {
63  
64  public boolean equal(Node a, Node b)
65  {
66  //Currently returns false but should throw exception
67  
68  if (a.getNodeType()!=b.getNodeType())
69    {
70    System.out.println("Types not equal");
71    return false;
72    }
73    
74  
75  //if Node is an element
76  if (a.getNodeType()==Node.ELEMENT_NODE)
77    {
78    //Check NodeNames equal
79    if (a.getNodeName()!=b.getNodeName())
80      return false;
81  
82    //Check attributes equal
83    NamedNodeMap attrs_a=a.getAttributes();
84    NamedNodeMap attrs_b=b.getAttributes();
85  
86    int a_num = (attrs_a != null) ? attrs_a.getLength() : 0;
87    for (int i=0; i<a_num; i++)
88      {
89      //Check if attr exists in other tag
90      if (attrs_b.getNamedItem(attrs_a.item(i).getNodeName())==null)
91        return false;
92  
93      if (! attrs_b.getNamedItem(attrs_a.item(i).getNodeName()).getNodeValue().equals
94          (attrs_a.item(i).getNodeValue()) )
95        {
96        //System.out.println("Equal: attributes not equal");
97        return false;
98        }
99      }
100   //Should probably compare positions of elements, or if kids matched or sumink
101   return true;
102   }
103 
104 //If node is a text node
105 if (a.getNodeType()==Node.TEXT_NODE)
106   {
107   //Need to check whitespace and case options
108   String a_str=a.getNodeValue();
109   String b_str=b.getNodeValue();
110   
111   if (Table.ign_all_ws)
112     {
113     //Ignore all whitespace
114     //Remove whitespace from nodes before comparison
115     StringTokenizer st = new StringTokenizer(a_str);
116     a_str="";
117     while (st.hasMoreTokens())
118       a_str=a_str+st.nextToken();
119 
120     st = new StringTokenizer(b_str);
121     b_str="";
122     while (st.hasMoreTokens())
123                         b_str=b_str+st.nextToken();  
124     }    
125   else if (Table.ign_ldn_ws)
126     {
127     //Ignore leading ws
128     //just call trim
129     a_str=a_str.trim();
130     b_str=b_str.trim();
131     }  
132 
133   //Check case optn
134   
135   if (Table.ign_case)
136     {
137     //Just make it all lower
138     a_str.toLowerCase();
139     b_str.toLowerCase();
140     }
141   
142   return (a_str.equals(b_str));
143   }
144 
145 //Node is not a text node or element, so just compare value and return.
146 
147 //System.out.println("Equal: elements equal");
148 return (a.getNodeValue().equals(b.getNodeValue()));
149 }
150 
151 class tdComp implements Comparator
152 {
153 //Remember we want things stored in reverse order of depth!
154 //We don't really care about order of strings but we need to
155 //differentiate for a set
156 
157 public int compare(Object o1, Object o2)
158   {
159   tagDepth td1 = (tagDepth)(o1);
160   tagDepth td2 = (tagDepth)(o2);
161   
162   if (td1.depth==td2.depth)
163     {
164     return (td1.tag.compareTo(td2.tag));
165     }
166   else 
167     {
168     return (td2.depth-td1.depth);  
169     }
170   }
171 
172 public boolean equals(Object o)
173   {
174   return o.equals(this);
175   }
176 
177 }
178 
179 public NodeSet test(Document doc1, Document doc2) 
180 throws Exception {
181 
182   NodeSet match_set = new NodeSet();
183 
184   //Normalise documents
185   doc1.getDocumentElement().normalize();
186   doc2.getDocumentElement().normalize();
187 
188   TreeSet td1 = markElements(doc1);
189   TreeSet td2 = markElements(doc2);
190     
191 
192   String wanted="";
193         tagDepth tg= new tagDepth();
194 
195   //Iterate for nodes in Tree1
196   Iterator it=td1.iterator();
197   while (it.hasNext())
198     {
199     tg=(tagDepth) it.next();    
200 
201     wanted=tg.tag;
202 
203     db.p("Wanted Node: " + wanted);
204     //Get all nodes in both trees with this tag
205     if (wanted.equals("#text"))
206       {
207       //Use node iterator
208       //Should really be bottom up, but shouldn't make big diff
209       db.p("Matching text nodes");
210       NodeIterator ni1 = ((DocumentTraversal)doc1).createNodeIterator(doc1.getDocumentElement(), NodeFilter.SHOW_TEXT, null, false);  
211       NodeIterator ni2 = ((DocumentTraversal)doc2).createNodeIterator(doc2.getDocumentElement(), NodeFilter.SHOW_TEXT,null, false);
212       NodeImpl na= (NodeImpl) ni1.nextNode();
213       NodeImpl nb= (NodeImpl) ni2.nextNode();
214       while(na!=null)  
215         {
216         //Should always be false but leave check in for mo
217         if (na.getUserData("matched").equals("false"))
218           {
219           while(nb!=null)
220             {
221             if (nb.getUserData("matched").equals("false") && equal(na,nb) )
222               {
223               //Add nodes to matching set
224               //Node3 unq1 = na;
225               //Node3 unq2 = nb;
226               match_set.add(na,nb);  
227               //Mark nodes matched
228               na.setUserData("matched","true",null);
229               nb.setUserData("matched","true",null);
230       
231               break;
232               }
233             nb=(NodeImpl) ni2.nextNode();
234             }
235           }  
236         na=(NodeImpl) ni1.nextNode();
237         ni2.detach();
238          ni2 = ((DocumentTraversal)doc2).createNodeIterator(doc2.getDocumentElement(), NodeFilter.SHOW_TEXT,null, false);
239         nb=(NodeImpl) ni2.nextNode();
240         }
241 
242 
243       }
244     else if (wanted.equals("#comment"))
245                         {
246                         //Use node iterator
247                         //Should really be bottom up, but shouldn't make big diff
248                         db.p("Matching comments");
249                         NodeIterator ni1 = ((DocumentTraversal)doc1).createNodeIterator(doc1.getDocumentElement(), NodeFilter.SHOW_COMMENT, null, false);
250                         NodeIterator ni2 = ((DocumentTraversal)doc2).createNodeIterator(doc2.getDocumentElement(), NodeFilter.SHOW_COMMENT,null, false);
251                         NodeImpl na= (NodeImpl) ni1.nextNode();
252                         NodeImpl nb= (NodeImpl) ni2.nextNode();
253                         while(na!=null)
254                                 {
255                                 //Should always be false but leave check in for mo
256                                 if (na.getUserData("matched").equals("false"))
257                                         {
258                                         while(nb!=null)
259                                                 {
260                                                 if (nb.getUserData("matched").equals("false") && equal(na,nb) )
261                                                         {
262                                                         match_set.add(na,nb);
263                                                         na.setUserData("matched","true",null);
264                                                         nb.setUserData("matched","true",null);
265  
266                                                         break;
267                                                         }
268                                                 nb=(NodeImpl) ni2.nextNode();
269                                                 }
270                                         }
271                                 na=(NodeImpl) ni1.nextNode();
272                                 ni2.detach();
273                                  ni2 = ((DocumentTraversal)doc2).createNodeIterator(doc2.getDocumentElement(), NodeFilter.SHOW_TEXT,null, false);
274                                 nb=(NodeImpl) ni2.nextNode();
275                                 }
276                         }
277     else {
278     //System.out.println("ENTERED!!!");
279     NodeList tg1=doc1.getElementsByTagName(wanted);
280     NodeList tg2=doc2.getElementsByTagName(wanted);
281 
282     //Cycle through tg1 looking for matches in tg2
283 
284     for(int a=0; a<tg1.getLength(); a++)
285       {
286       NodeImpl a_node= (NodeImpl) tg1.item(a);
287       if (a_node.getUserData("matched").equals("false"))
288         {
289         //Cycle through tg2 looking for match
290         //tg_tag:
291         for (int b=0; b<tg2.getLength(); b++)
292           {
293           NodeImpl b_node= (NodeImpl) tg2.item(b);
294           if (b_node.getUserData("matched").equals("false") && equal(tg1.item(a), tg2.item(b)))
295             {
296              //Add nodes to matching set
297                                                 match_set.add(tg1.item(a), tg2.item(b));
298  
299                                                 //mark nodes matched
300             a_node.setUserData("matched","true",null);
301             b_node.setUserData("matched","true",null);
302 
303             //Don't think this statement did nowt
304             //continue tg_tag;
305             break;
306             }
307           }
308         }
309       }
310       }
311     }
312 
313       
314   //Output matched nodes
315   if (db.on==true)
316     match_set.print_set();
317 
318   return match_set;    
319 
320 }
321 
322 public TreeSet markElements(Document doc)
323 {
324 //Maybe want to set to SHOW_ELEMENT and do other elements separately
325 TreeWalker t = ((DocumentTraversal)doc)
326   .createTreeWalker(doc.getDocumentElement(),NodeFilter.SHOW_ALL,null,false);
327 
328 NodeImpl n;
329 int depth;
330 String tmptag;
331 tagDepth tmp2 = new tagDepth();
332 
333 TreeSet td = new TreeSet(new tdComp());
334 
335 //Add root node (should change loop to include)
336 n= (NodeImpl) t.getCurrentNode();
337 n.setUserData("matched","false",null);
338 n.setUserData("inorder","false",null);
339 tmp2.tag=n.getNodeName();
340 tmp2.depth=0;
341 td.add(tmp2);
342 db.p("Added "  + tmp2.tag + " Depth " + 0);
343 
344 while ( (n=(NodeImpl) t.nextNode()) != null)
345   {
346   n.setUserData("matched","false",null);
347   //Let children default to be "out of order"
348   //Test with "inorder"
349   n.setUserData("inorder","true",null);
350 
351   //Get iterator for TreeSet
352   Iterator it = td.iterator();
353   
354   //Add to set
355   tagDepth tmp = new tagDepth();
356   tmp.tag=n.getNodeName();
357 
358   //Bit more trouble to get depth  
359   depth=1;
360   n=(NodeImpl) n.getParentNode();
361   while(n!=doc.getDocumentElement() && n!=null)
362     {
363     depth++;  
364     n=(NodeImpl) n.getParentNode();
365     }
366   
367   tmp.depth=depth;  
368   td.add(tmp);
369   db.p("Added "  + tmp.tag + " Depth " + depth);
370     //}
371   }
372   return td;  
373 }
374 
375 public int findLeaves(Document doc, String[] leaves)
376 {
377 TreeWalker t = ((DocumentTraversal)doc)
378   .createTreeWalker(doc.getDocumentElement(),NodeFilter.SHOW_ELEMENT,new LeafFilter(),false);
379 //Node[] leaves = new Node[100];
380 Node n;
381 int i=0;
382 while ( (n=t.nextNode() ) != null)
383   {//Note that only one occurence of any element in NamedNodeMap
384   leaves[i]=n.getNodeName();
385   for (int x=0;x<i;x++)
386     {
387     if (leaves[x].equals(leaves[i]))
388       {
389       i--;
390       break;
391       }
392     }
393   i++;
394   }
395 
396 return i;
397   
398 }
399 
400 
401 class LeafFilter implements NodeFilter 
402 {
403 //Consider leaf nodes to be elements
404 //with no children other than text nodes
405 //Then the text nodes represent the value
406 //of the element
407 
408   public short acceptNode(Node n) 
409     {
410     //System.out.println("name " + n.getNodeName());
411     if (n.hasChildNodes()==false)
412       return FILTER_ACCEPT;
413     
414     NodeList nodes = n.getChildNodes();    
415     for (int i=0; i<nodes.getLength(); i++)
416       {
417       if (nodes.item(i).getNodeType()!=Node.TEXT_NODE)
418         {
419         return FILTER_SKIP;
420         }
421     
422       }
423     return FILTER_ACCEPT;
424     }
425 }
426       
427 /*
428 public static void main(String[] args)
429 {
430 
431 if (args.length !=2)
432                 System.exit(0);
433 
434 
435 try 
436 {
437   //db.on=true;
438   DOMParser parser1 = new DOMParser();
439         DOMParser parser2 = new DOMParser();
440   
441   //Turn off entity resolving
442   try {
443                    parser1.setFeature("http://xml.org/sax/features/external-general-entities", 
444                                      false);
445        parser2.setFeature("http://xml.org/sax/features/external-general-entities",
446                                      false);
447        parser1.setFeature("http://xml.org/sax/features/external-parameter-entities",
448              false);
449        parser2.setFeature("http://xml.org/sax/features/external-parameter-entities",
450                                      false);
451        parser1.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar",
452              false);
453         parser2.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar",
454                                      false);
455          parser1.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd",
456              false);
457        parser2.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd",
458                                      false);
459        parser1.setFeature("http://apache.org/xml/features/dom/create-entity-ref-nodes", 
460              false);
461        parser2.setFeature("http://apache.org/xml/features/dom/create-entity-ref-nodes",
462                                      false);
463                } 
464   catch (SAXException e) {
465                    System.err.println("could not set parser feature");}
466 
467   //Ignore whitespace nodes
468   Table.ign_ws_nodes=true;
469 
470         parser1.parse(args[0]);
471         Document doc1=parser1.getDocument();
472         parser2.parse(args[1]);
473         Document doc2=parser2.getDocument();
474   Match match=new Match();
475   NodeSet matchings = match.test(doc1, doc2);
476   //db.on=false;
477   db.p("Creating Edit Script");
478   Document delta= EditScript.create(doc1, doc2, matchings); 
479   db.p("Created Edit Script");
480   Writer writer = new Writer();
481                 try {
482                     writer.setOutput(System.out, "UTF8");
483                 }
484                 catch (UnsupportedEncodingException e) {
485                     System.err.println("error: Unable to set output. Exiting.");
486                     System.exit(1);
487                 }
488    writer.setCanonical(false);
489                 writer.write(delta);
490   
491 } catch (Exception e)
492   {
493   e.printStackTrace();
494   }
495 
496 }
497 */
498 }
499