Docjar: A Java Source and Docuemnt Enginecom.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: joelib/algo/morgan/Morgan.java


1   ///////////////////////////////////////////////////////////////////////////////
2   //  Filename: $RCSfile: Morgan.java,v $
3   //  Purpose:  Morgan number generation and unique molecule numbering.
4   //  Language: Java
5   //  Compiler: JDK 1.4
6   //  Authors:  Joerg K. Wegner
7   //  Version:  $Revision: 1.11 $
8   //            $Date: 2003/08/22 15:56:15 $
9   //            $Author: wegner $
10  //
11  //  Copyright (c) Dept. Computer Architecture, University of Tuebingen, Germany
12  //
13  //  This program is free software; you can redistribute it and/or modify
14  //  it under the terms of the GNU General Public License as published by
15  //  the Free Software Foundation version 2 of the License.
16  //
17  //  This program is distributed in the hope that it will be useful,
18  //  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  //  GNU General Public License for more details.
21  ///////////////////////////////////////////////////////////////////////////////
22  package joelib.algo.morgan;
23  
24  import joelib.algo.BFS;
25  import joelib.algo.BFSResult;
26  
27  import joelib.desc.DescriptorException;
28  import joelib.desc.DescriptorFactory;
29  
30  import joelib.molecule.JOEAtom;
31  import joelib.molecule.JOEBond;
32  import joelib.molecule.JOEMol;
33  
34  import joelib.sort.QuickInsertSort;
35  import joelib.sort.XYIntArray;
36  
37  import joelib.util.iterator.NbrAtomIterator;
38  
39  import wsi.ra.tool.Deque;
40  import wsi.ra.tool.DequeIterator;
41  import wsi.ra.tool.DequeNode;
42  
43  /*==========================================================================*
44   * IMPORTS
45   *==========================================================================  */
46  import java.util.Hashtable;
47  import java.util.Vector;
48  
49  import org.apache.log4j.Category;
50  
51  
52  /*==========================================================================*
53   * CLASS DECLARATION
54   *==========================================================================  */
55  
56  /**
57   * Morgan number generation and unique molecule numbering.
58   *
59   * @author     wegnerj
60   * @license    GPL
61   * @cvsversion    $Revision: 1.11 $, $Date: 2003/08/22 15:56:15 $
62   * @cite mor65
63   */
64  public class Morgan
65  {
66      //~ Static fields/initializers /////////////////////////////////////////////
67  
68      // Obtain a suitable logger.
69      private static Category logger = Category.getInstance(
70              "joelib.algo.morgan.Morgan");
71  
72      //~ Instance fields ////////////////////////////////////////////////////////
73  
74      private TieResolver tieResolver = null;
75  
76      /*-------------------------------------------------------------------------*
77       * public member variables
78       *-------------------------------------------------------------------------  */
79      private AtomDouble[] newNumbers;
80      private boolean tieResolvingProblem;
81      private int newNumberCounter;
82  
83      //~ Constructors ///////////////////////////////////////////////////////////
84  
85      /*-------------------------------------------------------------------------*
86       * constructor
87       *-------------------------------------------------------------------------  */
88  
89      /**
90       *  Constructor for the Morgan object
91       */
92      public Morgan(TieResolver _tieResolver)
93      {
94          tieResolver = _tieResolver;
95      }
96  
97      //~ Methods ////////////////////////////////////////////////////////////////
98  
99      /**
100      * Calculate the morgan numbers of a molecule.
101      *
102      * @param mol  the molecule
103      */
104     public boolean calculate(JOEMol mol)
105     {
106         // reset new numbers
107         newNumbers = null;
108 
109         // check for empty molecule
110         if (mol.empty())
111         {
112             logger.warn("Can not calculate morgan numbers for empty molecule.");
113             tieResolvingProblem = true;
114 
115             return false;
116         }
117 
118         tieResolvingProblem = false;
119         initNewNumbers(mol);
120         newNumberCounter = 1;
121 
122         //initialize helper variables
123         int nAtoms = mol.numAtoms();
124         double[] prevNumbers = new double[nAtoms + 1];
125         double[] actNumbers = new double[nAtoms + 1];
126         Hashtable numbers = new Hashtable();
127         int differentNumbers;
128         int breakAfterEqualTimes = 3;
129 
130         //initialize first numbers
131         for (int i = 1; i <= nAtoms; i++)
132         {
133             prevNumbers[i] = mol.getAtom(i).getValence();
134             actNumbers[i] = 0.0;
135         }
136 
137         // iterate nAtoms^2-times over the atoms
138         // break morgan number calculation, if the
139         // numbers don't change 'breakAfterEqualTimes'-times
140         JOEAtom atom;
141         JOEAtom nbr;
142         int equalCounter = breakAfterEqualTimes;
143         differentNumbers = -1;
144 
145         for (int outer = 1; outer <= nAtoms; outer++)
146         {
147             numbers.clear();
148 
149             for (int inner = 1; inner <= nAtoms; inner++)
150             {
151                 atom = mol.getAtom(inner);
152 
153                 actNumbers[inner] = 0.0;
154 
155                 NbrAtomIterator nait = atom.nbrAtomIterator();
156 
157                 while (nait.hasNext())
158                 {
159                     nbr = nait.nextNbrAtom();
160                     actNumbers[inner] += prevNumbers[nbr.getIdx()];
161                 }
162 
163                 numbers.put(new Double(actNumbers[inner]), "");
164             }
165 
166             // copy actual numbers to previous array
167             // use faster arraycopy without arraychecking every time
168             System.arraycopy(actNumbers, 0, prevNumbers, 0, actNumbers.length);
169 
170             // check for multiple numbers
171             if (differentNumbers == numbers.size())
172             {
173                 // and quit if different numbers occur breakAfterEqualTimes-times
174                 equalCounter--;
175 
176                 if (equalCounter == 0)
177                 {
178                     break;
179                 }
180             }
181             else
182             {
183                 equalCounter = breakAfterEqualTimes;
184             }
185 
186             differentNumbers = numbers.size();
187 
188             // verbose output
189             //System.out.println("Morgan-Algorithm round "+outer+". "+differentNumbers+" different numbers.");
190             //for(int i=1; i<=mol.numAtoms(); i++)
191             //{
192             //   System.out.println("atom #"+i+": "+actNumbers[i]);
193             //}
194         }
195 
196         for (int i = 1; i <= mol.numAtoms(); i++)
197         {
198             newNumbers[i - 1].tmpAtomIdx = actNumbers[i];
199         }
200 
201         return true;
202     }
203 
204     /**
205      * Renumber a molecule and use the calulated morgan
206      * numbers and try to resolve renumbering ties.
207      *
208      * @param mol  molecule to be renumbered
209      * @return     renumbered molecule
210      */
211     public JOEMol renumber(JOEMol mol)
212     {
213         if (newNumbers == null)
214         {
215             logger.warn("No morgan numbers available. '" + mol.getTitle() +
216                 "' was not renumbered.");
217 
218             return mol;
219         }
220 
221         JOEMol newMolecule = null;
222 
223         Vector tmp = new Vector();
224         mol.contiguousFragments(tmp);
225 
226         if (tmp.size() > 1)
227         {
228             logger.warn("" + tmp.size() +
229                 " contiguous fragments in molecule '" + mol.getTitle() +
230                 "' (salt ?). Molecule was not renumbered.");
231 
232             return mol;
233         }
234 
235         //    int itmp[];
236         //    for (int i = 0; i < tmp.size(); i++)
237         //    {
238         //      itmp=(int[])tmp.get(i);
239         //      for (int j = 0; j < itmp.length; j++)
240         //      {
241         //        System.out.print(itmp[j]);
242         //        System.out.print(' ');
243         //      }
244         //      System.out.println();
245         //    }
246         // get the breadth first search and start
247         // from the atom with the highest morgan
248         // number
249         double maxNumber = -1.0;
250         int maxNumberIndex = -1;
251 
252         for (int i = 0; i < mol.numAtoms(); i++)
253         {
254             if (maxNumber < newNumbers[i].tmpAtomIdx)
255             {
256                 maxNumber = newNumbers[i].tmpAtomIdx;
257                 maxNumberIndex = newNumbers[i].atomIdx;
258             }
259         }
260 
261         BFSResult bfs = getBFS(mol, mol.getAtom(maxNumberIndex));
262 
263         // build sorted increasing deques for atom renumbering
264         int maxBFSnumber = -1;
265 
266         for (int i = 0; i < bfs.traverse.length; i++)
267         {
268             if (maxBFSnumber < bfs.traverse[i])
269             {
270                 maxBFSnumber = bfs.traverse[i];
271             }
272         }
273 
274         Deque[] deques = new Deque[maxBFSnumber + 1];
275         buildDeques(mol, bfs, deques, maxBFSnumber);
276 
277         // initialize tie resolvers
278         tieResolver.init(mol);
279 
280         //get new numbers and resolve ties
281         for (int i = 0; i <= maxBFSnumber; i++)
282         {
283             //System.out.print("BFS "+i+": ");showDeque(deques[i]);
284             if (!getNewNumbers(mol, deques[i]))
285             {
286                 //System.out.println("problem "+mol.numAtoms());
287                 tieResolvingProblem = true;
288             }
289         }
290 
291         //if (tieResolvingProblem)
292         //  logger.debug("Renumbering problem in " + mol.getTitle());
293         //System.out.println("Unsorted morgan numbers (hopefully with resolved ties):");
294         //for(int i=0; i<mol.numAtoms(); i++)
295         //{
296         //  System.out.println(newNumbers[i]);
297         //}
298         newMolecule = buildNewMolecule(mol);
299 
300         return newMolecule;
301     }
302 
303     public boolean tieResolvingProblem()
304     {
305         return tieResolvingProblem;
306     }
307 
308     /**
309      * Get the result of a breath first search of the given molecule
310      * after starting from the given start atom. The start atom
311      * should be the atom with the highest morgan number.
312      *
313      * @param mol        the molecule
314      * @param startAtom  the start atom
315      * @return           the result of the breath first search
316      */
317     private BFSResult getBFS(JOEMol mol, JOEAtom startAtom)
318     {
319         BFS bfs = null;
320         BFSResult result = null;
321 
322         //        BFSInit init = new BFSInit(startAtom);
323         Hashtable init = new Hashtable();
324         init.put(BFS.STARTING_ATOM, startAtom);
325 
326         try
327         {
328             bfs = (BFS) DescriptorFactory.getDescriptor(BFS.DESC_KEY);
329             result = (BFSResult) bfs.calculate(mol, init);
330         }
331          catch (DescriptorException ex)
332         {
333             ex.printStackTrace();
334 
335             return null;
336         }
337 
338         return result;
339     }
340 
341     /**
342      * Recalculate the given numbers of the morgan algorithm.
343      * The first number begins at index 1. All index numbers
344      * have after recalculation a difference of 1.
345      * If renumbering ties occur in the same BFS sphere, they
346      * are tried to be resolved by bond orders or atomic numbers.
347      *
348      * @param mol    the molecule
349      * @param deque  the actual BFS sphere stored in a deque
350      */
351     private boolean getNewNumbers(JOEMol mol, Deque deque)
352     {
353         DequeIterator dit;
354         DequeNode node;
355         dit = deque.getDequeIterator();
356 
357         AtomDoubleParent tmp;
358         Vector ties = new Vector();
359         Vector tieNumbers = new Vector();
360 
361         // renumber atoms
362         while (dit.hasNext())
363         {
364             node = (DequeNode) dit.next();
365             tmp = (AtomDoubleParent) node.key;
366 
367             if (tmp.tie)
368             {
369                 ties.add(tmp);
370                 tieNumbers.add(new Integer(newNumberCounter++));
371             }
372             else
373             {
374                 newNumbers[tmp.atomIdx - 1].tmpAtomIdx = (double) newNumberCounter++;
375             }
376         }
377 
378         SingleTieResolver[] resolver = tieResolver.getTieResolvers();
379         SingleTieResolver singleResolver;
380 
381         // resolves ties
382         // try to resolve tie with bond orders
383         int counter = ties.size();
384         double maxResolverValue;
385         double actResolverValue;
386         int pickAtomIndex;
387         int minNumber;
388         int pickNumberIndex;
389 
390         for (int j = 0; j < resolver.length; j++)
391         {
392             singleResolver = resolver[j];
393 
394             for (int i = 0; i < counter; i++)
395             {
396                 pickAtomIndex = -1;
397                 maxResolverValue = -Double.MAX_VALUE;
398                 pickNumberIndex = -1;
399                 minNumber = Integer.MAX_VALUE;
400 
401                 for (int n = 0; n < ties.size(); n++)
402                 {
403                     tmp = (AtomDoubleParent) ties.get(n);
404                     actResolverValue = singleResolver.getResolvingValue(tmp, mol);
405 
406                     if (maxResolverValue < actResolverValue)
407                     {
408                         maxResolverValue = actResolverValue;
409                         pickAtomIndex = n;
410                     }
411                     else if (maxResolverValue == actResolverValue)
412                     {
413                         // can not resolve tie
414                         pickAtomIndex = -1;
415                     }
416 
417                     if (minNumber > ((Integer) tieNumbers.get(n)).intValue())
418                     {
419                         minNumber = ((Integer) tieNumbers.get(n)).intValue();
420                         pickNumberIndex = n;
421                     }
422                 }
423 
424                 if (pickAtomIndex != -1)
425                 {
426                     tmp = (AtomDoubleParent) ties.get(pickAtomIndex);
427                     newNumbers[tmp.atomIdx - 1].tmpAtomIdx = ((Integer) tieNumbers.get(pickNumberIndex)).intValue();
428 
429                     // done, remove atom and number from list
430                     ties.remove(pickAtomIndex);
431                     tieNumbers.remove(pickNumberIndex);
432                 }
433             }
434         }
435 
436         // use normal numbering if tie can not be resolved
437         boolean tiesResolved = true;
438 
439         if (ties.size() != 0)
440         {
441             //System.out.println("WARN: Can not resolves tie.");
442             tiesResolved = false;
443         }
444 
445         for (int n = 0; n < ties.size(); n++)
446         {
447             tmp = (AtomDoubleParent) ties.get(n);
448             newNumbers[tmp.atomIdx - 1].tmpAtomIdx = ((Integer) tieNumbers.get(n)).intValue();
449 
450             //System.out.println("REST: tmp.atomIdx:"+tmp.atomIdx+" tie_number:"+((Integer)tieNumbers.get(n)).intValue());
451         }
452 
453         return tiesResolved;
454     }
455 
456     /**
457      * Build the sorted BFS spheres stored in deques.
458      * The deques are sorted upwards to set the 'renumbering tie'-flags.
459      *
460      * @param mol           the molecule
461      * @param bfs           the BFS result started from the atom with the atom
462      *                      with the highest morgan number
463      * @param deques        the array to store the deques
464      * @param maxBFSnumber  the number of deques (BFS spheres)
465      */
466     private void buildDeques(JOEMol mol, BFSResult bfs, Deque[] deques,
467         int maxBFSnumber)
468     {
469         for (int i = 0; i <= maxBFSnumber; i++)
470         {
471             deques[i] = new Deque();
472         }
473 
474         Deque deque;
475         DequeNode front;
476         DequeNode back;
477         boolean insertAfter = true;
478         DequeNode dNode = null;
479         int bfsIndex;
480         AtomDoubleParent tmpNumber;
481 
482         for (int i = 0; i < mol.numAtoms(); i++)
483         {
484             bfsIndex = newNumbers[i].atomIdx - 1;
485             deque = deques[bfs.traverse[bfsIndex]];
486             front = deque.getFront();
487             back = deque.getBack();
488             tmpNumber = new AtomDoubleParent(newNumbers[i].atomIdx,
489                     newNumbers[i].tmpAtomIdx, bfs.parent[bfsIndex], false);
490 
491             if ((back == null) || (front == null))
492             {
493                 deque.pushBack(tmpNumber);
494             }
495             else
496             {
497                 DequeIterator dit = deque.getDequeIterator();
498                 insertAfter = true;
499 
500                 while (dit.hasNext())
501                 {
502                     dNode = (DequeNode) dit.next();
503 
504                     if (newNumbers[i].tmpAtomIdx <= ((AtomDoubleParent) dNode.key).tmpAtomIdx)
505                     {
506                         if (newNumbers[i].tmpAtomIdx == ((AtomDoubleParent) dNode.key).tmpAtomIdx)
507                         {
508                             ((AtomDoubleParent) dNode.key).tie = true;
509                             tmpNumber.tie = true;
510                         }
511 
512                         deque.insertBefore(dNode, tmpNumber);
513                         insertAfter = false;
514 
515                         break;
516                     }
517                 }
518 
519                 if (insertAfter)
520                 {
521                     deque.insertAfter(dNode, tmpNumber);
522                 }
523             }
524         }
525     }
526 
527     /**
528      * Build a new molecule and use the morgan numbers to
529      * get the new numbers.
530      * Be carefull:
531      * Data elements like descriptors (JOEPairdata) or comment
532      * data will not be copied !!!!
533      * Data elements like SSSR and all typers should not be copied !!!
534      *
535      * @param mol  the molecule
536      * @return     the new renumbered molecule
537      */
538     private JOEMol buildNewMolecule(JOEMol mol)
539     {
540         int checkBondNumber = mol.numBonds();
541         int checkAtomNumber = mol.numAtoms();
542 
543         // sort the given numbers in newNumbers in
544         // ascending order
545         sortNewNumbers();
546 
547         JOEMol newMolecule = new JOEMol(mol.getInputType(), mol.getOutputType());
548         newMolecule.beginModify();
549         newMolecule.reserveAtoms(mol.numAtoms());
550 
551         JOEAtom atom = new JOEAtom();
552         JOEAtom oldAtom;
553 
554         for (int i = 0; i < mol.numAtoms(); i++)
555         {
556             //System.out.println("add atom:"+mol.getAtom(newNumbers[i].atomIdx));
557             oldAtom = mol.getAtom(newNumbers[i].atomIdx);
558             atom.clear();
559             atom.setVector(oldAtom.getVector());
560             atom.setAtomicNum(oldAtom.getAtomicNum());
561             atom.setType(oldAtom.getType());
562 
563             if (!newMolecule.addAtom(atom))
564             {
565                 logger.error("Could not add atom.");
566 
567                 return null;
568             }
569         }
570 
571         // create transformation hashtable to enable fast bond transformations
572         Hashtable transform = new Hashtable(mol.numAtoms());
573 
574         for (int i = 0; i < mol.numAtoms(); i++)
575         {
576             // build look up table with
577             // old number --> new number !!!
578             transform.put(new Integer(newNumbers[i].atomIdx),
579                 new Integer((int) newNumbers[i].tmpAtomIdx));
580         }
581 
582         // create renumbered bonds
583         // bonds begin with index 0 !!!
584         // for a correct unique or canonical SMILES
585         // it's also necessarry to sort the bonds !!!
586         JOEBond oldBond;
587         int start;
588         int end;
589         int tmp;
590         XYIntArray sortedBonds = new XYIntArray(mol.numBonds());
591         Vector bonds = new Vector(mol.numBonds());
592 
593         for (int i = 0; i < mol.numBonds(); i++)
594         {
595             oldBond = mol.getBond(i);
596             start = ((Integer) transform.get(new Integer(
597                         oldBond.getBeginAtom().getIdx()))).intValue();
598             end = ((Integer) transform.get(new Integer(
599                         oldBond.getEndAtom().getIdx()))).intValue();
600 
601             if (start >= end)
602             {
603                 tmp = start;
604                 start = end;
605                 end = tmp;
606             }
607 
608             sortedBonds.x[i] = start << (16 + end);
609             sortedBonds.y[i] = i;
610 
611             bonds.add(new int[]
612                 {
613                     start, end, oldBond.getBondOrder(), oldBond.getFlags()
614                 });
615         }
616 
617         sortedBonds.sortX();
618 
619         int[] itmp;
620 
621         for (int i = 0; i < sortedBonds.x.length; i++)
622         {
623             itmp = (int[]) bonds.get(sortedBonds.y[i]);
624 
625             //System.out.println("Add Bond "+i+" "+start+" "+end);
626             //System.out.println("add bond #"+i+": ("+start+"<--"+oldBond.getBeginAtom().getIdx()+") ("+end+"<--"+oldBond.getEndAtom().getIdx()+")");
627             if (!newMolecule.addBond(itmp[0], itmp[1], itmp[2], itmp[3]))
628             {
629                 logger.error("Could not add bond.");
630 
631                 return null;
632             }
633         }
634 
635         newMolecule.endModify();
636 
637         newMolecule.setTitle(mol.getTitle());
638 
639         //System.out.println("new molecule:");
640         //System.out.println(newMolecule);
641         if (checkBondNumber != newMolecule.numBonds())
642         {
643             logger.error("Wrong number of bonds in renumbered molecule.");
644         }
645 
646         if (checkAtomNumber != newMolecule.numAtoms())
647         {
648             logger.error("Wrong number of atoms in renumbered molecule.");
649         }
650 
651         return newMolecule;
652     }
653 
654     /**
655      * Initialize morgan numbers.
656      *
657      * @param mol  the molecule
658      */
659     private void initNewNumbers(JOEMol mol)
660     {
661         newNumbers = new AtomDouble[mol.numAtoms()];
662 
663         for (int i = 0; i < mol.numAtoms(); i++)
664         {
665             newNumbers[i] = new AtomDouble();
666 
667             newNumbers[i].atomIdx = mol.getAtom(i + 1).getIdx();
668             newNumbers[i].tmpAtomIdx = 0.0;
669         }
670     }
671 
672     /**
673      * Show deque.
674      *
675      * @param deque  the deque
676      */
677     private void showDeque(Deque deque)
678     {
679         // show deques
680         DequeIterator dit;
681         DequeNode node;
682         dit = deque.getDequeIterator();
683 
684         while (dit.hasNext())
685         {
686             node = (DequeNode) dit.next();
687             System.out.print((AtomDoubleParent) node.key);
688         }
689 
690         System.out.println();
691     }
692 
693     /**
694      * Sort morgan numbers.
695      */
696     private void sortNewNumbers()
697     {
698         QuickInsertSort sorting = new QuickInsertSort();
699         sorting.sort(newNumbers, new AtomDoubleComparator());
700     }
701 }
702 ///////////////////////////////////////////////////////////////////////////////
703 //  END OF FILE.
704 ///////////////////////////////////////////////////////////////////////////////