Source code: joelib/algo/morgan/Morgan.java
1 ///////////////////////////////////////////////////////////////////////////////
2 // Filename: $RCSfile: Morgan.java,v $
3 // Purpose: Morgan number generation and unique molecule numbering.
4 // Language: Java
5 // Compiler: JDK 1.4
6 // Authors: Joerg K. Wegner
7 // Version: $Revision: 1.11 $
8 // $Date: 2003/08/22 15:56:15 $
9 // $Author: wegner $
10 //
11 // Copyright (c) Dept. Computer Architecture, University of Tuebingen, Germany
12 //
13 // This program is free software; you can redistribute it and/or modify
14 // it under the terms of the GNU General Public License as published by
15 // the Free Software Foundation version 2 of the License.
16 //
17 // This program is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 // GNU General Public License for more details.
21 ///////////////////////////////////////////////////////////////////////////////
22 package joelib.algo.morgan;
23
24 import joelib.algo.BFS;
25 import joelib.algo.BFSResult;
26
27 import joelib.desc.DescriptorException;
28 import joelib.desc.DescriptorFactory;
29
30 import joelib.molecule.JOEAtom;
31 import joelib.molecule.JOEBond;
32 import joelib.molecule.JOEMol;
33
34 import joelib.sort.QuickInsertSort;
35 import joelib.sort.XYIntArray;
36
37 import joelib.util.iterator.NbrAtomIterator;
38
39 import wsi.ra.tool.Deque;
40 import wsi.ra.tool.DequeIterator;
41 import wsi.ra.tool.DequeNode;
42
43 /*==========================================================================*
44 * IMPORTS
45 *========================================================================== */
46 import java.util.Hashtable;
47 import java.util.Vector;
48
49 import org.apache.log4j.Category;
50
51
52 /*==========================================================================*
53 * CLASS DECLARATION
54 *========================================================================== */
55
56 /**
57 * Morgan number generation and unique molecule numbering.
58 *
59 * @author wegnerj
60 * @license GPL
61 * @cvsversion $Revision: 1.11 $, $Date: 2003/08/22 15:56:15 $
62 * @cite mor65
63 */
64 public class Morgan
65 {
66 //~ Static fields/initializers /////////////////////////////////////////////
67
68 // Obtain a suitable logger.
69 private static Category logger = Category.getInstance(
70 "joelib.algo.morgan.Morgan");
71
72 //~ Instance fields ////////////////////////////////////////////////////////
73
74 private TieResolver tieResolver = null;
75
76 /*-------------------------------------------------------------------------*
77 * public member variables
78 *------------------------------------------------------------------------- */
79 private AtomDouble[] newNumbers;
80 private boolean tieResolvingProblem;
81 private int newNumberCounter;
82
83 //~ Constructors ///////////////////////////////////////////////////////////
84
85 /*-------------------------------------------------------------------------*
86 * constructor
87 *------------------------------------------------------------------------- */
88
89 /**
90 * Constructor for the Morgan object
91 */
92 public Morgan(TieResolver _tieResolver)
93 {
94 tieResolver = _tieResolver;
95 }
96
97 //~ Methods ////////////////////////////////////////////////////////////////
98
99 /**
100 * Calculate the morgan numbers of a molecule.
101 *
102 * @param mol the molecule
103 */
104 public boolean calculate(JOEMol mol)
105 {
106 // reset new numbers
107 newNumbers = null;
108
109 // check for empty molecule
110 if (mol.empty())
111 {
112 logger.warn("Can not calculate morgan numbers for empty molecule.");
113 tieResolvingProblem = true;
114
115 return false;
116 }
117
118 tieResolvingProblem = false;
119 initNewNumbers(mol);
120 newNumberCounter = 1;
121
122 //initialize helper variables
123 int nAtoms = mol.numAtoms();
124 double[] prevNumbers = new double[nAtoms + 1];
125 double[] actNumbers = new double[nAtoms + 1];
126 Hashtable numbers = new Hashtable();
127 int differentNumbers;
128 int breakAfterEqualTimes = 3;
129
130 //initialize first numbers
131 for (int i = 1; i <= nAtoms; i++)
132 {
133 prevNumbers[i] = mol.getAtom(i).getValence();
134 actNumbers[i] = 0.0;
135 }
136
137 // iterate nAtoms^2-times over the atoms
138 // break morgan number calculation, if the
139 // numbers don't change 'breakAfterEqualTimes'-times
140 JOEAtom atom;
141 JOEAtom nbr;
142 int equalCounter = breakAfterEqualTimes;
143 differentNumbers = -1;
144
145 for (int outer = 1; outer <= nAtoms; outer++)
146 {
147 numbers.clear();
148
149 for (int inner = 1; inner <= nAtoms; inner++)
150 {
151 atom = mol.getAtom(inner);
152
153 actNumbers[inner] = 0.0;
154
155 NbrAtomIterator nait = atom.nbrAtomIterator();
156
157 while (nait.hasNext())
158 {
159 nbr = nait.nextNbrAtom();
160 actNumbers[inner] += prevNumbers[nbr.getIdx()];
161 }
162
163 numbers.put(new Double(actNumbers[inner]), "");
164 }
165
166 // copy actual numbers to previous array
167 // use faster arraycopy without arraychecking every time
168 System.arraycopy(actNumbers, 0, prevNumbers, 0, actNumbers.length);
169
170 // check for multiple numbers
171 if (differentNumbers == numbers.size())
172 {
173 // and quit if different numbers occur breakAfterEqualTimes-times
174 equalCounter--;
175
176 if (equalCounter == 0)
177 {
178 break;
179 }
180 }
181 else
182 {
183 equalCounter = breakAfterEqualTimes;
184 }
185
186 differentNumbers = numbers.size();
187
188 // verbose output
189 //System.out.println("Morgan-Algorithm round "+outer+". "+differentNumbers+" different numbers.");
190 //for(int i=1; i<=mol.numAtoms(); i++)
191 //{
192 // System.out.println("atom #"+i+": "+actNumbers[i]);
193 //}
194 }
195
196 for (int i = 1; i <= mol.numAtoms(); i++)
197 {
198 newNumbers[i - 1].tmpAtomIdx = actNumbers[i];
199 }
200
201 return true;
202 }
203
204 /**
205 * Renumber a molecule and use the calulated morgan
206 * numbers and try to resolve renumbering ties.
207 *
208 * @param mol molecule to be renumbered
209 * @return renumbered molecule
210 */
211 public JOEMol renumber(JOEMol mol)
212 {
213 if (newNumbers == null)
214 {
215 logger.warn("No morgan numbers available. '" + mol.getTitle() +
216 "' was not renumbered.");
217
218 return mol;
219 }
220
221 JOEMol newMolecule = null;
222
223 Vector tmp = new Vector();
224 mol.contiguousFragments(tmp);
225
226 if (tmp.size() > 1)
227 {
228 logger.warn("" + tmp.size() +
229 " contiguous fragments in molecule '" + mol.getTitle() +
230 "' (salt ?). Molecule was not renumbered.");
231
232 return mol;
233 }
234
235 // int itmp[];
236 // for (int i = 0; i < tmp.size(); i++)
237 // {
238 // itmp=(int[])tmp.get(i);
239 // for (int j = 0; j < itmp.length; j++)
240 // {
241 // System.out.print(itmp[j]);
242 // System.out.print(' ');
243 // }
244 // System.out.println();
245 // }
246 // get the breadth first search and start
247 // from the atom with the highest morgan
248 // number
249 double maxNumber = -1.0;
250 int maxNumberIndex = -1;
251
252 for (int i = 0; i < mol.numAtoms(); i++)
253 {
254 if (maxNumber < newNumbers[i].tmpAtomIdx)
255 {
256 maxNumber = newNumbers[i].tmpAtomIdx;
257 maxNumberIndex = newNumbers[i].atomIdx;
258 }
259 }
260
261 BFSResult bfs = getBFS(mol, mol.getAtom(maxNumberIndex));
262
263 // build sorted increasing deques for atom renumbering
264 int maxBFSnumber = -1;
265
266 for (int i = 0; i < bfs.traverse.length; i++)
267 {
268 if (maxBFSnumber < bfs.traverse[i])
269 {
270 maxBFSnumber = bfs.traverse[i];
271 }
272 }
273
274 Deque[] deques = new Deque[maxBFSnumber + 1];
275 buildDeques(mol, bfs, deques, maxBFSnumber);
276
277 // initialize tie resolvers
278 tieResolver.init(mol);
279
280 //get new numbers and resolve ties
281 for (int i = 0; i <= maxBFSnumber; i++)
282 {
283 //System.out.print("BFS "+i+": ");showDeque(deques[i]);
284 if (!getNewNumbers(mol, deques[i]))
285 {
286 //System.out.println("problem "+mol.numAtoms());
287 tieResolvingProblem = true;
288 }
289 }
290
291 //if (tieResolvingProblem)
292 // logger.debug("Renumbering problem in " + mol.getTitle());
293 //System.out.println("Unsorted morgan numbers (hopefully with resolved ties):");
294 //for(int i=0; i<mol.numAtoms(); i++)
295 //{
296 // System.out.println(newNumbers[i]);
297 //}
298 newMolecule = buildNewMolecule(mol);
299
300 return newMolecule;
301 }
302
303 public boolean tieResolvingProblem()
304 {
305 return tieResolvingProblem;
306 }
307
308 /**
309 * Get the result of a breath first search of the given molecule
310 * after starting from the given start atom. The start atom
311 * should be the atom with the highest morgan number.
312 *
313 * @param mol the molecule
314 * @param startAtom the start atom
315 * @return the result of the breath first search
316 */
317 private BFSResult getBFS(JOEMol mol, JOEAtom startAtom)
318 {
319 BFS bfs = null;
320 BFSResult result = null;
321
322 // BFSInit init = new BFSInit(startAtom);
323 Hashtable init = new Hashtable();
324 init.put(BFS.STARTING_ATOM, startAtom);
325
326 try
327 {
328 bfs = (BFS) DescriptorFactory.getDescriptor(BFS.DESC_KEY);
329 result = (BFSResult) bfs.calculate(mol, init);
330 }
331 catch (DescriptorException ex)
332 {
333 ex.printStackTrace();
334
335 return null;
336 }
337
338 return result;
339 }
340
341 /**
342 * Recalculate the given numbers of the morgan algorithm.
343 * The first number begins at index 1. All index numbers
344 * have after recalculation a difference of 1.
345 * If renumbering ties occur in the same BFS sphere, they
346 * are tried to be resolved by bond orders or atomic numbers.
347 *
348 * @param mol the molecule
349 * @param deque the actual BFS sphere stored in a deque
350 */
351 private boolean getNewNumbers(JOEMol mol, Deque deque)
352 {
353 DequeIterator dit;
354 DequeNode node;
355 dit = deque.getDequeIterator();
356
357 AtomDoubleParent tmp;
358 Vector ties = new Vector();
359 Vector tieNumbers = new Vector();
360
361 // renumber atoms
362 while (dit.hasNext())
363 {
364 node = (DequeNode) dit.next();
365 tmp = (AtomDoubleParent) node.key;
366
367 if (tmp.tie)
368 {
369 ties.add(tmp);
370 tieNumbers.add(new Integer(newNumberCounter++));
371 }
372 else
373 {
374 newNumbers[tmp.atomIdx - 1].tmpAtomIdx = (double) newNumberCounter++;
375 }
376 }
377
378 SingleTieResolver[] resolver = tieResolver.getTieResolvers();
379 SingleTieResolver singleResolver;
380
381 // resolves ties
382 // try to resolve tie with bond orders
383 int counter = ties.size();
384 double maxResolverValue;
385 double actResolverValue;
386 int pickAtomIndex;
387 int minNumber;
388 int pickNumberIndex;
389
390 for (int j = 0; j < resolver.length; j++)
391 {
392 singleResolver = resolver[j];
393
394 for (int i = 0; i < counter; i++)
395 {
396 pickAtomIndex = -1;
397 maxResolverValue = -Double.MAX_VALUE;
398 pickNumberIndex = -1;
399 minNumber = Integer.MAX_VALUE;
400
401 for (int n = 0; n < ties.size(); n++)
402 {
403 tmp = (AtomDoubleParent) ties.get(n);
404 actResolverValue = singleResolver.getResolvingValue(tmp, mol);
405
406 if (maxResolverValue < actResolverValue)
407 {
408 maxResolverValue = actResolverValue;
409 pickAtomIndex = n;
410 }
411 else if (maxResolverValue == actResolverValue)
412 {
413 // can not resolve tie
414 pickAtomIndex = -1;
415 }
416
417 if (minNumber > ((Integer) tieNumbers.get(n)).intValue())
418 {
419 minNumber = ((Integer) tieNumbers.get(n)).intValue();
420 pickNumberIndex = n;
421 }
422 }
423
424 if (pickAtomIndex != -1)
425 {
426 tmp = (AtomDoubleParent) ties.get(pickAtomIndex);
427 newNumbers[tmp.atomIdx - 1].tmpAtomIdx = ((Integer) tieNumbers.get(pickNumberIndex)).intValue();
428
429 // done, remove atom and number from list
430 ties.remove(pickAtomIndex);
431 tieNumbers.remove(pickNumberIndex);
432 }
433 }
434 }
435
436 // use normal numbering if tie can not be resolved
437 boolean tiesResolved = true;
438
439 if (ties.size() != 0)
440 {
441 //System.out.println("WARN: Can not resolves tie.");
442 tiesResolved = false;
443 }
444
445 for (int n = 0; n < ties.size(); n++)
446 {
447 tmp = (AtomDoubleParent) ties.get(n);
448 newNumbers[tmp.atomIdx - 1].tmpAtomIdx = ((Integer) tieNumbers.get(n)).intValue();
449
450 //System.out.println("REST: tmp.atomIdx:"+tmp.atomIdx+" tie_number:"+((Integer)tieNumbers.get(n)).intValue());
451 }
452
453 return tiesResolved;
454 }
455
456 /**
457 * Build the sorted BFS spheres stored in deques.
458 * The deques are sorted upwards to set the 'renumbering tie'-flags.
459 *
460 * @param mol the molecule
461 * @param bfs the BFS result started from the atom with the atom
462 * with the highest morgan number
463 * @param deques the array to store the deques
464 * @param maxBFSnumber the number of deques (BFS spheres)
465 */
466 private void buildDeques(JOEMol mol, BFSResult bfs, Deque[] deques,
467 int maxBFSnumber)
468 {
469 for (int i = 0; i <= maxBFSnumber; i++)
470 {
471 deques[i] = new Deque();
472 }
473
474 Deque deque;
475 DequeNode front;
476 DequeNode back;
477 boolean insertAfter = true;
478 DequeNode dNode = null;
479 int bfsIndex;
480 AtomDoubleParent tmpNumber;
481
482 for (int i = 0; i < mol.numAtoms(); i++)
483 {
484 bfsIndex = newNumbers[i].atomIdx - 1;
485 deque = deques[bfs.traverse[bfsIndex]];
486 front = deque.getFront();
487 back = deque.getBack();
488 tmpNumber = new AtomDoubleParent(newNumbers[i].atomIdx,
489 newNumbers[i].tmpAtomIdx, bfs.parent[bfsIndex], false);
490
491 if ((back == null) || (front == null))
492 {
493 deque.pushBack(tmpNumber);
494 }
495 else
496 {
497 DequeIterator dit = deque.getDequeIterator();
498 insertAfter = true;
499
500 while (dit.hasNext())
501 {
502 dNode = (DequeNode) dit.next();
503
504 if (newNumbers[i].tmpAtomIdx <= ((AtomDoubleParent) dNode.key).tmpAtomIdx)
505 {
506 if (newNumbers[i].tmpAtomIdx == ((AtomDoubleParent) dNode.key).tmpAtomIdx)
507 {
508 ((AtomDoubleParent) dNode.key).tie = true;
509 tmpNumber.tie = true;
510 }
511
512 deque.insertBefore(dNode, tmpNumber);
513 insertAfter = false;
514
515 break;
516 }
517 }
518
519 if (insertAfter)
520 {
521 deque.insertAfter(dNode, tmpNumber);
522 }
523 }
524 }
525 }
526
527 /**
528 * Build a new molecule and use the morgan numbers to
529 * get the new numbers.
530 * Be carefull:
531 * Data elements like descriptors (JOEPairdata) or comment
532 * data will not be copied !!!!
533 * Data elements like SSSR and all typers should not be copied !!!
534 *
535 * @param mol the molecule
536 * @return the new renumbered molecule
537 */
538 private JOEMol buildNewMolecule(JOEMol mol)
539 {
540 int checkBondNumber = mol.numBonds();
541 int checkAtomNumber = mol.numAtoms();
542
543 // sort the given numbers in newNumbers in
544 // ascending order
545 sortNewNumbers();
546
547 JOEMol newMolecule = new JOEMol(mol.getInputType(), mol.getOutputType());
548 newMolecule.beginModify();
549 newMolecule.reserveAtoms(mol.numAtoms());
550
551 JOEAtom atom = new JOEAtom();
552 JOEAtom oldAtom;
553
554 for (int i = 0; i < mol.numAtoms(); i++)
555 {
556 //System.out.println("add atom:"+mol.getAtom(newNumbers[i].atomIdx));
557 oldAtom = mol.getAtom(newNumbers[i].atomIdx);
558 atom.clear();
559 atom.setVector(oldAtom.getVector());
560 atom.setAtomicNum(oldAtom.getAtomicNum());
561 atom.setType(oldAtom.getType());
562
563 if (!newMolecule.addAtom(atom))
564 {
565 logger.error("Could not add atom.");
566
567 return null;
568 }
569 }
570
571 // create transformation hashtable to enable fast bond transformations
572 Hashtable transform = new Hashtable(mol.numAtoms());
573
574 for (int i = 0; i < mol.numAtoms(); i++)
575 {
576 // build look up table with
577 // old number --> new number !!!
578 transform.put(new Integer(newNumbers[i].atomIdx),
579 new Integer((int) newNumbers[i].tmpAtomIdx));
580 }
581
582 // create renumbered bonds
583 // bonds begin with index 0 !!!
584 // for a correct unique or canonical SMILES
585 // it's also necessarry to sort the bonds !!!
586 JOEBond oldBond;
587 int start;
588 int end;
589 int tmp;
590 XYIntArray sortedBonds = new XYIntArray(mol.numBonds());
591 Vector bonds = new Vector(mol.numBonds());
592
593 for (int i = 0; i < mol.numBonds(); i++)
594 {
595 oldBond = mol.getBond(i);
596 start = ((Integer) transform.get(new Integer(
597 oldBond.getBeginAtom().getIdx()))).intValue();
598 end = ((Integer) transform.get(new Integer(
599 oldBond.getEndAtom().getIdx()))).intValue();
600
601 if (start >= end)
602 {
603 tmp = start;
604 start = end;
605 end = tmp;
606 }
607
608 sortedBonds.x[i] = start << (16 + end);
609 sortedBonds.y[i] = i;
610
611 bonds.add(new int[]
612 {
613 start, end, oldBond.getBondOrder(), oldBond.getFlags()
614 });
615 }
616
617 sortedBonds.sortX();
618
619 int[] itmp;
620
621 for (int i = 0; i < sortedBonds.x.length; i++)
622 {
623 itmp = (int[]) bonds.get(sortedBonds.y[i]);
624
625 //System.out.println("Add Bond "+i+" "+start+" "+end);
626 //System.out.println("add bond #"+i+": ("+start+"<--"+oldBond.getBeginAtom().getIdx()+") ("+end+"<--"+oldBond.getEndAtom().getIdx()+")");
627 if (!newMolecule.addBond(itmp[0], itmp[1], itmp[2], itmp[3]))
628 {
629 logger.error("Could not add bond.");
630
631 return null;
632 }
633 }
634
635 newMolecule.endModify();
636
637 newMolecule.setTitle(mol.getTitle());
638
639 //System.out.println("new molecule:");
640 //System.out.println(newMolecule);
641 if (checkBondNumber != newMolecule.numBonds())
642 {
643 logger.error("Wrong number of bonds in renumbered molecule.");
644 }
645
646 if (checkAtomNumber != newMolecule.numAtoms())
647 {
648 logger.error("Wrong number of atoms in renumbered molecule.");
649 }
650
651 return newMolecule;
652 }
653
654 /**
655 * Initialize morgan numbers.
656 *
657 * @param mol the molecule
658 */
659 private void initNewNumbers(JOEMol mol)
660 {
661 newNumbers = new AtomDouble[mol.numAtoms()];
662
663 for (int i = 0; i < mol.numAtoms(); i++)
664 {
665 newNumbers[i] = new AtomDouble();
666
667 newNumbers[i].atomIdx = mol.getAtom(i + 1).getIdx();
668 newNumbers[i].tmpAtomIdx = 0.0;
669 }
670 }
671
672 /**
673 * Show deque.
674 *
675 * @param deque the deque
676 */
677 private void showDeque(Deque deque)
678 {
679 // show deques
680 DequeIterator dit;
681 DequeNode node;
682 dit = deque.getDequeIterator();
683
684 while (dit.hasNext())
685 {
686 node = (DequeNode) dit.next();
687 System.out.print((AtomDoubleParent) node.key);
688 }
689
690 System.out.println();
691 }
692
693 /**
694 * Sort morgan numbers.
695 */
696 private void sortNewNumbers()
697 {
698 QuickInsertSort sorting = new QuickInsertSort();
699 sorting.sort(newNumbers, new AtomDoubleComparator());
700 }
701 }
702 ///////////////////////////////////////////////////////////////////////////////
703 // END OF FILE.
704 ///////////////////////////////////////////////////////////////////////////////