Source code: org/bdgp/apps/dagedit/dataadapter/GOFlatFileAdapter.java
1 package org.bdgp.apps.dagedit.dataadapter;
2
3 import java.io.*;
4 import java.net.*;
5 import java.util.*;
6 import org.bdgp.apps.dagedit.datamodel.*;
7 import org.bdgp.apps.dagedit.DAGEditor;
8 import org.bdgp.io.*;
9 import org.bdgp.cv.datamodel.*;
10 import org.bdgp.util.*;
11
12 public class GOFlatFileAdapter extends AbstractDataAdapter implements DEDataAdapterI {
13
// --- Parser state used while reading a term file (see getTerms) ---
// Stack of ancestor terms for the line currently being parsed.
14 Stack termStack;
// Root term of the file currently being parsed.
15 Term root;
// NOTE(review): not referenced in the visible part of this file.
16 Term writeRoot;
// Relationship symbol of the most recently parsed term line.
17 String currentType;
18
// URLs of the term files to read; filled in by setPath/addPath.
19 Vector filenames;
// URL of the optional definitions file; null when none was configured.
20 URL defFilename;
21
// ParentageHolder records that checkParentage verifies for the current file.
22 Vector parentageList;
// Collects parse errors for the file currently being read.
23 CompoundGOFlatFileParseException exceptionHolder;
// When true, checkParentage reports parent declarations that have no
// matching parent/child link instead of creating the link.
24 protected boolean strictParentage = false;
// When true, a definition that refers to an unknown id is reported as an error.
25 protected boolean strictDefinition = false;
// Passed to every CompoundGOFlatFileParseException this adapter creates.
26 protected boolean hideDownstream = true;
// When true, the ancestor (cycle) checks in getTerms/checkParentage are skipped.
27 protected boolean allowCycles = false;
// When true, a parent id that cannot be resolved is recorded as a dangling
// relationship instead of being reported as an error.
28 protected boolean allowDangling = false;
29
// NOTE(review): not referenced in the visible part of this file.
30 private DEDataAdapterI idAdapter = new DefaultIDAdapter();
31
// Placeholder type used when a relationship symbol is not recognized.
32 protected static TermRelationshipType FAILED_TYPE =
33 new TermRelationshipType(null, null);
// Type registered in typeToChar under the '?' symbol.
34 protected static TermRelationshipType UNKNOWN_TYPE =
35 new TermRelationshipType("UNKNOWN", "unknown");
36
// Default relationship type, supplied through setCharTypeMappings.
37 protected TermRelationshipType defaultType;
38
// Set of reserved characters; only the keys are meaningful.
39 protected static final Hashtable RESERVED_SYMBOLS = new Hashtable();
40
// Returned as-is by getTermCategories.
41 protected Vector categories = new Vector();
// Relationship types encountered while parsing.
42 protected Vector relationshipTypes = new Vector();
43
// Leading comment of the last parsed file (set via setComment).
44 protected String comment;
45
// NOTE(review): the flags below are only written, never read, in the visible code.
46 protected boolean reduceSize = false;
47 protected boolean useLegacyTypes = false;
48
// One of the reserved characters registered in RESERVED_SYMBOLS.
49 public final static char BOUNDARY_CHAR = '@';
50
51 static {
52 Object junk = new Object();
53 RESERVED_SYMBOLS.put(new Character(';'), junk);
54 RESERVED_SYMBOLS.put(new Character('$'), junk);
55 RESERVED_SYMBOLS.put(new Character(','), junk);
56 RESERVED_SYMBOLS.put(new Character(':'), junk);
57 RESERVED_SYMBOLS.put(new Character('!'), junk);
58 RESERVED_SYMBOLS.put(new Character('\\'), junk);
59 RESERVED_SYMBOLS.put(new Character('?'), junk);
60 RESERVED_SYMBOLS.put(new Character(BOUNDARY_CHAR), junk);
61 }
62
63 public static boolean isReservedCharacter(char c) {
64 return RESERVED_SYMBOLS.containsKey(new Character(c));
65 }
66
/**
 * Stores the reduceSize flag on this adapter.
 *
 * @param reduceSize the new value of the flag
 */
67 public void setReduceSize(boolean reduceSize) {
68 this.reduceSize = reduceSize;
69 }
70
/**
 * Stores the comment text; getRoot() copies it onto the edit history it
 * returns, and getTerms sets it from the leading comment lines of a file.
 *
 * @param comment the comment text, may be null
 */
71 public void setComment(String comment) {
72 this.comment = comment;
73 }
74
75 protected static Hashtable getDefaultTypeBindings() {
76 Hashtable out = new Hashtable();
77 out.put("%", new TermRelationshipType("ISA",
78 "is a"));
79 out.put("<", new TermRelationshipType("PARTOF",
80 "part of"));
81 return out;
82 }
83
84 protected static TermRelationshipType getDefaultTermRelationship() {
85 return (TermRelationshipType) getDefaultTypeBindings().
86 get(new Character('%'));
87 }
88
// Maps a relationship symbol (String key) to its TermRelationshipType.
89 Hashtable charToType;
// Inverse of charToType: maps a TermRelationshipType to its symbol.
90 Hashtable typeToChar;
91
92
93 public Vector getRelationshipTypes() {
94 Vector out = new Vector();
95 Enumeration e = charToType.elements();
96 while(e.hasMoreElements()) {
97 out.addElement(e.nextElement());
98 }
99 TermRelationshipType trt = getDefaultType();
100 out.removeElement(trt);
101 out.insertElementAt(trt, 0);
102 return out;
103 }
104
105 public GOFlatFileAdapter() {
106 setCharTypeMappings(getDefaultTypeBindings(),
107 getDefaultTermRelationship());
108 }
109
/**
 * Controls cycle checking. When set to true, getTerms and checkParentage
 * skip their "term is its own ancestor" checks.
 *
 * @param allowCycles true to accept cyclic input
 */
110 public void setAllowCycles(boolean allowCycles) {
111 this.allowCycles = allowCycles;
112 }
113
/**
 * @return the current value of the allowCycles flag
 */
114 public boolean getAllowCycles() {
115 return allowCycles;
116 }
117
/**
 * Controls handling of unresolved parent ids. When set to true,
 * checkParentage records them as dangling relationships on the child
 * term instead of reporting a parse error.
 *
 * @param allowDangling true to accept unresolved parent ids
 */
118 public void setAllowDangling(boolean allowDangling) {
119 this.allowDangling = allowDangling;
120 }
121
/**
 * @return the current value of the allowDangling flag
 */
122 public boolean getAllowDangling() {
123 return allowDangling;
124 }
125
/**
 * Returns the adapter's category list. The internal Vector itself is
 * returned, not a copy.
 *
 * @return the categories Vector
 * @throws DataAdapterException declared by the signature; this
 *         implementation does not throw it
 */
126 public Vector getTermCategories() throws DataAdapterException {
127 return categories;
128 }
129
130 public void setCharTypeMappings(Hashtable charToType,
131 TermRelationshipType defaultType) {
132 this.charToType = (Hashtable) charToType.clone();
133 this.defaultType = defaultType;
134
135 typeToChar = new Hashtable();
136
137 Enumeration e = charToType.keys();
138 while(e.hasMoreElements()) {
139 Object key = e.nextElement();
140 Object value = charToType.get(key);
141 typeToChar.put(value, key);
142 }
143
144 typeToChar.put(UNKNOWN_TYPE, new Character('?'));
145 }
146
/**
 * Plain record of one parent declaration found on a term line.
 * checkParentage reads these records after a file has been parsed:
 * parentID/childID are looked up in the term table, type is looked up in
 * charToType, and line/lineNumber/colNumber are used for error reports.
 */
147 private class ParentageHolder {
148 public String parentID;
149 public String childID;
150 public String line;
151 public String name;
152 public String type;
153 public int lineNumber;
154 public int colNumber;
155 }
156
/**
 * Plain record for one definition entry (id, term name, definition text
 * and references).
 * NOTE(review): not instantiated anywhere in the visible part of this file.
 */
157 private class DefHolder {
158 public String goid = "";
159 public String term = "";
160 public String def = "";
161 public Vector references = new Vector();
162 }
163
/**
 * Sets the flag that is handed to every CompoundGOFlatFileParseException
 * created by this adapter. The command line tool clears it in verbose
 * mode ("does not hide downstream errors").
 *
 * @param hide the new value of the hideDownstream flag
 */
164 public void setHideDownstream(boolean hide) {
165 this.hideDownstream = hide;
166 }
167
/**
 * Not supported by the flat file adapter.
 *
 * @throws DataAdapterException always, with the message "Not supported"
 */
168 public String [] getIDs(Term root, Term term, String prefix, int length,
169 int count)
170 throws DataAdapterException {
171 throw new DataAdapterException("Not supported");
172 }
173
/**
 * Not supported by the flat file adapter.
 *
 * @throws DataAdapterException always, with the message "Not supported"
 */
174 public String [] getIDs(Term root, Term term, String prefix,
175 int min, int max, int length, int count)
176 throws DataAdapterException {
177 throw new DataAdapterException("Not supported");
178 }
179
/**
 * Not supported by the flat file adapter.
 *
 * @throws DataAdapterException always, with the message "Not supported"
 */
180 public Vector getHistories() throws DataAdapterException {
181 throw new DataAdapterException("Not supported");
182 }
183
/**
 * Enables or disables strict parentage checking. In strict mode
 * checkParentage reports a declared parent that has no matching
 * parent/child link; otherwise it creates the missing link.
 *
 * @param strict true to enable strict checking
 */
184 public void setStrictParentage(boolean strict) {
185 strictParentage = strict;
186 }
187
/**
 * Enables or disables strict definition checking. In strict mode
 * setDefinition reports a definition whose id matches no known term.
 *
 * @param strict true to enable strict checking
 */
188 public void setStrictDefinition(boolean strict) {
189 strictDefinition = strict;
190 }
191
192 public DataAdapterUI getUI(IOOperation op) {
193 if (op.equals(DEDataAdapterI.READ_TERMS) ||
194 op.equals(DEDataAdapterI.IMPORT_TERMS))
195 return new GOFlatFileGUI(op);
196 else if (op.equals(DEDataAdapterI.WRITE_TERMS) ||
197 op.equals(DEDataAdapterI.EXPORT_TERMS))
198 return new GOFlatFileGUI(op);
199 else if (op.equals(DEDataAdapterI.CONFIGURE))
200 return new GOFlatFileGUI(op);
201 else
202 return null;
203 }
204
205 public static void main(String [] args) throws Exception {
206 GOFlatFileAdapter adapter = new GOFlatFileAdapter();
207 adapter.setHideDownstream(true);
208 String filename = null;
209 String deffilename = null;
210 String usage = "Usage: goc [-v] [-sp] [-sd] [-c] [-d <defs>] <filename1> <filename2> <filename3> ...\n"+
211 "options:\n"+
212 "\t-v\tverbose mode; does not hide downstream errors\n"+
213 "\t-sp\tenables strict parentage checking\n"+
214 "\t-sd\tenables strict definition checking\n"+
215 "\t-c\tallow cycles in input file";
216 if (args.length == 0) {
217 System.err.println(usage);
218 System.exit(0);
219 }
220
221 Vector filenames = new Vector();
222 for(int i=0; i < args.length; i++) {
223 if (args[i].equals("-sd"))
224 adapter.setStrictDefinition(true);
225 else if (args[i].equals("-sp"))
226 adapter.setStrictParentage(true);
227 else if (args[i].equals("-c"))
228 adapter.setAllowCycles(true);
229 else if (args[i].equals("-v"))
230 adapter.setHideDownstream(false);
231 else if (args[i].equals("-d")) {
232 i++;
233 if (i >= args.length) {
234 System.err.println("Use of -d without providing filename");
235 System.err.println(usage);
236 System.exit(1);
237 }
238 deffilename = args[i];
239 } else
240 filenames.addElement(args[i]);
241 }
242
243 try {
244 adapter.setPath(filenames);
245 if (deffilename != null)
246 adapter.setDefinitionPath(deffilename);
247 adapter.getRoot();
248 } catch (GOFlatFileParseException e) {
249 System.err.println(e.toString());
250 } catch (DataAdapterException e) {
251 System.err.println(e.getMessage());
252 }
253 }
254
255 protected void setDefinition(
256 IDWrapper id,
257 Hashtable terms,
258 String definition,
259 String comment,
260 Vector refs,
261 CompoundGOFlatFileParseException defExceptions,
262 URL filename,
263 String startline,
264 int startlineNumber)
265 throws DataAdapterException {
266
267 if (id == null) {
268 defExceptions.addException(
269 new GOFlatFileParseException(
270 "No goid field in entry.",
271 filename,
272 startline,
273 startlineNumber,
274 0));
275 return;
276 }
277 if (definition == null) {
278 defExceptions.addException(
279 new GOFlatFileParseException(
280 "No definition field in entry.",
281 filename,
282 startline,
283 startlineNumber,
284 0));
285 return;
286 }
287
288 if (refs.size() < 1) {
289 defExceptions.addException(
290 new GOFlatFileParseException(
291 "No definition_reference field in entry.",
292 filename,
293 startline,
294 startlineNumber,
295 0));
296 return;
297 }
298
299 Term term = (Term) terms.get(id.toString());
300 if (term != null) {
301 term.setDefinition(definition);
302 if (comment == null)
303 term.setComment("");
304 else
305 term.setComment(comment);
306 for(int i=0; i < refs.size(); i++) {
307 term.addDefDbxref((Dbxref) refs.elementAt(i));
308 }
309 } else if (term == null && strictDefinition) {
310 defExceptions.addException(
311 new GOFlatFileParseException(
312 "Reference to non-existant GO id "+id.toString(),
313 filename,
314 startline,
315 startlineNumber,
316 0));
317 }
318 }
319
320 public void populateDefinitions(Hashtable allterms, URL filename)
321 throws DataAdapterException {
322 try {
323 URLConnection connection = filename.openConnection();
324 InputStream inputStream = connection.getInputStream();
325 BufferedReader reader =
326 new BufferedReader(new InputStreamReader(inputStream));
327 int totalBytes = connection.getContentLength();
328
329 if (totalBytes < 1)
330 return;
331 CompoundGOFlatFileParseException defExceptions =
332 new CompoundGOFlatFileParseException(hideDownstream);
333 int lineNumber = 0;
334 int byteNumber = 0;
335 int percentVal = 0;
336 IDWrapper goid = null;
337 String term = null;
338 String def = null;
339 String comment = null;
340 Vector references = new Vector();
341 boolean inDefinition = false;
342 int startlineNumber = -1;
343 String startline = null;
344 while(true) {
345 String line = reader.readLine();
346 lineNumber++;
347
348 if (line == null) {
349 if (!(goid == null ||
350 term == null ||
351 def == null ||
352 references.size() == 0))
353 setDefinition(goid,
354 allterms,
355 unescapeDefText(def),
356 unescapeDefText(comment),
357 references,
358 defExceptions,
359 filename,
360 startline,
361 startlineNumber);
362 break;
363 }
364
365 byteNumber += line.length();
366
367 int newPercent = 100 * byteNumber / totalBytes;
368 if (newPercent != percentVal) {
369 percentVal = newPercent;
370 fireProgressEvent(
371 new ProgressEvent(this,
372 new Double((double) percentVal),
373 "Parsing definitions..."));
374 }
375
376 if(line.trim().equals("")) {
377 if (!(goid == null ||
378 term == null ||
379 def == null ||
380 references.size() == 0)) {
381 setDefinition(goid,
382 allterms,
383 unescapeDefText(def),
384 unescapeDefText(comment),
385 references,
386 defExceptions,
387 filename,
388 startline,
389 startlineNumber);
390 }
391 goid = null;
392 term = null;
393 def = null;
394 comment = null;
395 references = new Vector();
396 startlineNumber = -1;
397 startline = null;
398 inDefinition = false;
399 }
400
401 if (line.trim().length() == 0 ||
402 line.charAt(0) == '!') {
403 continue;
404 }
405
406 if (startline == null) {
407 startline = line;
408 startlineNumber = lineNumber;
409 }
410
411 int start = findStartOfDefFieldData(line);
412 if (start == -1) {
413 if (inDefinition)
414 def += " "+line;
415 else {
416 defExceptions.addException(
417 new GOFlatFileParseException(
418 "No type field found, and line is not a "+
419 "continuation of definition text.",
420 filename,
421 line,
422 lineNumber,
423 0));
424 }
425 } else {
426 String key = line.substring(0,start - 2);
427 String value = line.substring(start);
428 if (key.equals("definition")) {
429 inDefinition = true;
430 if (def == null) {
431 def = value;
432 } else
433 defExceptions.addException(
434 new GOFlatFileParseException(
435 "Multiple definition fields for one entry.",
436 filename,
437 line,
438 lineNumber,
439 0));
440 } else if (key.equals("comment")) {
441 inDefinition = false;
442 if (comment == null) {
443 comment = value;
444 } else
445 defExceptions.addException(
446 new GOFlatFileParseException(
447 "Multiple comment fields for one entry.",
448 filename,
449 line,
450 lineNumber,
451 0));
452 } else if (key.equals("goid") || key.equals("id")) {
453 inDefinition = false;
454 if (goid == null) {
455 try {
456 Queue tokenList =
457 getQueueForLine(filename,
458 line,
459 lineNumber);
460 // discard goid type tag
461 tokenList.dequeue();
462 // discard colon
463 tokenList.dequeue();
464 goid = pullOffID(tokenList, exceptionHolder);
465 } catch (NoSuchElementException e) {
466 GOFlatFileParseException ex =
467 new GOFlatFileParseException(
468 "Unexpected end of line ",
469 filename,
470 line,
471 lineNumber,
472 line.length());
473 defExceptions.addException(ex);
474 }
475 } else
476 defExceptions.addException(
477 new GOFlatFileParseException(
478 "Multiple id fields for one entry.",
479 filename,
480 line,
481 lineNumber,
482 0));
483 } else if (key.equals("term")) {
484 inDefinition = false;
485 if (term == null) {
486 term = value;
487 } else
488 defExceptions.addException(
489 new GOFlatFileParseException(
490 "Multiple term name fields for one entry.",
491 filename,
492 line,
493 lineNumber,
494 0));
495 } else if (key.equals("definition_reference")) {
496 inDefinition = false;
497 try {
498 Queue tokens =
499 getQueueForLine(filename,
500 line,
501 lineNumber);
502 // discard definition_reference tag
503 tokens.dequeue();
504 // discard colon
505 GOToken tokenStart = (GOToken)
506 tokens.dequeue();
507
508 /*
509 GOToken keytoken = (GOToken) tokens.dequeue();
510 String dbname = keytoken.getToken();
511
512 GOToken token = (GOToken) tokens.dequeue();
513 if (!token.getToken().equals(":")) {
514 GOFlatFileParseException ex = new
515 GOFlatFileParseException(
516 "Unexpected seperator \""+token.getToken()+
517 "\" found. Expected \":\"",
518 token.getFilename(),
519 token.getLine(),
520 token.getLineNumber(),
521 token.getColNumber());
522 defExceptions.addException(ex);
523 }
524 String dbid = pullOffValues(tokens);
525 */
526
527 boolean foundColon = false;
528 String dbname = "";
529 String dbid = "";
530 while(!tokens.isEmpty()) {
531 GOToken token = (GOToken) tokens.dequeue();
532 if (token.getToken().equals(":")) {
533 foundColon = true;
534 continue;
535 }
536 if (foundColon) {
537 if (dbid.length() > 0)
538 dbid += " ";
539 dbid += token.getToken();
540 } else {
541 if (dbname.length() > 0)
542 dbname += " ";
543 dbname += token.getToken();
544 }
545 }
546 if (!foundColon) {
547 GOFlatFileParseException ex = new
548 GOFlatFileParseException(
549 "Illegal dbxref; no : found.",
550 tokenStart.getFilename(),
551 tokenStart.getLine(),
552 tokenStart.getLineNumber(),
553 tokenStart.getColNumber()+2);
554 defExceptions.addException(ex);
555 }
556
557 Dbxref ref = new Dbxref(dbname,
558 dbid,
559 Dbxref.DEFINITION);
560 if (!references.contains(ref))
561 references.addElement(ref);
562 } catch (NoSuchElementException e) {
563 GOFlatFileParseException ex =
564 new GOFlatFileParseException(
565 "Unexpected end of line ",
566 filename,
567 line,
568 lineNumber,
569 line.length());
570 defExceptions.addException(ex);
571 }
572 } else {
573 defExceptions.addException(
574 new GOFlatFileParseException(
575 "Unrecognized type field \""+key+"\"",
576 filename,
577 line,
578 lineNumber,
579 0));
580 }
581 }
582 }
583 if (!defExceptions.isEmpty())
584 throw defExceptions;
585 } catch (FileNotFoundException e) {
586 throw new DataAdapterException("Cannot find "+filename);
587 } catch (IOException e) {
588 throw new DataAdapterException("File unreadable");
589 }
590 }
591
592 private int findStartOfDefFieldData(String in) {
593 int firstSpace = in.indexOf(" ");
594 if (firstSpace < 1 ||
595 in.charAt(firstSpace - 1) != ':')
596 return -1;
597 else return firstSpace+1;
598 }
599
/**
 * Initialization hook; this adapter needs no initialization.
 */
600 public void init() {
601 // do nothing
602 }
603
/**
 * Replaces the list of input files with the single given path.
 * The path is converted to a URL by addPath.
 *
 * @param filename URL string or file system path of the term file
 */
604 public void setPath(String filename) {
605 this.filenames = new Vector();
606 addPath(filename);
607 }
608
609 public void setPath(Vector filenames) {
610 this.filenames = new Vector();
611 for(int i=0; i < filenames.size() ; i++) {
612 addPath((String) filenames.elementAt(i));
613 }
614 }
615
616 protected void addPath(String path) {
617 URL url = getURLForPath(path);
618 if (url != null)
619 filenames.addElement(url);
620 }
621
622 protected URL getURLForPath(String path) {
623 URL url = null;
624 try {
625 url = new URL(path);
626 } catch (MalformedURLException e) {
627 try {
628 url = new URL("file:"+path);
629 } catch (MalformedURLException ex) {
630 }
631 }
632 return url;
633 }
634
635 public void setDefinitionPath(String defFilename) {
636 if (defFilename != null && defFilename.length() > 0)
637 this.defFilename = getURLForPath(defFilename);
638 else
639 this.defFilename = null;
640 }
641
642 protected int getLinesInFile(URL filename) throws IOException {
643 int lineNumber = 0;
644 BufferedReader reader =
645 new BufferedReader(new InputStreamReader(filename.openStream()));
646 while(reader.readLine() != null)
647 lineNumber++;
648 reader.close();
649 return lineNumber;
650 }
651
652 public Term importTerms(Term root, boolean stripIds)
653 throws DataAdapterException {
654 Term term;
655 if (stripIds) {
656 term = getRoot(new Hashtable());
657 wipeOutIDs(term);
658 } else {
659 term = getRoot(root.getAllDescendantsHash(true));
660 }
661 return term;
662 }
663
664 private void wipeOutIDs(Term term) {
665 term.setID(null, null);
666 for(int i=0; i < term.getChildren().size(); i++) {
667 TermRelationship tr = (TermRelationship) term.getChildren().
668 elementAt(i);
669 wipeOutIDs(tr.getChild());
670 }
671 }
672
673 public synchronized DEEditHistory getRoot() throws DataAdapterException {
674 relationshipTypes.removeAllElements();
675 Term root = getRoot(new Hashtable());
676 DEEditHistory history = new DEEditHistory(root);
677 history.setUser(System.getProperty("user.name"));
678 history.setDate(new Date());
679 history.setRelationshipTypes(relationshipTypes);
680 if (relationshipTypes.size() > 0)
681 history.setDefaultRelationshipType((TermRelationshipType)
682 relationshipTypes.get(0));
683 history.setComment(this.comment);
684 return history;
685 }
686
687 public synchronized Term getRoot(Hashtable allterms)
688 throws DataAdapterException {
689 Term root = null;
690 URL rootfile = null;
691 for(int i=0; i < filenames.size(); i++) {
692 URL filename = (URL) filenames.elementAt(i);
693 exceptionHolder = new CompoundGOFlatFileParseException(hideDownstream);
694 parentageList = new Vector();
695 Hashtable termHash = new Hashtable();
696
697 Term term = getTerms(allterms, filename);
698 checkParentage(allterms, parentageList, filename);
699 if (root == null) {
700 root = term;
701 root.setRoot(true);
702 rootfile = filename;
703 } else {
704
705 if (!term.getTerm().equals(root.getTerm()) ||
706 !term.getID().equals(root.getID()))
707 throw new DataAdapterException(rootfile+" and "+filename+
708 " have different root "+
709 "terms!");
710 /*
711 Vector children = term.getChildren();
712 for(int j=0; j < children.size(); j++) {
713 TermRelationship tr = (TermRelationship) children.
714 elementAt(j);
715 tr.getChild().removeAllParents();
716 System.err.println("removing parents from "+tr.getChild());
717 root.addChild(tr.getChild(), tr.getType());
718 System.err.println("added "+tr.getChild()+" to "+root+", parents are now "+tr.getChild().getParents());
719 // tr.getChild().getParents().remove(term);
720 }
721 */
722 }
723 //allterms.putAll(termHash);
724 }
725 if (defFilename != null)
726 populateDefinitions(root.getAllDescendantsHash(true), defFilename);
727
728 parentageList.removeAllElements();
729 return root;
730 }
731
/**
 * Writes an empty line to the stream by delegating to
 * println(stream, "").
 *
 * @param stream the stream to write to
 */
732 protected static void println(PrintStream stream) {
733 println(stream, "");
734 }
735
/**
 * Writes the string followed by a single "\n" character, regardless of
 * the platform line separator.
 *
 * @param stream the stream to write to
 * @param string the text of the line
 */
736 protected static void println(PrintStream stream, String string) {
737 stream.print(string+"\n");
738 }
739
740 public void checkParentage(Hashtable allterms,
741 Vector parentageList,
742 URL filename) throws DataAdapterException {
743 Hashtable parentCounter = new Hashtable();
744 CompoundGOFlatFileParseException parentageErrors =
745 new CompoundGOFlatFileParseException(hideDownstream);
746 for(int i=0; i < parentageList.size(); i++) {
747 ParentageHolder ph = (ParentageHolder) parentageList.elementAt(i);
748
749 Term term = (Term) allterms.get(ph.childID);
750 Term parent = (Term) allterms.get(ph.parentID);
751
752 TermRelationshipType type;
753 type = (TermRelationshipType)
754 charToType.get(ph.type);
755
756 if (type == null) {
757 type = FAILED_TYPE;
758 parentageErrors.addException(
759 new GOFlatFileParseException(
760 "Unrecognized parentage character \""+ph.type+"\"",
761 filename,
762 ph.line,
763 ph.lineNumber,
764 ph.colNumber));
765 }
766
767 if (parent == null) {
768 if (allowDangling) {
769 DanglingRelationship dr =
770 new DanglingRelationship(ph.parentID,
771 ph.name,
772 type);
773 term.addDanglingParent(dr);
774 } else {
775 parentageErrors.addException(
776 new GOFlatFileParseException(
777 "No term exists with id "+ph.parentID,
778 filename,
779 ph.line,
780 ph.lineNumber,
781 ph.colNumber));
782 }
783 } else {
784 Vector trs = getTRsForParentWithID(term, ph.parentID);
785 if (trs.size() == 0) {
786 if (strictParentage) {
787 parentageErrors.addException(
788 new GOFlatFileParseException(
789 ph.parentID+
790 " is listed as parent of "+ph.childID+", "+
791 "but "+ph.childID+" does not appear in "+
792 "the file as a child of "+ph.parentID,
793 filename,
794 ph.line,
795 ph.lineNumber,
796 ph.colNumber));
797 } else {
798 TermRelationship tr =
799 new TermRelationship(term, parent, type);
800 if (!relationshipTypes.contains(type))
801 relationshipTypes.add(type);
802 parent.addChild(tr);
803 }
804 }
805
806 /*
807 // check for type agreement
808 if (tr != null && !tr.getType().equals(type)) {
809 parentageErrors.addException(
810 new GOFlatFileParseException(
811 "The parentage relationship given contradicts an "+
812 "earlier statement of this relationship.",
813 filename,
814 ph.line,
815 ph.lineNumber,
816 ph.colNumber));
817 }
818 */
819 // check for cycles
820 // if (!allowCycles && contains(term, parent)) {
821 if (!allowCycles && parent.hasAncestor(term)) {
822 parentageErrors.addException(
823 new GOFlatFileParseException(
824 "This relationship would create a cycle.",
825 filename,
826 ph.line,
827 ph.lineNumber,
828 ph.colNumber));
829 throw parentageErrors;
830 }
831 }
832 }
833 if (!parentageErrors.isEmpty())
834 throw parentageErrors;
835
836 } //end checkParentage()
837
/**
 * Convenience overload: delegates to the three-argument contains with a
 * fresh, empty Hashtable of already visited terms.
 *
 * @param a the term to start from
 * @param b the term to look for
 * @return the result of the three-argument overload
 */
838 protected static boolean contains(Term a, Term b) {
839 return contains(a, b, new Hashtable());
840 }
841
842 private static boolean contains(Term a, Term b, Hashtable lookedAt) {
843 if (a == b || lookedAt.containsKey(a))
844 return true;
845 lookedAt.put(a, a);
846 Enumeration e = a.getChildren().elements();
847 while(e.hasMoreElements()) {
848 TermRelationship tr = (TermRelationship) e.nextElement();
849 Term child = tr.getChild();
850 try {
851 if (contains(child, b, lookedAt)) {
852 return true;
853 }
854 } catch (Throwable ex) {
855 System.err.println("got "+ex);
856 System.exit(1);
857 }
858 }
859 return false;
860 }
861
862 public Vector getTRsForParentWithID(Term child, String id) {
863 Vector out = new Vector();
864 Vector parents = child.getParents();
865 for(int i=0; i < parents.size(); i++) {
866 TermRelationship tr = (TermRelationship) parents.elementAt(i);
867 if (tr.getParent().getID().equals(id)) {
868 out.addElement(tr);
869 continue;
870 }
871 Vector synonyms = tr.getParent().getSynonyms();
872 for(int j=0; j < synonyms.size(); j++) {
873 Synonym s = (Synonym) synonyms.elementAt(j);
874 if (s.getID() != null && s.getID().equals(id)) {
875 out.addElement(tr);
876 continue;
877 }
878 }
879 }
880 return out;
881 }
882
883 public Term findTermInHash(Term term,
884 Hashtable hash) {
885 if (hash.containsKey(term.getID())) {
886 return (Term) hash.get(term.getID());
887 }
888 /*
889 Vector synonyms = term.getSynonyms();
890 for(int i=0; i < synonyms.size(); i++) {
891 Synonym s = (Synonym) synonyms.elementAt(i);
892 if (s.getID() != null && hash.containsKey(s.getID())) {
893 return (Term) hash.get(s.getID());
894 }
895 }
896 */
897 return null;
898 }
899
/**
 * Stores a term in the table under its own id. Registration under
 * synonym ids is disabled (see the commented-out code in the body).
 *
 * @param term the term to store
 * @param hash table of terms keyed by id
 */
900 public void putTermInHash(Term term,
901 Hashtable hash) {
902 hash.put(term.getID(), term);
903 /*
904 Vector synonyms = term.getSynonyms();
905 for(int i=0; i < synonyms.size(); i++) {
906 Synonym s = (Synonym) synonyms.elementAt(i);
907 if (s.getID() != null)
908 hash.put(s.getID(), term);
909 }
910 */
911 }
912
913 // make put term in hash function too
914
915 public Term getTerms(Hashtable allterms, URL filename)
916 throws DataAdapterException {
917 Term oldTerm = null;
918 root = null;
919 termStack = new Stack();
920 int currentDepth = 0;
921 int lineNumber = 0;
922 int byteNumber = 0;
923 String currentLine = null;
924 boolean inputStarted = false;
925 String leaderComment = null;
926 int percentVal = 0;
927 try {
928 URLConnection connection = filename.openConnection();
929 InputStream inputStream = connection.getInputStream();
930 BufferedReader reader =
931 new BufferedReader(new InputStreamReader(inputStream));
932 int totalBytes = connection.getContentLength();
933 String shortfilename = (new File(filename.getFile())).getName();
934 while((currentLine = reader.readLine()) != null) {
935 lineNumber++;
936 byteNumber += currentLine.length();
937 if (totalBytes > 0) {
938 int newPercent = 100 * byteNumber / totalBytes;
939 if (newPercent != percentVal) {
940 percentVal = newPercent;
941 fireProgressEvent(
942 new ProgressEvent(this,
943 new Double((double) percentVal),
944 "Parsing "+
945 shortfilename+"..."));
946 }
947 }
948
949 if (currentLine.trim().length() == 0) {
950 GOFlatFileParseException ex = new
951 GOFlatFileParseException(
952 "Blank lines are not allowed",
953 filename,
954 currentLine,
955 lineNumber,
956 1);
957 exceptionHolder.addException(ex);
958 continue;
959 }
960 if (isComment(currentLine)) {
961 if (!inputStarted) {
962 String commentLine = currentLine.substring(1);
963 if (commentLine.startsWith("type:")) {
964 commentLine = commentLine.substring(5);
965 StringTokenizer stringTokenizer =
966 new StringTokenizer(commentLine);
967 try {
968 String typeChar = stringTokenizer.nextToken();
969 String typeName = stringTokenizer.nextToken();
970 String typeDesc = "";
971 boolean first = true;
972 while(stringTokenizer.hasMoreTokens()) {
973 if (!first) {
974 typeDesc = typeDesc + " "+
975 stringTokenizer.nextToken();
976 } else {
977 first = false;
978 typeDesc = stringTokenizer.nextToken();
979 }
980 }
981 TermRelationshipType trt = new
982 TermRelationshipType(typeName, typeDesc);
983 if (!relationshipTypes.contains(trt)) {
984 relationshipTypes.add(trt);
985 charToType.put(typeChar, trt);
986 typeToChar.put(trt, typeChar);
987 }
988 } catch (NoSuchElementException ex) {
989 }
990 } else if (leaderComment == null ||
991 leaderComment.length() == 0)
992 leaderComment = commentLine;
993 else
994 leaderComment += "\n"+commentLine;
995 }
996 continue;
997 } else
998 inputStarted = true;
999 int oldDepth = currentDepth;
1000 currentDepth = countLeadingSpaces(currentLine);
1001 Term term = parseLine(filename, currentLine, lineNumber);
1002 if (root == null && currentDepth > 0) {
1003 GOFlatFileParseException ex = new
1004 GOFlatFileParseException(
1005 "Bad indentation. The first line of a file must "+
1006 "not be indented.",
1007 filename,
1008 currentLine,
1009 lineNumber,
1010 1);
1011 exceptionHolder.addException(ex);
1012 throw exceptionHolder;
1013 } else if (root == null && !currentType.equals("$")) {
1014 GOFlatFileParseException ex = new
1015 GOFlatFileParseException(
1016 "Bad root symbol. The root term must always be "+
1017 "listed with the $ symbol.",
1018 filename,
1019 currentLine,
1020 lineNumber,
1021 1);
1022 exceptionHolder.addException(ex);
1023 throw exceptionHolder;
1024 }
1025
1026 // quit if we've reached an end of file marker
1027 if (term == null)
1028 break;
1029
1030 Term found = (Term) allterms.get(term.getID());
1031
1032 // check to see if synonyms are found too
1033
1034 if (found != null) {
1035 if (!found.getTerm().equals(term.getTerm())) {
1036 GOFlatFileParseException ex = new
1037 GOFlatFileParseException(
1038 "Misspelled term name or illegal reuse of a"+
1039 " GO id. This term was previously defined with "+
1040 "the name \""+found.getTerm()+
1041 "\", but new name is \""+term.getTerm()+"\".",
1042 filename,
1043 currentLine,
1044 lineNumber,
1045 1);
1046 exceptionHolder.addException(ex);
1047 }
1048 term = found;
1049 } else {
1050 putTermInHash(term, allterms);
1051 }
1052
1053 if (oldDepth > currentDepth) {
1054 for(int i=0; i < oldDepth - currentDepth; i++)
1055 termStack.pop();
1056 } else if (oldDepth < currentDepth) {
1057 // structural problem; exception must be thrown immediately
1058 if (currentDepth - oldDepth != 1) {
1059 GOFlatFileParseException ex = new
1060 GOFlatFileParseException(
1061 "Bad indentation. No line may be greater than 1 "+
1062 "space deeper than its parent.",
1063 filename,
1064 currentLine,
1065 lineNumber,
1066 1);
1067 exceptionHolder.addException(ex);
1068 throw exceptionHolder;
1069 }
1070 termStack.push(oldTerm);
1071 }
1072 if (termStack.size() > 0) {
1073 Term parent = (Term) termStack.peek();
1074
1075 if (!allowCycles && parent.hasAncestor(term)) {
1076 GOFlatFileParseException ex = new
1077 GOFlatFileParseException(
1078 "Attempt to make "+
1079 term.getID()+" an "+
1080 "ancestor of itself. This could be a "+
1081 "structural error, or it could be caused "+
1082 "by the improper reuse of a go id.",
1083 filename,
1084 currentLine,
1085 lineNumber,
1086 1);
1087 exceptionHolder.addException(ex);
1088 throw exceptionHolder;
1089 }
1090
1091 TermRelationshipType relType = (TermRelationshipType)
1092 charToType.get(currentType);
1093 if (relType == null) {
1094 relType = FAILED_TYPE;
1095 GOFlatFileParseException ex = new
1096 GOFlatFileParseException(
1097 "Unrecognized type character \""+
1098 currentType+"\"",
1099 filename,
1100 currentLine,
1101 lineNumber,
1102 1);
1103 exceptionHolder.addException(ex);
1104 } else {
1105 if (!relationshipTypes.contains(relType))
1106 relationshipTypes.add(relType);
1107 }
1108 parent.addChild(term, relType);
1109 } else {
1110 if (root == null) {
1111 root = term;
1112 root.setRoot(true);
1113 } else {
1114 // structural problem
1115 if (currentDepth - oldDepth != 1) {
1116 GOFlatFileParseException ex = new
1117 GOFlatFileParseException(
1118 "Attempt to assign second root term.",
1119 filename,
1120 currentLine,
1121 lineNumber,
1122 1);
1123 exceptionHolder.addException(ex);
1124 throw exceptionHolder;
1125 }
1126 }
1127 }
1128 oldTerm = term;
1129 }
1130
1131 // discard trailing whitespace
1132 while((currentLine = reader.readLine()) != null) {
1133 if (currentLine.trim().length() > 0) {
1134 GOFlatFileParseException ex = new GOFlatFileParseException(
1135 "Data after end of file token found.",
1136 filename,
1137 currentLine,
1138 lineNumber,
1139 1);
1140 exceptionHolder.addException(ex);
1141 throw exceptionHolder;
1142 }
1143 }
1144 if (root == null) {
1145 GOFlatFileParseException ex = new GOFlatFileParseException(
1146 "File contains no data.",
1147 filename,
1148 "",
1149 0,
1150 1);
1151 exceptionHolder.addException(ex);
1152 }
1153 if (!exceptionHolder.isEmpty())
1154 throw exceptionHolder;
1155
1156 if (leaderComment != null && leaderComment.length() > 0)
1157 setComment(leaderComment);
1158 return root;
1159 } catch (NoSuchElementException e) {
1160 GOFlatFileParseException ex = new GOFlatFileParseException(
1161 "Unexpected end of line during term definition."+
1162 " (There may be other errors after this one).",
1163 filename,
1164 currentLine,
1165 lineNumber,
1166 currentLine.length());
1167 exceptionHolder.addException(ex);
1168 throw exceptionHolder;
1169 } catch (FileNotFoundException e) {
1170 throw new DataAdapterException("Cannot find "+filename);
1171 } catch (IOException e) {
1172 throw new DataAdapterException("File unreadable");
1173 }
1174 }
1175
1176 protected boolean isComment(String in) {
1177 return in.charAt(0) == '!';
1178 }
1179
1180 protected Term parseLine(URL filename, String line, int lineNum)
1181 throws GOFlatFileParseException {
1182 Queue tokenList = getQueueForLine(filename, line, lineNum);
1183 if (((GOToken) tokenList.peek()).getToken().equals("$") &&
1184 tokenList.size() == 1)
1185 return null;
1186 return getTermFromTokens(tokenList, filename, line, lineNum);
1187 }
1188
1189 protected Term getTermFromTokens(Queue tokens, URL filename, String line,
1190 int lineNum)
1191 throws GOFlatFileParseException {
1192 currentType = pullOffType(tokens);
1193
1194 if (tokens.size() == 0) {
1195 GOFlatFileParseException ex = new GOFlatFileParseException(
1196 "Expected term name, found nothing.",
1197 filename,
1198 line,
1199 lineNum,
1200 0);
1201 exceptionHolder.addException(ex);
1202 return new Term(null);
1203 }
1204
1205 String currentLine = ((GOToken) tokens.peek()).getLine();
1206 String currentFilename = ((GOToken) tokens.peek()).getFilename();
1207 int currentLineNumber = ((GOToken) tokens.peek()).getLineNumber();
1208
1209 String name = pullOffTerm(tokens);
1210
1211 // discard semicolon
1212 GOToken semicolonToken = (GOToken) tokens.dequeue();
1213 if (!semicolonToken.getToken().equals(";")) {
1214 GOFlatFileParseException ex = new GOFlatFileParseException(
1215 "Expected \";\" instead of "+
1216 "\""+semicolonToken.getToken()+"\"",
1217 semicolonToken.getFilename(),
1218 semicolonToken.getLine(),
1219 semicolonToken.getLineNumber(),
1220 semicolonToken.getColNumber());
1221 exceptionHolder.addException(ex);
1222 }
1223
1224 IDWrapper id = pullOffID(tokens, exceptionHolder);
1225 Term term = new Term(name, id.toString(), false);
1226 try {
1227 Vector synonyms = pullOffIDList(tokens);
1228 Vector categories = new Vector();
1229 Vector dbxrefs = new Vector();
1230 while(!tokens.isEmpty()) {
1231 GOToken token = (GOToken) tokens.dequeue();
1232 if (charToType.get(token.getToken()) != null) {
1233 discardParentageTokens(token, id.toString(), tokens);
1234 } else if (token.getToken().equals(";")) {
1235 GOToken keytoken = (GOToken) tokens.dequeue();
1236 String key = keytoken.getToken();
1237
1238 token = (GOToken) tokens.dequeue();
1239 if (!token.getToken().equals(":")) {
1240 GOFlatFileParseException ex = new
1241 GOFlatFileParseException(
1242 "Unexpected seperator \""+token.getToken()+
1243 "\" found. Expected \":\"",
1244 token.getFilename(),
1245 token.getLine(),
1246 token.getLineNumber(),
1247 token.getColNumber());
1248 exceptionHolder.addException(ex);
1249 }
1250
1251 String value = pullOffValues(tokens);
1252 if (key.equals("synonym"))
1253 synonyms.addElement(new Synonym(value));
1254 else {
1255 dbxrefs.addElement(new Dbxref(key,
1256 value,
1257 Dbxref.ANALOG));
1258 }
1259 /*
1260 else {
1261 GOFlatFileParseException ex = new
1262 GOFlatFileParseException(
1263 "Unexpected property \""+key+"\" found. "+
1264 "Expected \"synonym\", \"TC\", or \"EC\".",
1265 keytoken.getFilename(),
1266 keytoken.getLine(),
1267 keytoken.getLineNumber(),
1268 keytoken.getColNumber());
1269 exceptionHolder.addException(ex);
1270 }
1271 */
1272 } else {
1273 GOFlatFileParseException ex = new GOFlatFileParseException(
1274 "Unexpected character "+token.getToken(),
1275 token.getFilename(),
1276 token.getLine(),
1277 token.getLineNumber(),
1278 token.getColNumber());
1279 exceptionHolder.addException(ex);
1280 }
1281 }
1282 term.setSynonyms(synonyms);
1283 term.setDbxrefs(dbxrefs);
1284 term.setCategories(categories);
1285 } catch (NoSuchElementException e) {
1286 GOFlatFileParseException ex = new GOFlatFileParseException(
1287 "Unexpected end of line ",
1288 currentFilename,
1289 currentLine,
1290 currentLineNumber,
1291 currentLine.length());
1292 exceptionHolder.addException(ex);
1293 }
1294 return term;
1295 }
1296
1297 protected String pullOffValues(Queue tokens) {
1298 GOToken token = (GOToken) tokens.dequeue();
1299 String value = token.getToken();
1300 while(!tokens.isEmpty() &&
1301 !((GOToken) tokens.peek()).getToken().equals(";") &&
1302 ((GOToken) tokens.peek()).getToken().charAt(0) !=
1303 BOUNDARY_CHAR &&
1304 !((((GOToken) tokens.peek()).getToken().length() == 1) &&
1305 (charToType.get(((GOToken) tokens.peek()).
1306 getToken()) != null))) {
1307 GOToken currentToken = (GOToken) tokens.dequeue();
1308