Save This Page
Home » openjdk-7 » net » sf » bibkeeper » [javadoc | source]
    1   package net.sf.bibkeeper;
    2   
    3   import java.io.IOException;
    4   import java.io.PushbackReader;
    5   import java.io.Reader;
    6   import java.util.HashMap;
    7   import java.util.Iterator;
    8   import java.util.StringTokenizer;
    9   
   10   public class BibtexParser
   11   {
   12       private PushbackReader _in;   // input source; wraps the constructor's Reader so a character can be pushed back
   13       private BibtexDatabase _db;   // database being filled; created anew by each parse() call
   14       private HashMap _meta;        // metadata collected from @comment entries during parse(), keyed by type
   15       private boolean _eof = false; // set to true once a read has hit the end of the input
   16       private int line = 1;         // current line number (starts at 1); only used in error messages
   17   
   18       protected HashMap types = new HashMap(); // lower-cased entry type name -> BibtexEntryType, filled by the constructor
   19   
   20       public BibtexParser(Reader in)
   21       {
   22           if (in == null)
   23           {
   24               throw new NullPointerException();
   25           }
   26   
   27           _in = new PushbackReader(in);
   28   
   29   	// Set up the 'types' hashmap to contain the known entry types.
   30   	Iterator i = BibtexEntryType.ALL_TYPES.iterator();
   31   	for (;i.hasNext();) {
   32   	    BibtexEntryType tp = (BibtexEntryType)(i.next());	   
   33   	    types.put(tp.getName().toLowerCase(), tp);
   34   	}
   35   
   36       }
   37   
   38       private void skipWhitespace() throws IOException
   39       {
   40           int c;
   41   
   42           while (true)
   43           {
   44               c = read();
   45               if ((c == -1) || (c == 65535))
   46               {
   47                   _eof = true;
   48                   return;
   49               }
   50              
   51   	    if (Character.isWhitespace((char) c))
   52               {
   53                   continue;
   54               }
   55   	    else
   56               // found non-whitespace char
   57   	    //Util.pr("SkipWhitespace, stops: "+c);
   58               unread(c);
   59   	    /*	    try {
   60   		Thread.currentThread().sleep(500);
   61   		} catch (InterruptedException ex) {}*/
   62               break;
   63           }
   64       }
   65   
   66       public ParserResult parse() throws IOException {
   67   
   68           _db = new BibtexDatabase(); // Bibtex related contents.
   69   	_meta = new HashMap();      // Metadata in comments for Bibkeeper.
   70   
   71           skipWhitespace();
   72   
   73           try
   74           {
   75               while (!_eof)
   76               {
   77   
   78   		consumeUncritically('@');
   79   		skipWhitespace();
   80   		String entryType = parseTextToken();
   81   		BibtexEntryType tp = 
   82   		    (BibtexEntryType)types.get(entryType.toLowerCase()); 
   83   		if (tp != null) 
   84                   {
   85   		    //Util.pr("Found: "+tp.getName());
   86   		    _db.insertEntry(parseEntry(tp));
   87   		}
   88   		else if (entryType.toLowerCase().equals("preamble")) {
   89   		    _db.setPreamble(parsePreamble());
   90   		}
   91   		else if (entryType.toLowerCase().equals("string")) {
   92   		    _db.addString(parseString(), _db.getStringCount());
   93   		}
   94   		else if (entryType.toLowerCase().equals("comment")) {
   95   		    StringBuffer comment = parseBracketedText();
   96   		    /**
   97   		     *
   98   		     * Metadata are used to store Bibkeeper-specific
   99   		     * information in .bib files.
  100   		     *
  101   		     * Metadata are stored in bibtex files in the format
  102   		     * @comment{bibkeeper-meta: type:data0;data1;data2;...}
  103   		     *
  104   		     * Each comment that starts with the META_FLAG is stored
  105   		     * in the meta HashMap, with type as key.
  106   		     */
  107   
  108   		    if (comment.substring(0, GUIGlobals.META_FLAG.length())
  109   			.equals(GUIGlobals.META_FLAG)) {
  110   
  111   			String rest = comment.substring
  112   			    (GUIGlobals.META_FLAG.length());
  113   			int pos = rest.indexOf(':');
  114   
  115   			if (pos > 0)
  116   			    _meta.put
  117   				(rest.substring(0, pos), rest.substring(pos+1));
  118   		    }
  119   		}
  120   		//else
  121   		//    throw new RuntimeException("Unknown entry type: "+entryType);
  122                   skipWhitespace();
  123               }
  124   
  125               return new ParserResult(_db, _meta);
  126           }
  127           catch (KeyCollisionException kce)
  128           {
  129               throw new IOException("Duplicate ID in bibtex file: " +
  130                   kce.toString());
  131           }
  132       }
  133   
  134       private int peek() throws IOException
  135       {
  136           int c = read();
  137           unread(c);
  138   
  139           return c;
  140       }
  141   
  142       private int read() throws IOException
  143       {
  144   	int c = _in.read();
  145   	if (c == '\n')
  146   	    line++;
  147   	return c;
  148       }
  149   
  150       private void unread(int c) throws IOException
  151       {
  152   	if (c == '\n')
  153   	    line--;
  154   	_in.unread(c);
  155       }
  156   
  157       public BibtexString parseString() throws IOException
  158       {
  159   	//Util.pr("Parsing string");
  160   	skipWhitespace();
  161   	consume('{','(');
  162   	//while (read() != '}');
  163   	skipWhitespace();
  164   	//Util.pr("Parsing string name");
  165   	String name = parseTextToken();
  166   	//Util.pr("Parsed string name");
  167   	skipWhitespace();
  168   	//Util.pr("Now the contents");
  169   	String content = parseFieldContent();
  170   	//Util.pr("Now I'm going to consume a }");
  171   	consume('}',')');
  172   	//Util.pr("Finished string parsing.");
  173   	return new BibtexString(name, content);
  174       }
  175   
  176       public String parsePreamble() throws IOException
  177       {
  178   	int brackets = 0;
  179   
  180   	return parseBracketedText().toString();
  181       }
  182   
  183       public BibtexEntry parseEntry(BibtexEntryType tp) throws IOException
  184       {
  185   	skipWhitespace();
  186   	consume('{','(');
  187   	skipWhitespace();
  188   	String key = parseTextToken(),
  189   	    id = Util.createID(tp, _db);
  190   
  191   	BibtexEntry result = new BibtexEntry(id, tp); 
  192   	result.setField(GUIGlobals.KEY_FIELD, key);
  193   
  194   	skipWhitespace();
  195   	
  196   	while (true)
  197   	{
  198   	    int c = peek();
  199   	    if ((c == '}') || (c == ')'))	    
  200   	    {
  201   		break;
  202   	    }
  203   
  204   	    consume(',');
  205   
  206   	    skipWhitespace();
  207   
  208   	    c = peek();
  209   	    if ((c == '}') || (c == ')'))
  210   	    {
  211   		break;
  212   	    }
  213   	    parseField(result);
  214   	}
  215   
  216   	consume('}',')');
  217   	return result;
  218       }
  219   
  220       private void parseField(BibtexEntry entry) throws IOException
  221       {
  222           String key = parseTextToken().toLowerCase();
  223   	//Util.pr("_"+key+"_");
  224   	String content = parseFieldContent();
  225   	if (content.length() > 0)
  226   	    entry.setField(key, content);
  227       }
  228   
  229       private String parseFieldContent() throws IOException
  230       {
  231           skipWhitespace();
  232           consume('=');
  233           skipWhitespace();
  234   	StringBuffer value = new StringBuffer();
  235           int c,j='.';
  236   
  237   	while (((c = peek()) != ',') && (c != '}') && (c != ')'))
  238           {
  239   
  240   	    if (_eof) {
  241   			throw new RuntimeException("Error in line "+line+
  242   						   ": EOF in mid-string");
  243   	    }
  244   	    if (c == '"')
  245   	    {
  246   		// value is a string
  247   		consume('"');
  248   
  249   		while (!((peek() == '"') && (j != '\\')))
  250   		{
  251   		    j = read();
  252   		    if (_eof || (j == -1) || (j == 65535))
  253                       {
  254   			throw new RuntimeException("Error in line "+line+
  255   						   ": EOF in mid-string");
  256                       }
  257   
  258   		    value.append((char) j);
  259   		}
  260   
  261   		consume('"');
  262   
  263   	    }
  264   	    else if (c == '{') {
  265   		// Value is a string enclosed in brackets. There can be pairs
  266   		// of brackets inside of a field, so we need to count the brackets
  267   		// to know when the string is finished.
  268   
  269   		value.append(parseBracketedText());
  270   	    }
  271   	    else if (Character.isDigit((char) c))
  272   	    {
  273   		// value is a number
  274   		String numString = parseTextToken();
  275   		int numVal = Integer.parseInt(numString);
  276   		value.append((new Integer(numVal)).toString());
  277   		//entry.setField(key, new Integer(numVal));
  278   	    }
  279   	    else if (c == '#')
  280   	    {    
  281   		//value.append(" # ");
  282   		consume('#');
  283   	    } 
  284   	    else
  285   	    {
  286   		String textToken = parseTextToken();
  287   		if (textToken.length() == 0)
  288   		    throw new IOException("Error in line "+line+" or above: "+
  289   					  "Empty text token.\nThis could be caused "+
  290   					  "by a missing comma between two fields.");
  291   		value.append("#"+textToken+"#");
  292   		//Util.pr(parseTextToken());	    
  293   		//throw new RuntimeException("Unknown field type");
  294   	    }
  295   	    skipWhitespace();
  296   	}
  297   	//Util.pr("Returning field content: "+value.toString());
  298   	return value.toString();
  299   
  300       }
  301   
  302       private String parseTextToken() throws IOException
  303       {
  304           StringBuffer token = new StringBuffer(20);
  305   
  306           while (true)
  307           {
  308               int c = read();
  309   	    //Util.pr(".. "+c);
  310               if (c == -1)
  311               {
  312                   _eof = true;
  313   
  314                   return token.toString();
  315               }
  316   
  317               if (Character.isLetterOrDigit((char) c) || (c == ':') || (c == '-')
  318   		|| (c == '_') || (c == '*') || (c == '+') || (c == '.')
  319   		|| (c == '/'))
  320               {
  321                   token.append((char) c);
  322               }
  323               else
  324               {
  325                   unread(c);
  326   		//Util.pr("Pasted text token: "+token.toString());
  327                   return token.toString();
  328               }
  329           }
  330       }
  331   
  332       private StringBuffer parseBracketedText() throws IOException
  333       {
  334   	//Util.pr("Parse bracketed text");
  335   	StringBuffer value = new StringBuffer();
  336   
  337   	consume('{');
  338   
  339   	int brackets = 0;
  340   
  341   	while (!((peek() == '}') && (brackets == 0))) 
  342           {
  343   	    
  344   	    int j = read();
  345               if ((j == -1) || (j == 65535))
  346   	    {
  347   		throw new RuntimeException("Error in line "+line
  348   					   +": EOF in mid-string");
  349   	    }
  350   	    else if (j == '{')
  351   		brackets++;
  352   	    else if (j == '}')
  353   		brackets--;
  354   
  355   	    // If we encounter whitespace of any kind, read it as a
  356   	    // simple space, and ignore any others that follow immediately.
  357   	    if (Character.isWhitespace((char)j)) {
  358   		value.append(' ');
  359   		skipWhitespace();
  360   	    } else
  361   		value.append((char) j);
  362   	    
  363   	}
  364   	
  365   	consume('}');
  366   
  367   	return value;
  368       }
  369   
  370   
  371       private void consume(char expected) throws IOException
  372       {
  373           int c = read();
  374   
  375           if (c != expected)
  376           {
  377               throw new RuntimeException("Error in line "+line
  378   		    +": Expected " 
  379   		    + expected + " but received " + (char) c);
  380           }
  381   
  382       }
  383   
  384       private void consumeUncritically(char expected) throws IOException
  385       {
  386   	int c;
  387   	while (((c = read()) != expected) && (c != -1) && (c != 65535));
  388   	if ((c == -1) || (c == 65535))
  389   	    _eof = true;
  390       }
  391   
  392       private void consume(char expected1, char expected2) throws IOException
  393       {
  394   	// Consumes one of the two, doesn't care which appears.
  395   
  396           int c = read();
  397   
  398           if ((c != expected1) && (c != expected2))
  399           {
  400               throw new RuntimeException("Error in line "+line+": Expected " +
  401                   expected1 + " or " + expected2 + " but received " + (int) c);
  402   
  403           }
  404   
  405       }
  406   }

Save This Page
Home » openjdk-7 » net » sf » bibkeeper » [javadoc | source]