Save This Page
Home » pentaho-reporting-engine-classic-0.8.10 » org » jfree » report » util » [javadoc | source]
    1   /**
    2    * =========================================================
    3    * Pentaho-Reporting-Classic : a free Java reporting library
    4    * =========================================================
    5    *
    6    * Project Info:  http://reporting.pentaho.org/
    7    *
    8    * (C) Copyright 2001-2007, by Object Refinery Ltd, Pentaho Corporation and Contributors.
    9    *
   10    * This library is free software; you can redistribute it and/or modify it under the terms
   11    * of the GNU Lesser General Public License as published by the Free Software Foundation;
   12    * either version 2.1 of the License, or (at your option) any later version.
   13    *
   14    * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
   15    * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   16    * See the GNU Lesser General Public License for more details.
   17    *
   18    * You should have received a copy of the GNU Lesser General Public License along with this
   19    * library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
   20    * Boston, MA 02111-1307, USA.
   21    *
   22    * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
   23    * in the United States and other countries.]
   24    *
   25    * ------------
   26    * CSVTokenizer.java
   27    * ------------
   28    * (C) Copyright 2001-2007, by Object Refinery Ltd, Pentaho Corporation and Contributors.
   29    */
   30   package org.jfree.report.util;
   31   
   32   import java.util.Enumeration;
   33   import java.util.NoSuchElementException;
   34   
   35   /**
   36    * The csv tokenizer class allows an application to break a Comma Separated Value format into tokens. The tokenization
   37    * method is much simpler than the one used by the <code>StringTokenizer</code> class. The <code>CSVTokenizer</code>
   38    * methods do not distinguish among identifiers, numbers, and quoted strings, nor do they recognize and skip comments.
   39    * <p/>
   40    * The set of separator (the characters that separate tokens) may be specified either at creation time or on a per-token
   41    * basis.
   42    * <p/>
   43    * An instance of <code>CSVTokenizer</code> behaves in one of two ways, depending on whether it was created with the
   44    * <code>returnSeparators</code> flag having the value <code>true</code> or <code>false</code>: <ul> <li>If the flag is
   45    * <code>false</code>, delimiter characters serve to separate tokens. A token is a maximal sequence of consecutive
   46    * characters that are not separator. <li>If the flag is <code>true</code>, delimiter characters are themselves
   47    * considered to be tokens. A token is thus either one delimiter character, or a maximal sequence of consecutive
   48    * characters that are not separator. </ul><p> A <tt>CSVTokenizer</tt> object internally maintains a current position
   49    * within the string to be tokenized. Some operations advance this current position past the characters processed.<p> A
   50    * token is returned by taking a substring of the string that was used to create the <tt>CSVTokenizer</tt> object.
   51    * <p/>
   52    * The following is one example of the use of the tokenizer. The code:
   53    * <blockquote><pre>
   54    *     CSVTokenizer csvt = new CSVTokenizer("this,is,a,test");
   55    *     while (csvt.hasMoreTokens()) {
   56    *         println(csvt.nextToken());
   57    *     }
   58    * </pre></blockquote>
   59    * <p/>
   60    * prints the following output:
   61    * <blockquote><pre>
   62    *     this
   63    *     is
   64    *     a
   65    *     test
   66    * </pre></blockquote>
   67    *
   68    * @author abupon
   69    * @deprecated This class moved into LibBase. This class here will be removed in 0.8.11.
   70    */
   71   public class CSVTokenizer implements Enumeration
   72   {
   73     /**
   74      * The complete record that should be separated into elements.
   75      */
   76     private String record;
   77     /**
   78      * The separator.
   79      */
   80     private String separator;
   81     /**
   82      * The quoting char.
   83      */
   84     private String quate;
   85   
   86     /**
   87      * the current parsing position.
   88      */
   89     private int currentIndex;
   90   
   91     /**
   92      * A flag indicating that the current parse position is before the start.
   93      */
   94     private boolean beforeStart;
   95   
   96     /**
   97      * A possible separator constant.
   98      */
   99     public static final String SEPARATOR_COMMA = ",";
  100     /**
  101      * A possible separator constant.
  102      */
  103     public static final String SEPARATOR_TAB = "\t";
  104     /**
  105      * A possible separator constant.
  106      */
  107     public static final String SEPARATOR_SPACE = " ";
  108   
  109     /**
  110      * A possible quote character constant.
  111      */
  112     public static final String DOUBLE_QUATE = "\"";
  113     /**
  114      * A possible quote character constant.
  115      */
  116     public static final String SINGLE_QUATE = "'";
  117   
  118     /**
  119      * Constructs a csv tokenizer for the specified string. <code>theSeparator</code> argument is the separator for
  120      * separating tokens.
  121      * <p/>
  122      * If the <code>returnSeparators</code> flag is <code>true</code>, then the separator string is also returned as
  123      * tokens. separator is returned as a string. If the flag is <code>false</code>, the separator string is skipped and
  124      * only serve as separator between tokens.
  125      *
  126      * @param aString      a string to be parsed.
  127      * @param theSeparator the separator (CSVTokenizer.SEPARATOR_COMMA, CSVTokenizer.TAB, CSVTokenizer.SPACE, etc.).
  128      * @param theQuate     the quate (CSVTokenizer.SINGLE_QUATE, CSVTokenizer.DOUBLE_QUATE, etc.).
  129      */
  130     public CSVTokenizer(final String aString, final String theSeparator,
  131                         final String theQuate)
  132     {
  133       if (aString == null)
  134       {
  135         throw new NullPointerException("The given string is null");
  136       }
  137       if (theSeparator == null)
  138       {
  139         throw new NullPointerException("The given separator is null");
  140       }
  141       if (theQuate == null)
  142       {
  143         throw new NullPointerException("The given quate is null");
  144       }
  145       this.record = aString.trim();
  146       this.separator = theSeparator;
  147       this.quate = theQuate;
  148       this.currentIndex = 0;
  149       this.beforeStart = true;
  150     }
  151   
  152     /**
  153      * Constructs a csv tokenizer for the specified string. The characters in the <code>theSeparator</code> argument are
  154      * the separator for separating tokens. Separator string themselves will not be treated as tokens.
  155      *
  156      * @param aString      a string to be parsed.
  157      * @param theSeparator the separator (CSVTokenizer.SEPARATOR_COMMA, CSVTokenizer.TAB, CSVTokenizer.SPACE, etc.).
  158      */
  159     public CSVTokenizer(final String aString, final String theSeparator)
  160     {
  161       this(aString, theSeparator, CSVTokenizer.DOUBLE_QUATE);
  162     }
  163   
  164     /**
  165      * Constructs a string tokenizer for the specified string. The tokenizer uses the default separator set, which is
  166      * <code>CSVTokenizer.SEPARATOR_COMMA</code>. Separator string themselves will not be treated as tokens.
  167      *
  168      * @param aString a string to be parsed.
  169      */
  170     public CSVTokenizer(final String aString)
  171     {
  172       this(aString, CSVTokenizer.SEPARATOR_COMMA);
  173     }
  174   
  175     /**
  176      * Tests if there are more tokens available from this tokenizer's string. If this method returns <tt>true</tt>, then a
  177      * subsequent call to <tt>nextToken</tt> with no argument will successfully return a token.
  178      *
  179      * @return <code>true</code> if and only if there is at least one token in the string after the current position;
  180      *         <code>false</code> otherwise.
  181      */
  182     public boolean hasMoreTokens()
  183     {
  184       return (this.currentIndex < this.record.length());
  185     }
  186   
  187     /**
  188      * Returns the next token from this string tokenizer.
  189      *
  190      * @return the next token from this string tokenizer.
  191      * @throws NoSuchElementException   if there are no more tokens in this tokenizer's string.
  192      * @throws IllegalArgumentException if given parameter string format was wrong
  193      */
  194     public String nextToken()
  195         throws NoSuchElementException, IllegalArgumentException
  196     {
  197   
  198       if (!this.hasMoreTokens())
  199       {
  200         throw new NoSuchElementException();
  201       }
  202   
  203       if (beforeStart == false)
  204       {
  205         currentIndex += this.separator.length();
  206       }
  207       else
  208       {
  209         beforeStart = false;
  210       }
  211   
  212       if (this.record.startsWith(this.quate, this.currentIndex))
  213       {
  214         final StringBuffer token = new StringBuffer(100);
  215         String rec = this.record.substring(this.currentIndex + this.quate.length());
  216   
  217         while (true)
  218         {
  219           final int end = rec.indexOf(this.quate);
  220           if (end < 0)
  221           {
  222             throw new IllegalArgumentException("Illegal format");
  223           }
  224   
  225           if (!rec.startsWith(this.quate, end + 1))
  226           {
  227             token.append(rec.substring(0, end));
  228             break;
  229           }
  230           token.append(rec.substring(0, end + 1));
  231           rec = rec.substring(end + this.quate.length() * 2);
  232           this.currentIndex++;
  233         }
  234   
  235         this.currentIndex += (token.length() + this.quate.length() * 2);
  236         return token.toString();
  237       }
  238   
  239       final int end = this.record.indexOf(this.separator, this.currentIndex);
  240       if (end >= 0)
  241       {
  242         final int start = this.currentIndex;
  243         final String token = this.record.substring(start, end);
  244         this.currentIndex = end;
  245         return token;
  246       }
  247       else
  248       {
  249         final int start = this.currentIndex;
  250         final String token = this.record.substring(start);
  251         this.currentIndex = this.record.length();
  252         return token;
  253       }
  254     }
  255   
  256     /**
  257      * Returns the next token in this string tokenizer's string. First, the set of characters considered to be separator
  258      * by this <tt>CSVTokenizer</tt> object is changed to be the characters in the string <tt>separator</tt>. Then the
  259      * next token in the string after the current position is returned. The current position is advanced beyond the
  260      * recognized token.  The new delimiter set remains the default after this call.
  261      *
  262      * @param theSeparator the new separator.
  263      * @return the next token, after switching to the new delimiter set.
  264      * @throws java.util.NoSuchElementException
  265      *          if there are no more tokens in this tokenizer's string.
  266      */
  267     public String nextToken(final String theSeparator)
  268     {
  269       separator = theSeparator;
  270       return nextToken();
  271     }
  272   
  273     /**
  274      * Returns the same value as the <code>hasMoreTokens</code> method. It exists so that this class can implement the
  275      * <code>Enumeration</code> interface.
  276      *
  277      * @return <code>true</code> if there are more tokens; <code>false</code> otherwise.
  278      * @see java.util.Enumeration
  279      * @see CSVTokenizer#hasMoreTokens()
  280      */
  281     public boolean hasMoreElements()
  282     {
  283       return hasMoreTokens();
  284     }
  285   
  286     /**
  287      * Returns the same value as the <code>nextToken</code> method, except that its declared return value is
  288      * <code>Object</code> rather than <code>String</code>. It exists so that this class can implement the
  289      * <code>Enumeration</code> interface.
  290      *
  291      * @return the next token in the string.
  292      * @throws java.util.NoSuchElementException
  293      *          if there are no more tokens in this tokenizer's string.
  294      * @see java.util.Enumeration
  295      * @see CSVTokenizer#nextToken()
  296      */
  297     public Object nextElement()
  298     {
  299       return nextToken();
  300     }
  301   
  302     /**
  303      * Calculates the number of times that this tokenizer's <code>nextToken</code> method can be called before it
  304      * generates an exception. The current position is not advanced.
  305      *
  306      * @return the number of tokens remaining in the string using the current delimiter set.
  307      * @see CSVTokenizer#nextToken()
  308      */
  309     public int countTokens()
  310     {
  311       int count = 0;
  312   
  313       final int preserve = this.currentIndex;
  314       final boolean preserveStart = this.beforeStart;
  315       while (this.hasMoreTokens())
  316       {
  317         this.nextToken();
  318         count++;
  319       }
  320       this.currentIndex = preserve;
  321       this.beforeStart = preserveStart;
  322   
  323       return count;
  324     }
  325   
  326     /**
  327      * Returns the quate.
  328      *
  329      * @return char
  330      */
  331     public String getQuate()
  332     {
  333       return this.quate;
  334     }
  335   
  336     /**
  337      * Sets the quate.
  338      *
  339      * @param quate The quate to set
  340      */
  341     public void setQuate(final String quate)
  342     {
  343       this.quate = quate;
  344     }
  345   }

Save This Page
Home » pentaho-reporting-engine-classic-0.8.10 » org » jfree » report » util » [javadoc | source]