Save This Page
Home » Xerces-J-src.2.9.1 » sax » [javadoc | source]
    1   /*-- 
    2   
    3    Copyright (C) 2000 Brett McLaughlin & Jason Hunter.
    4    All rights reserved.
    5    
    6    Redistribution and use in source and binary forms, with or without
    7    modification, are permitted provided that the following conditions
    8    are met:
    9    
   10    1. Redistributions of source code must retain the above copyright
   11       notice, this list of conditions, and the following disclaimer.
   12    
   13    2. Redistributions in binary form must reproduce the above copyright
   14       notice, this list of conditions, and the disclaimer that follows 
   15       these conditions in the documentation and/or other materials 
   16       provided with the distribution.
   17   
   18    3. The name "JDOM" must not be used to endorse or promote products
   19       derived from this software without prior written permission.  For
   20       written permission, please contact license@jdom.org.
   21    
   22    4. Products derived from this software may not be called "JDOM", nor
   23       may "JDOM" appear in their name, without prior written permission
   24       from the JDOM Project Management (pm@jdom.org).
   25    
   26    In addition, we request (but do not require) that you include in the 
   27    end-user documentation provided with the redistribution and/or in the 
   28    software itself an acknowledgement equivalent to the following:
   29        "This product includes software developed by the
   30         JDOM Project (http://www.jdom.org/)."
   31    Alternatively, the acknowledgment may be graphical using the logos 
   32    available at http://www.jdom.org/images/logos.
   33   
   34    THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   35    WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   36    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   37    DISCLAIMED.  IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
   38    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   39    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   40    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   41    USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   42    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   43    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   44    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   45    SUCH DAMAGE.
   46   
   47    This software consists of voluntary contributions made by many 
   48    individuals on behalf of the JDOM Project and was originally 
   49    created by Brett McLaughlin <brett@jdom.org> and 
   50    Jason Hunter <jhunter@jdom.org>.  For more information on the 
   51    JDOM Project, please see <http://www.jdom.org/>.
   52    
   53    */
   54   package sax;
   55   
   56   import java.util.Stack;
   57   
   58   import org.xml.sax.Attributes;
   59   import org.xml.sax.SAXException;
   60   import org.xml.sax.XMLReader;
   61   
   62   
   63   /**
   64    * Filter for removing formatting from data- or field-oriented XML. 
   65    *
   66    * <i>Code and comments adapted from DataWriter-0.2, written
   67    * by David Megginson and released into the public domain,
   68    * without warranty.</i>
   69    *
   70    * <p>This filter removes leading and trailing whitespace from
   71    * field-oriented XML without mixed content. Note that this class will
   72    * likely not yield appropriate results for document-oriented XML like
   73    * XHTML pages, which mix character data and elements together.</p>
   74    *
   75    * @see DataFormatFilter
   76    */
   77   public class DataUnformatFilter extends XMLFilterBase
   78   {
   79       
   80       
   81       
   82       ////////////////////////////////////////////////////////////////////
   83       // Constructors.
   84       ////////////////////////////////////////////////////////////////////
   85   
   86   
   87       /**
   88        * Create a new filter.
   89        */
   90       public DataUnformatFilter()
   91       {
   92       }
   93   
   94   
   95       /**
   96        * Create a new filter.
   97        *
   98        * <p>Use the XMLReader provided as the source of events.</p>
   99        *
  100        * @param xmlreader The parent in the filter chain.
  101        */
  102       public DataUnformatFilter(XMLReader xmlreader)
  103       {
  104           super(xmlreader);
  105       }
  106   
  107   
  108   
  109       ////////////////////////////////////////////////////////////////////
  110       // Public methods.
  111       ////////////////////////////////////////////////////////////////////
  112   
  113   
  114       /**
  115        * Reset the filter so that it can be reused.
  116        *
  117        * <p>This method is especially useful if the filter failed
  118        * with an exception the last time through.</p>
  119        */
  120       public void reset ()
  121       {
  122           state = SEEN_NOTHING;
  123           stateStack = new Stack();
  124           whitespace = new StringBuffer();
  125       }
  126   
  127   
  128   
  129       ////////////////////////////////////////////////////////////////////
  130       // Methods from org.xml.sax.ContentHandler.
  131       ////////////////////////////////////////////////////////////////////
  132   
  133       
  134       /**
  135        * Filter a start document event. 
  136        *
  137        * <p>Reset state and pass the event on for further processing.</p>
  138        *
  139        * @exception org.xml.sax.SAXException If a filter
  140        *            further down the chain raises an exception.
  141        * @see org.xml.sax.ContentHandler#startDocument
  142        */
  143       public void startDocument ()
  144       throws SAXException
  145       {
  146           reset();
  147           super.startDocument();
  148       }
  149   
  150   
  151       /**
  152        * Filter a start element event.
  153        *
  154        * @param uri The element's Namespace URI.
  155        * @param localName The element's local name.
  156        * @param qName The element's qualified (prefixed) name.
  157        * @param atts The element's attribute list.
  158        * @exception org.xml.sax.SAXException If a filter
  159        *            further down the chain raises an exception.
  160        * @see org.xml.sax.ContentHandler#startElement
  161        */
  162       public void startElement (String uri, String localName,
  163                                 String qName, Attributes atts)
  164       throws SAXException
  165       {
  166           clearWhitespace();
  167           stateStack.push(SEEN_ELEMENT);
  168           state = SEEN_NOTHING;
  169           super.startElement(uri, localName, qName, atts);
  170       }
  171   
  172   
  173       /**
  174        * Filter an end element event.
  175        *
  176        * @param uri The element's Namespace URI.
  177        * @param localName The element's local name.
  178        * @param qName The element's qualified (prefixed) name.
  179        * @exception org.xml.sax.SAXException If a filter
  180        *            further down the chain raises an exception.
  181        * @see org.xml.sax.ContentHandler#endElement
  182        */
  183       public void endElement (String uri, String localName, String qName)
  184       throws SAXException
  185       {
  186           if (state == SEEN_ELEMENT) {
  187               clearWhitespace();
  188           } else {
  189               emitWhitespace();
  190           }
  191           state = stateStack.pop();
  192           super.endElement(uri, localName, qName);
  193       }
  194   
  195   
  196       /**
  197        * Filter a character data event.
  198        *
  199        * @param ch The characters to write.
  200        * @param start The starting position in the array.
  201        * @param length The number of characters to use.
  202        * @exception org.xml.sax.SAXException If a filter
  203        *            further down the chain raises an exception.
  204        * @see org.xml.sax.ContentHandler#characters
  205        */
  206       public void characters (char ch[], int start, int length)
  207       throws SAXException
  208       {
  209           if (state != SEEN_DATA) {
  210   
  211               /* Look for non-whitespace. */
  212   
  213               int end = start + length;
  214               while (end-- > start) {
  215                   if (!isXMLWhitespace(ch[end]))
  216                       break;
  217               }
  218   
  219               /*
  220                * If all the characters are whitespace, save them for later.
  221                * If we've got some data, emit any saved whitespace and update
  222                * our state to show we've seen data.
  223                */
  224   
  225               if (end < start) {
  226                   saveWhitespace(ch, start, length);
  227               } else {
  228                   state = SEEN_DATA;
  229                   emitWhitespace();
  230               }
  231           }
  232   
  233           /* Pass on everything inside a data field. */
  234           
  235           if (state == SEEN_DATA) {
  236               super.characters(ch, start, length);
  237           }
  238       }
  239       
  240       
  241        /**
  242         * Filter an ignorable whitespace event.
  243         *
  244         * @param ch The array of characters to write.
  245         * @param start The starting position in the array.
  246         * @param length The number of characters to write.
  247         * @exception org.xml.sax.SAXException If a filter
  248         *            further down the chain raises an exception.
  249         * @see org.xml.sax.ContentHandler#ignorableWhitespace
  250         */
  251       public void ignorableWhitespace (char ch[], int start, int length)
  252       throws SAXException
  253       {
  254           emitWhitespace();
  255           // ignore
  256       }
  257       
  258   
  259       /**
  260        * Filter a processing instruction event.
  261        *
  262        * @param target The PI target.
  263        * @param data The PI data.
  264        * @exception org.xml.sax.SAXException If a filter
  265        *            further down the chain raises an exception.
  266        * @see org.xml.sax.ContentHandler#processingInstruction
  267        */
  268       public void processingInstruction (String target, String data)
  269       throws SAXException
  270       {
  271           emitWhitespace();
  272           super.processingInstruction(target, data);
  273       }
  274   
  275   
  276   
  277       ////////////////////////////////////////////////////////////////////
  278       // Internal methods.
  279       ////////////////////////////////////////////////////////////////////
  280   
  281   
  282       /**
  283        * Saves trailing whitespace.
  284        */
  285       protected void saveWhitespace (char[] ch, int start, int length) {
  286           whitespace.append(ch, start, length);
  287       }
  288   
  289   
  290       /**
  291        * Passes saved whitespace down the filter chain.
  292        */
  293       protected void emitWhitespace ()
  294       throws SAXException
  295       {
  296           char[] data = new char[whitespace.length()];
  297           whitespace.getChars(0, data.length, data, 0);
  298           whitespace.setLength(0);
  299           super.characters(data, 0, data.length);
  300       }
  301   
  302   
  303       /**
  304        * Discards saved whitespace. 
  305        */
  306       protected void clearWhitespace () {
  307           whitespace.setLength(0);
  308       }
  309   
  310   
  311       /**
  312        * Returns <var>true</var> if character is XML whitespace.
  313        */
  314       private boolean isXMLWhitespace (char c)
  315       {
  316           return c == ' ' || c == '\t' || c == '\r' || c == '\n';
  317       }
  318   
  319   
  320   
  321   
  322       ////////////////////////////////////////////////////////////////////
  323       // Constants.
  324       ////////////////////////////////////////////////////////////////////
  325   
  326       private static final Object SEEN_NOTHING = new Object();
  327       private static final Object SEEN_ELEMENT = new Object();
  328       private static final Object SEEN_DATA = new Object();
  329   
  330   
  331       ////////////////////////////////////////////////////////////////////
  332       // Internal state.
  333       ////////////////////////////////////////////////////////////////////
  334   
  335       private Object state = SEEN_NOTHING;
  336       private Stack stateStack = new Stack();
  337   
  338       private StringBuffer whitespace = new StringBuffer();
  339   
  340   }
  341   
  342   // end of DataUnformatFilter.java

Save This Page
Home » Xerces-J-src.2.9.1 » sax » [javadoc | source]