Save This Page
Home » openjdk-7 » com.sun.org.apache.xml.internal » serialize » [javadoc | source]
    1   /*
    2    * reserved comment block
    3    * DO NOT REMOVE OR ALTER!
    4    */
    5   /*
    6    * Copyright 1999-2002,2004,2005 The Apache Software Foundation.
    7    *
    8    * Licensed under the Apache License, Version 2.0 (the "License");
    9    * you may not use this file except in compliance with the License.
   10    * You may obtain a copy of the License at
   11    *
   12    *      http://www.apache.org/licenses/LICENSE-2.0
   13    *
   14    * Unless required by applicable law or agreed to in writing, software
   15    * distributed under the License is distributed on an "AS IS" BASIS,
   16    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   17    * See the License for the specific language governing permissions and
   18    * limitations under the License.
   19    */
   20   
   21   
   22   
   23   // Sep 14, 2000:
   24   //  Fixed problem with namespace handling. Contributed by
   25   //  David Blondeau <blondeau@intalio.com>
   26   // Sep 14, 2000:
   27   //  Fixed serializer to report IO exception directly, instead of at
   28   //  the end of document processing.
   29   //  Reported by Patrick Higgins <phiggins@transzap.com>
   30   // Aug 21, 2000:
   31   //  Fixed bug in startDocument not calling prepare.
   32   //  Reported by Mikael Staldal <d96-mst-ingen-reklam@d.kth.se>
   33   // Aug 21, 2000:
   34   //  Added ability to omit DOCTYPE declaration.
   35   
   36   
   37   package com.sun.org.apache.xml.internal.serialize;
   38   
   39   
   40   import java.io.IOException;
   41   import java.io.OutputStream;
   42   import java.io.Writer;
   43   
   44   import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
   45   import com.sun.org.apache.xerces.internal.impl.Constants;
   46   import com.sun.org.apache.xerces.internal.util.NamespaceSupport;
   47   import com.sun.org.apache.xerces.internal.util.SymbolTable;
   48   import com.sun.org.apache.xerces.internal.util.XML11Char;
   49   import com.sun.org.apache.xerces.internal.util.XMLChar;
   50   import org.xml.sax.SAXException;
   51   import org.w3c.dom.DOMError;
   52   
   53   /**
   54    * Implements an XML serializer supporting both DOM and SAX pretty
   55    * serializing. For usage instructions see {@link Serializer}.
   56    * <p>
   57    * If an output stream is used, the encoding is taken from the
   58    * output format (defaults to <tt>UTF-8</tt>). If a writer is
   59    * used, make sure the writer uses the same encoding (if applies)
   60    * as specified in the output format.
   61    * <p>
   62    * The serializer supports both DOM and SAX. SAX serializing is done by firing
   63    * SAX events and using the serializer as a document handler. DOM serializing is done
   64    * by calling {@link #serialize(Document)} or by using DOM Level 3
   65    * {@link org.w3c.dom.ls.DOMSerializer} and
   66    * serializing with {@link org.w3c.dom.ls.DOMSerializer#write},
   67    * {@link org.w3c.dom.ls.DOMSerializer#writeToString}.
   68    * <p>
   69    * If an I/O exception occurs while serializing, the serializer
   70    * will not throw an exception directly, but only throw it
   71    * at the end of serializing (either DOM or SAX's {@link
   72    * org.xml.sax.DocumentHandler#endDocument}.
   73    * <p>
   74    * For elements that are not specified as whitespace preserving,
   75    * the serializer will potentially break long text lines at space
   76    * boundaries, indent lines, and serialize elements on separate
   77    * lines. Line terminators will be regarded as spaces, and
   78    * spaces at beginning of line will be stripped.
   79    * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
   80    * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
   81    * @author Elena Litani IBM
   82    * @see Serializer
   83    */
   84   public class XML11Serializer
   85   extends XMLSerializer {
   86   
   87       //
   88       // constants
   89       //
   90   
   91       protected static final boolean DEBUG = false;
   92   
   93       //
   94       // data
   95       //
   96   
   97       //
   98       // DOM Level 3 implementation: variables intialized in DOMSerializerImpl
   99       //
  100   
  101       /** stores namespaces in scope */
  102       protected NamespaceSupport fNSBinder;
  103   
  104       /** stores all namespace bindings on the current element */
  105       protected NamespaceSupport fLocalNSBinder;
  106   
  107       /** symbol table for serialization */
  108       protected SymbolTable fSymbolTable;
  109   
  110       // is node dom level 1 node?
  111       protected boolean fDOML1 = false;
  112       // counter for new prefix names
  113       protected int fNamespaceCounter = 1;
  114       protected final static String PREFIX = "NS";
  115   
  116       /**
  117        * Controls whether namespace fixup should be performed during
  118        * the serialization.
  119        * NOTE: if this field is set to true the following
  120        * fields need to be initialized: fNSBinder, fLocalNSBinder, fSymbolTable,
  121        * XMLSymbols.EMPTY_STRING, fXmlSymbol, fXmlnsSymbol, fNamespaceCounter.
  122        */
  123       protected boolean fNamespaces = false;
  124   
  125   
  126       private boolean fPreserveSpace;
  127   
  128   
  129       /**
  130        * Constructs a new serializer. The serializer cannot be used without
  131        * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
  132        * first.
  133        */
  134       public XML11Serializer() {
  135           super( );
  136           _format.setVersion("1.1");
  137       }
  138   
  139   
  140       /**
  141        * Constructs a new serializer. The serializer cannot be used without
  142        * calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
  143        * first.
  144        */
  145       public XML11Serializer( OutputFormat format ) {
  146           super( format );
  147           _format.setVersion("1.1");
  148       }
  149   
  150   
  151       /**
  152        * Constructs a new serializer that writes to the specified writer
  153        * using the specified output format. If <tt>format</tt> is null,
  154        * will use a default output format.
  155        *
  156        * @param writer The writer to use
  157        * @param format The output format to use, null for the default
  158        */
  159       public XML11Serializer( Writer writer, OutputFormat format ) {
  160           super( writer, format );
  161           _format.setVersion("1.1");
  162       }
  163   
  164   
  165       /**
  166        * Constructs a new serializer that writes to the specified output
  167        * stream using the specified output format. If <tt>format</tt>
  168        * is null, will use a default output format.
  169        *
  170        * @param output The output stream to use
  171        * @param format The output format to use, null for the default
  172        */
  173       public XML11Serializer( OutputStream output, OutputFormat format ) {
  174           super( output, format != null ? format : new OutputFormat( Method.XML, null, false ) );
  175           _format.setVersion("1.1");
  176       }
  177   
  178       //-----------------------------------------//
  179       // SAX content handler serializing methods //
  180       //-----------------------------------------//
  181   
  182   
  183       public void characters( char[] chars, int start, int length )
  184           throws SAXException
  185       {
  186           ElementState state;
  187   
  188           try {
  189               state = content();
  190   
  191               // Check if text should be print as CDATA section or unescaped
  192               // based on elements listed in the output format (the element
  193               // state) or whether we are inside a CDATA section or entity.
  194   
  195               if ( state.inCData || state.doCData ) {
  196                   int          saveIndent;
  197   
  198                   // Print a CDATA section. The text is not escaped, but ']]>'
  199                   // appearing in the code must be identified and dealt with.
  200                   // The contents of a text node is considered space preserving.
  201                   if ( ! state.inCData ) {
  202                       _printer.printText( "<![CDATA[" );
  203                       state.inCData = true;
  204                   }
  205                   saveIndent = _printer.getNextIndent();
  206                   _printer.setNextIndent( 0 );
  207                   char ch;
  208                   final int end = start + length;
  209                   for ( int index = start; index < end; ++index ) {
  210                       ch = chars[index];
  211                       if ( ch == ']' && index + 2 < end &&
  212                           chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
  213                           _printer.printText("]]]]><![CDATA[>");
  214                           index +=2;
  215                           continue;
  216                       }
  217                       if (!XML11Char.isXML11Valid(ch)) {
  218                           // check if it is surrogate
  219                           if (++index < end) {
  220                               surrogates(ch, chars[index]);
  221                           }
  222                           else {
  223                               fatalError("The character '"+(char)ch+"' is an invalid XML character");
  224                           }
  225                           continue;
  226                       } else {
  227                           if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
  228                               _printer.printText((char)ch);
  229                           } else {
  230                               // The character is not printable -- split CDATA section
  231                               _printer.printText("]]>&#x");
  232                               _printer.printText(Integer.toHexString(ch));
  233                               _printer.printText(";<![CDATA[");
  234                           }
  235                       }
  236                   }
  237                   _printer.setNextIndent( saveIndent );
  238   
  239               } else {
  240   
  241                   int saveIndent;
  242   
  243                   if ( state.preserveSpace ) {
  244                       // If preserving space then hold of indentation so no
  245                       // excessive spaces are printed at line breaks, escape
  246                       // the text content without replacing spaces and print
  247                       // the text breaking only at line breaks.
  248                       saveIndent = _printer.getNextIndent();
  249                       _printer.setNextIndent( 0 );
  250                       printText( chars, start, length, true, state.unescaped );
  251                       _printer.setNextIndent( saveIndent );
  252                   } else {
  253                       printText( chars, start, length, false, state.unescaped );
  254                   }
  255               }
  256           } catch ( IOException except ) {
  257               throw new SAXException( except );
  258           }
  259       }
  260   
  261   
  262       //
  263       // overwrite printing functions to make sure serializer prints out valid XML
  264       //
  265       protected void printEscaped( String source ) throws IOException {
  266           int length = source.length();
  267           for ( int i = 0 ; i < length ; ++i ) {
  268               int ch = source.charAt(i);
  269               if (!XML11Char.isXML11Valid(ch)) {
  270                   if (++i <length) {
  271                       surrogates(ch, source.charAt(i));
  272                   } else {
  273                       fatalError("The character '"+(char)ch+"' is an invalid XML character");
  274                   }
  275                   continue;
  276               }
  277               if (ch == '\n' || ch == '\r' || ch == '\t' || ch == 0x0085 || ch == 0x2028){
  278                                   printHex(ch);
  279                           } else if (ch == '<') {
  280                                   _printer.printText("&lt;");
  281                           } else if (ch == '&') {
  282                                   _printer.printText("&amp;");
  283                           } else if (ch == '"') {
  284                                   _printer.printText("&quot;");
  285                           } else if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch))) {
  286                                   _printer.printText((char) ch);
  287                           } else {
  288                                   printHex(ch);
  289                           }
  290           }
  291       }
  292   
  293       protected final void printCDATAText(String text) throws IOException {
  294           int length = text.length();
  295           char ch;
  296   
  297           for (int index = 0; index < length; ++index) {
  298               ch = text.charAt(index);
  299   
  300               if (ch == ']'
  301                   && index + 2 < length
  302                   && text.charAt(index + 1) == ']'
  303                   && text.charAt(index + 2) == '>') { // check for ']]>'
  304                   if (fDOMErrorHandler != null){
  305                       // REVISIT: this means that if DOM Error handler is not registered we don't report any
  306                       // fatal errors and might serialize not wellformed document
  307                   if ((features & DOMSerializerImpl.SPLITCDATA) == 0
  308                       && (features & DOMSerializerImpl.WELLFORMED) == 0) {
  309                       // issue fatal error
  310                       String msg =
  311                           DOMMessageFormatter.formatMessage(
  312                               DOMMessageFormatter.SERIALIZER_DOMAIN,
  313                               "EndingCDATA",
  314                               null);
  315                       modifyDOMError(
  316                           msg,
  317                           DOMError.SEVERITY_FATAL_ERROR,
  318                           null, fCurrentNode);
  319                       boolean continueProcess =
  320                           fDOMErrorHandler.handleError(fDOMError);
  321                       if (!continueProcess) {
  322                           throw new IOException();
  323                       }
  324                   } else {
  325                       // issue warning
  326                       String msg =
  327                           DOMMessageFormatter.formatMessage(
  328                               DOMMessageFormatter.SERIALIZER_DOMAIN,
  329                               "SplittingCDATA",
  330                               null);
  331                       modifyDOMError(
  332                           msg,
  333                           DOMError.SEVERITY_WARNING,
  334                           null, fCurrentNode);
  335                       fDOMErrorHandler.handleError(fDOMError);
  336                   }
  337                   }
  338                   // split CDATA section
  339                   _printer.printText("]]]]><![CDATA[>");
  340                   index += 2;
  341                   continue;
  342               }
  343   
  344               if (!XML11Char.isXML11Valid(ch)) {
  345                   // check if it is surrogate
  346                   if (++index < length) {
  347                       surrogates(ch, text.charAt(index));
  348                   } else {
  349                       fatalError(
  350                           "The character '"
  351                               + (char) ch
  352                               + "' is an invalid XML character");
  353                   }
  354                   continue;
  355               } else {
  356                   if (_encodingInfo.isPrintable((char) ch)
  357                       && XML11Char.isXML11ValidLiteral(ch)) {
  358                       _printer.printText((char) ch);
  359                   } else {
  360   
  361                       // The character is not printable -- split CDATA section
  362                       _printer.printText("]]>&#x");
  363                       _printer.printText(Integer.toHexString(ch));
  364                       _printer.printText(";<![CDATA[");
  365                   }
  366               }
  367           }
  368       }
  369   
  370   
  371       // note that this "int" should, in all cases, be a char.
  372       // REVISIT:  make it a char...
  373       protected final void printXMLChar( int ch ) throws IOException {
  374   
  375           if (ch == '\r' || ch == 0x0085 || ch == 0x2028) {
  376                           printHex(ch);
  377           } else if ( ch == '<') {
  378               _printer.printText("&lt;");
  379           } else if (ch == '&') {
  380               _printer.printText("&amp;");
  381                   } else if (ch == '>'){
  382                           // character sequence "]]>" can't appear in content, therefore
  383                           // we should escape '>'
  384                           _printer.printText("&gt;");
  385           } else if ( _encodingInfo.isPrintable((char)ch) && XML11Char.isXML11ValidLiteral(ch)) {
  386               _printer.printText((char)ch);
  387           } else {
  388                printHex(ch);
  389           }
  390       }
  391   
  392   
  393   
  394       protected final void surrogates(int high, int low) throws IOException{
  395           if (XMLChar.isHighSurrogate(high)) {
  396               if (!XMLChar.isLowSurrogate(low)) {
  397                   //Invalid XML
  398                   fatalError("The character '"+(char)low+"' is an invalid XML character");
  399               }
  400               else {
  401                   int supplemental = XMLChar.supplemental((char)high, (char)low);
  402                   if (!XML11Char.isXML11Valid(supplemental)) {
  403                       //Invalid XML
  404                       fatalError("The character '"+(char)supplemental+"' is an invalid XML character");
  405                   }
  406                   else {
  407                       if (content().inCData ) {
  408                           _printer.printText("]]>&#x");
  409                           _printer.printText(Integer.toHexString(supplemental));
  410                           _printer.printText(";<![CDATA[");
  411                       }
  412                       else {
  413                                                   printHex(supplemental);
  414                       }
  415                   }
  416               }
  417           } else {
  418               fatalError("The character '"+(char)high+"' is an invalid XML character");
  419           }
  420   
  421       }
  422   
  423   
  424       protected void printText( String text, boolean preserveSpace, boolean unescaped )
  425       throws IOException {
  426           int index;
  427           char ch;
  428           int length = text.length();
  429           if ( preserveSpace ) {
  430               // Preserving spaces: the text must print exactly as it is,
  431               // without breaking when spaces appear in the text and without
  432               // consolidating spaces. If a line terminator is used, a line
  433               // break will occur.
  434               for ( index = 0 ; index < length ; ++index ) {
  435                   ch = text.charAt( index );
  436                   if (!XML11Char.isXML11Valid(ch)) {
  437                       // check if it is surrogate
  438                       if (++index <length) {
  439                           surrogates(ch, text.charAt(index));
  440                       } else {
  441                           fatalError("The character '"+(char)ch+"' is an invalid XML character");
  442                       }
  443                       continue;
  444                   }
  445                   if ( unescaped  && XML11Char.isXML11ValidLiteral(ch)) {
  446                       _printer.printText( ch );
  447                   } else
  448                       printXMLChar( ch );
  449               }
  450           } else {
  451               // Not preserving spaces: print one part at a time, and
  452               // use spaces between parts to break them into different
  453               // lines. Spaces at beginning of line will be stripped
  454               // by printing mechanism. Line terminator is treated
  455               // no different than other text part.
  456               for ( index = 0 ; index < length ; ++index ) {
  457                   ch = text.charAt( index );
  458                   if (!XML11Char.isXML11Valid(ch)) {
  459                       // check if it is surrogate
  460                       if (++index <length) {
  461                           surrogates(ch, text.charAt(index));
  462                       } else {
  463                           fatalError("The character '"+(char)ch+"' is an invalid XML character");
  464                       }
  465                       continue;
  466                   }
  467   
  468                   if ( unescaped && XML11Char.isXML11ValidLiteral(ch) )
  469                       _printer.printText( ch );
  470                   else
  471                       printXMLChar( ch);
  472               }
  473           }
  474       }
  475   
  476   
  477   
  478       protected void printText( char[] chars, int start, int length,
  479                                 boolean preserveSpace, boolean unescaped ) throws IOException {
  480           int index;
  481           char ch;
  482   
  483           if ( preserveSpace ) {
  484               // Preserving spaces: the text must print exactly as it is,
  485               // without breaking when spaces appear in the text and without
  486               // consolidating spaces. If a line terminator is used, a line
  487               // break will occur.
  488               while ( length-- > 0 ) {
  489                   ch = chars[start++];
  490                   if (!XML11Char.isXML11Valid(ch)) {
  491                       // check if it is surrogate
  492                       if ( length-- > 0) {
  493                           surrogates(ch, chars[start++]);
  494                       } else {
  495                           fatalError("The character '"+(char)ch+"' is an invalid XML character");
  496                       }
  497                       continue;
  498                   }
  499                   if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
  500                       _printer.printText( ch );
  501                   else
  502                       printXMLChar( ch );
  503               }
  504           } else {
  505               // Not preserving spaces: print one part at a time, and
  506               // use spaces between parts to break them into different
  507               // lines. Spaces at beginning of line will be stripped
  508               // by printing mechanism. Line terminator is treated
  509               // no different than other text part.
  510               while ( length-- > 0 ) {
  511                   ch = chars[start++];
  512                   if (!XML11Char.isXML11Valid(ch)) {
  513                       // check if it is surrogate
  514                       if ( length-- > 0) {
  515                           surrogates(ch, chars[start++]);
  516                       } else {
  517                           fatalError("The character '"+(char)ch+"' is an invalid XML character");
  518                       }
  519                       continue;
  520                   }
  521   
  522                   if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
  523                       _printer.printText( ch );
  524                   else
  525                       printXMLChar( ch );
  526               }
  527           }
  528       }
  529   
  530   
  531       public boolean reset() {
  532           super.reset();
  533           return true;
  534   
  535       }
  536   
  537   }

Save This Page
Home » openjdk-7 » com.sun.org.apache.xml.internal » serialize » [javadoc | source]