Save This Page
Home » wstx-sources » com.ctc.wstx.sw » [javadoc | source]
    1   /* Woodstox XML processor
    2    *
    3    * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
    4    *
    5    * Licensed under the License specified in file LICENSE, included with
    6    * the source code.
    7    * You may not use this file except in compliance with the License.
    8    *
    9    * Unless required by applicable law or agreed to in writing, software
   10    * distributed under the License is distributed on an "AS IS" BASIS,
   11    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   12    * See the License for the specific language governing permissions and
   13    * limitations under the License.
   14    */
   15   
   16   package com.ctc.wstx.sw;
   17   
   18   import java.io.IOException;
   19   import java.io.OutputStream;
   20   import java.io.Writer;
   21   import java.text.MessageFormat;
   22   
   23   import javax.xml.stream.XMLStreamException;
   24   
   25   import org.codehaus.stax2.XMLStreamReader2;
   26   import org.codehaus.stax2.io.EscapingWriterFactory;
   27   
   28   import com.ctc.wstx.api.WriterConfig;
   29   import com.ctc.wstx.api.WstxOutputProperties;
   30   import com.ctc.wstx.cfg.ErrorConsts;
   31   import com.ctc.wstx.cfg.OutputConfigFlags;
   32   import com.ctc.wstx.cfg.XmlConsts;
   33   import com.ctc.wstx.exc.WstxIOException;
   34   import com.ctc.wstx.io.TextEscaper;
   35   import com.ctc.wstx.io.WstxInputData;
   36   
   37   /**
   38    * This is the base class for actual physical xml outputters. These
   39    * instances will only handle actual writing (possibly including
   40    * encoding) of the serialized textual xml, and will in general
   41    * not verify content being output. The exception are the
   42    * character-by-character checks that are most efficiently done
   43    * at encoding level (such as character escaping, and checks for
   44    * illegal character combinations), which are handled at this
   45    * level.
   46    *<p>
   47    * Note that implementations can have different operating modes:
   48    * specifically, when dealing with illegal content (such as "--"
   49    * in a comment, "?>" in processing instruction, or "]]>" within
   50    * CDATA section), implementations can do one of 3 things:
   51    * <ul>
   52    *  <li>Fix the problem, by splitting the section (which can be done
   53    *    for CDATA sections, and to some degree, comments)
   54    *   </li>
   55    *  <li>Stop outputting, and return an index to the illegal piece
   56    *    of data (if there is no easy way to fix the problem: for
   57    *    example, for processing instruction)
   58    *   </li>
   59    *  <li>Just output content even though it will not result in
   60    *    well-formed output. This should only be done if the calling
   61    *    application has specifically requested verifications to be
   62    *    disabled.
   63    *   </li>
   64    *  </ul>
   65    */
   66   public abstract class XmlWriter
   67   {
   68       protected final static int SURR1_FIRST = 0xD800;
   69       protected final static int SURR1_LAST = 0xDBFF;
   70       protected final static int SURR2_FIRST = 0xDC00;
   71       protected final static int SURR2_LAST = 0xDFFF;
   72   
   73       protected final static char DEFAULT_QUOTE_CHAR = '"';
   74   
   75       protected final WriterConfig mConfig;
   76       protected final String mEncoding;
   77   
   78       // // // Operating mode: base class needs to know whether
   79       // // // namespaces are support (for entity/PI target validation)
   80   
   81       protected final boolean mNsAware;
   82   
   83       protected final boolean mCheckStructure;
   84       protected final boolean mCheckContent;
   85       protected final boolean mCheckNames;
   86       protected final boolean mFixContent;
   87   
   88       /**
   89        * Whether to escape CR (\r) character.
   90        */
   91       final boolean mEscapeCR;
   92   
   93       /**
   94        * Flag that defines whether close() on this writer should call
   95        * close on the underlying output object (stream, writer)
   96        */
   97       protected final boolean mAutoCloseOutput;
   98   
   99       /**
  100        * Optional escaping writer used for escaping characters like '&lt;'
  101        * '&amp;' and '&gt;' in textual content.
  102        * Constructed if calling code has
  103        * installed a special escaping writer factory for text content.
  104        * Null if the default escaper is to be used.
  105        */
  106       protected Writer mTextWriter;
  107   
  108       /**
  109        * Optional escaping writer used for escaping characters like '&quot;'
  110        * '&amp;' and '&lt;' in attribute values.
  111        * Constructed if calling code has
  112        * installed a special escaping writer factory for text content.
  113        * Null if the default escaper is to be used.
  114        */
  115       protected Writer mAttrValueWriter;
  116   
  117       /**
  118        * Indicates whether output is to be compliant; if false, is to be
  119        * xml 1.0 compliant, if true, xml 1.1 compliant.
  120        */
  121       protected boolean mXml11 = false;
  122   
  123       /**
  124        * Lazy-constructed wrapper object, which will route all calls to
  125        * Writer API, to matching <code>writeRaw</code> methods of this
  126        * XmlWriter instance.
  127        */
  128       protected XmlWriterWrapper mRawWrapper = null;
  129   
  130       /**
  131        * Lazy-constructed wrapper object, which will route all calls to
  132        * Writer API, to matching <code>writeCharacters</code> methods of this
  133        * XmlWriter instance.
  134        */
  135       protected XmlWriterWrapper mTextWrapper = null;
  136   
  137       /*
  138       ///////////////////////////////////////////////////////
  139       // Output location info
  140       ///////////////////////////////////////////////////////
  141        */
  142   
  143       /**
  144        * Number of characters output prior to currently buffered output
  145        */
  146       protected int mLocPastChars = 0;
  147   
  148       protected int mLocRowNr = 1;
  149   
  150       /**
  151        * Offset of the first character on this line. May be negative, if
  152        * the offset was in a buffer that has been flushed out.
  153        */
  154       protected int mLocRowStartOffset = 0;
  155   
  156       /*
  157       ///////////////////////////////////////////////////////
  158       // Life-cycle
  159       ///////////////////////////////////////////////////////
  160        */
  161   
  162       protected XmlWriter(WriterConfig cfg, String encoding, boolean autoclose)
  163           throws IOException
  164       {
  165           mConfig = cfg;
  166           mEncoding = encoding;
  167           mAutoCloseOutput = autoclose;
  168           int flags = cfg.getConfigFlags();
  169           mNsAware = (flags & OutputConfigFlags.CFG_ENABLE_NS) != 0;
  170           mCheckStructure = (flags & OutputConfigFlags.CFG_VALIDATE_STRUCTURE) != 0;
  171           mCheckContent = (flags & OutputConfigFlags.CFG_VALIDATE_CONTENT) != 0;
  172           mCheckNames = (flags & OutputConfigFlags.CFG_VALIDATE_NAMES) != 0;
  173           mFixContent = (flags & OutputConfigFlags.CFG_FIX_CONTENT) != 0;
  174           mEscapeCR = (flags & OutputConfigFlags.CFG_ESCAPE_CR) != 0;
  175   
  176           // Has caller requested any custom text or attr value escaping?
  177   
  178           EscapingWriterFactory f = mConfig.getTextEscaperFactory();
  179           if (f == null) {
  180               mTextWriter = null;
  181           } else {
  182               String enc = (mEncoding == null || mEncoding.length() == 0) ?
  183                   WstxOutputProperties.DEFAULT_OUTPUT_ENCODING : mEncoding;
  184               mTextWriter = f.createEscapingWriterFor(wrapAsRawWriter(), enc);
  185           }
  186   
  187           f = mConfig.getAttrValueEscaperFactory();
  188           if (f == null) {
  189               mAttrValueWriter = null;
  190           } else {
  191               String enc = (mEncoding == null || mEncoding.length() == 0) ?
  192                   WstxOutputProperties.DEFAULT_OUTPUT_ENCODING : mEncoding;
  193               mAttrValueWriter = f.createEscapingWriterFor(wrapAsRawWriter(), enc);
  194           }
  195       }
  196   
  197       /*
  198       ////////////////////////////////////////////////////
  199       // Extra configuration
  200       ////////////////////////////////////////////////////
  201        */
  202   
  203       public void enableXml11() {
  204           mXml11 = true;
  205       }
  206   
  207       /*
  208       ////////////////////////////////////////////////////
  209       // Access to underlying physical output destinations
  210       ////////////////////////////////////////////////////
  211        */
  212   
  213       /**
  214        * @return Underlying OutputStream used for physical output,
  215        *   if the writer was constructed using one
  216        */
  217       protected abstract OutputStream getOutputStream();
  218   
  219       /**
  220        * @return Underlying Writer used for physical output,
  221        *   if the writer was constructed with one, or one was
  222        *   created to be used with an OutputStream.
  223        */
  224       protected abstract Writer getWriter();
  225       
  226       /*
  227       ////////////////////////////////////////////////////
  228       // Basic methods for communicating with underlying
  229       // stream or writer
  230       ////////////////////////////////////////////////////
  231        */
  232   
  233       /**
  234        * Method called to flush the buffer(s), and close the output
  235        * sink (stream or writer).
  236        */
  237       public abstract void close() throws IOException;
  238   
  239       public abstract void flush()
  240           throws IOException;
  241   
  242       public abstract void writeRaw(String str, int offset, int len)
  243           throws IOException;
  244   
  245       public void writeRaw(String str)
  246           throws IOException
  247       {
  248           writeRaw(str, 0, str.length());
  249       }
  250   
  251       public abstract void writeRaw(char[] cbuf, int offset, int len)
  252           throws IOException;
  253   
  254       /*
  255       ////////////////////////////////////////////////////
  256       // Raw, non-verifying write methods; used when
  257       // directly copying trusted content
  258       ////////////////////////////////////////////////////
  259        */
  260   
  261       public abstract void writeCDataStart()
  262           throws IOException;
  263   
  264       public abstract void writeCDataEnd()
  265           throws IOException;
  266   
  267       public abstract void writeCommentStart()
  268           throws IOException;
  269   
  270       public abstract void writeCommentEnd()
  271           throws IOException;
  272   
  273       public abstract void writePIStart(String target, boolean addSpace)
  274           throws IOException;
  275   
  276       public abstract void writePIEnd()
  277           throws IOException;
  278   
  279       /*
  280       ////////////////////////////////////////////////////
  281       // Write methods, non-elem/attr
  282       ////////////////////////////////////////////////////
  283        */
  284   
  285       /**
  286        * @param data Contents of the CDATA section to write out
  287   
  288        * @return offset of the (first) illegal content segment ("]]>") in 
  289        *   passed content, if not in repairing mode; or -1 if none
  290        */
  291       public abstract int writeCData(String data)
  292           throws IOException, XMLStreamException;
  293   
  294       public abstract int writeCData(char[] cbuf, int offset, int len)
  295           throws IOException, XMLStreamException;
  296   
  297       public abstract void writeCharacters(String data)
  298           throws IOException;
  299   
  300       public abstract void writeCharacters(char[] cbuf, int offset, int len)
  301           throws IOException;
  302   
  303       /**
  304        * Method that will try to output the content as specified. If
  305        * the content passed in has embedded "--" in it, it will either
  306        * add an intervening space between consequtive hyphens (if content
  307        * fixing is enabled), or return the offset of the first hyphen in
  308        * multi-hyphen sequence.
  309        */
  310       public abstract int writeComment(String data)
  311           throws IOException, XMLStreamException;
  312   
  313       /**
  314        * Older "legacy" output method for outputting DOCTYPE declaration.
  315        * Assumes that the passed-in String contains a complete DOCTYPE
  316        * declaration properly quoted.
  317        */
  318       public abstract void writeDTD(String data)
  319           throws IOException, XMLStreamException;
  320   
  321       public abstract void writeDTD(String rootName,
  322                                     String systemId, String publicId,
  323                                     String internalSubset)
  324           throws IOException, XMLStreamException;
  325   
  326       public abstract void writeEntityReference(String name)
  327           throws IOException, XMLStreamException;
  328   
  329       public abstract int writePI(String target, String data)
  330           throws IOException, XMLStreamException;
  331   
  332       public abstract void writeXmlDeclaration(String version, String enc, String standalone)
  333           throws IOException;
  334   
  335       /*
  336       ////////////////////////////////////////////////////
  337       // Write methods, elements
  338       ////////////////////////////////////////////////////
  339        */
  340   
  341       /**
  342        *<p>
  343        * Note: can throw XMLStreamException, if name checking is enabled,
  344        * and name is invalid (name check has to be in this writer, not
  345        * caller, since it depends not only on xml limitations, but also
  346        * on encoding limitations)
  347        */
  348       public abstract void writeStartTagStart(String localName)
  349           throws IOException, XMLStreamException;
  350                  
  351       /**
  352        *<p>
  353        * Note: can throw XMLStreamException, if name checking is enabled,
  354        * and name is invalid (name check has to be in this writer, not
  355        * caller, since it depends not only on xml limitations, but also
  356        * on encoding limitations)
  357        */
  358       public abstract void writeStartTagStart(String prefix, String localName)
  359           throws IOException, XMLStreamException;
  360   
  361       public abstract void writeStartTagEnd()
  362           throws IOException;
  363   
  364       public abstract void writeStartTagEmptyEnd()
  365           throws IOException;
  366   
  367       public abstract void writeEndTag(String localName)
  368           throws IOException;
  369   
  370       public abstract void writeEndTag(String prefix, String localName)
  371           throws IOException;
  372   
  373       /*
  374       ////////////////////////////////////////////////////
  375       // Write methods, attributes/ns
  376       ////////////////////////////////////////////////////
  377        */
  378   
  379       /**
  380        *<p>
  381        * Note: can throw XMLStreamException, if name checking is enabled,
  382        * and name is invalid (name check has to be in this writer, not
  383        * caller, since it depends not only on xml limitations, but also
  384        * on encoding limitations)
  385        */
  386       public abstract void writeAttribute(String localName, String value)
  387           throws IOException, XMLStreamException;
  388   
  389       public abstract void writeAttribute(String localName, char[] value, int offset, int len)
  390           throws IOException, XMLStreamException;
  391   
  392       /**
  393        *<p>
  394        * Note: can throw XMLStreamException, if name checking is enabled,
  395        * and name is invalid (name check has to be in this writer, not
  396        * caller, since it depends not only on xml limitations, but also
  397        * on encoding limitations)
  398        */
  399       public abstract void writeAttribute(String prefix, String localName, String value)
  400           throws IOException, XMLStreamException;
  401   
  402       public abstract void writeAttribute(String prefix, String localName, char[] value, int offset, int len)
  403           throws IOException, XMLStreamException;
  404   
  405       /*
  406       ////////////////////////////////////////////////////
  407       // Location information
  408       ////////////////////////////////////////////////////
  409        */
  410   
  411       protected abstract int getOutputPtr();
  412       
  413       public int getRow() {
  414           return mLocRowNr;
  415       }
  416   
  417       public int getColumn() {
  418           return (getOutputPtr() - mLocRowStartOffset) + 1;
  419       }
  420   
  421       public int getAbsOffset() {
  422           return mLocPastChars +getOutputPtr();
  423       }
  424   
  425       /*
  426       ////////////////////////////////////////////////////
  427       // Wrapper methods, semi-public
  428       ////////////////////////////////////////////////////
  429        */
  430   
  431       /**
  432        * Method that can be called to get a wrapper instance that
  433        * can be used to essentially call the <code>writeRaw</code>
  434        * method.
  435        */
  436       public final Writer wrapAsRawWriter()
  437       {
  438           if (mRawWrapper == null) {
  439               mRawWrapper = XmlWriterWrapper.wrapWriteRaw(this);
  440           }
  441           return mRawWrapper;
  442       }
  443   
  444       public final Writer wrapAsTextWriter()
  445       {
  446           if (mTextWrapper == null) {
  447               mTextWrapper = XmlWriterWrapper.wrapWriteCharacters(this);
  448           }
  449           return mTextWrapper;
  450       }
  451   
  452       /*
  453       ////////////////////////////////////////////////////
  454       // Helper methods for sub-classes
  455       ////////////////////////////////////////////////////
  456        */
  457   
  458       /**
  459        * Method called to verify that the name is a legal XML name.
  460        */
  461       public final void verifyNameValidity(String name, boolean checkNs)
  462           throws XMLStreamException
  463       {
  464           /* No empty names... caller must have dealt with optional arguments
  465            * prior to calling this method
  466            */
  467           if (name == null || name.length() == 0) {
  468               reportNwfName(ErrorConsts.WERR_NAME_EMPTY);
  469           }
  470           int illegalIx = WstxInputData.findIllegalNameChar(name, checkNs, mXml11);
  471           if (illegalIx >= 0) {
  472               if (illegalIx == 0) {
  473                   reportNwfName(ErrorConsts.WERR_NAME_ILLEGAL_FIRST_CHAR,
  474                                 WstxInputData.getCharDesc(name.charAt(0)));
  475               }
  476               reportNwfName(ErrorConsts.WERR_NAME_ILLEGAL_CHAR,
  477                             WstxInputData.getCharDesc(name.charAt(illegalIx)));
  478           }
  479       }
  480   
  481       /**
  482        * This is the method called when an output method call violates
  483        * name well-formedness checks
  484        * and {@link WstxOutputProperties#P_OUTPUT_VALIDATE_NAMES} is
  485        * is enabled.
  486        */
  487       protected void reportNwfName(String msg)
  488           throws XMLStreamException
  489       {
  490           throwOutputError(msg);
  491       }
  492   
  493       protected void reportNwfName(String msg, Object arg)
  494           throws XMLStreamException
  495       {
  496           throwOutputError(msg, arg);
  497       }
  498   
  499       protected void reportNwfContent(String msg)
  500           throws XMLStreamException
  501       {
  502           throwOutputError(msg);
  503       }
  504   
  505       protected void throwOutputError(String msg)
  506           throws XMLStreamException
  507       {
  508           // First, let's flush any output we may have, to make debugging easier
  509           try {
  510               flush();
  511           } catch (IOException ioe) {
  512               throw new WstxIOException(ioe);
  513           }
  514   
  515           throw new XMLStreamException(msg);
  516       }
  517   
  518       protected void throwOutputError(String format, Object arg)
  519           throws XMLStreamException
  520       {
  521           String msg = MessageFormat.format(format, new Object[] { arg });
  522           throwOutputError(msg);
  523       }
  524   
  525       protected void throwInvalidChar(int c)
  526           throws IOException
  527       {
  528           // First, let's flush any output we may have, to make debugging easier
  529           flush();
  530   
  531           /* 17-May-2006, TSa: Would really be useful if we could throw
  532            *   XMLStreamExceptions; esp. to indicate actual output location.
  533            *   However, this causes problem with methods that call us and
  534            *   can only throw IOExceptions (when invoked via Writer proxy).
  535            *   Need to figure out how to resolve this.
  536            */
  537           if (c == 0) {
  538               throw new IOException("Invalid null character in text to output");
  539           }
  540           if (c < ' ' || (c >= 0x7F && c <= 0x9F)) {
  541               String msg = "Invalid white space character (0x"+Integer.toHexString(c)+") in text to output";
  542               if (mXml11) {
  543                   msg += " (can only be output using character entity)";
  544               }
  545               throw new IOException(msg);
  546           }
  547           if (c > 0x10FFFF) {
  548               throw new IOException("Illegal unicode character point (0x"+Integer.toHexString(c)+") to output; max is 0x10FFFF as per RFC 3629");
  549           }
  550           /* Surrogate pair in non-quotable (not text or attribute value)
  551            * content, and non-unicode encoding (ISO-8859-x, Ascii)?
  552            */
  553           if (c >= SURR1_FIRST && c <= SURR2_LAST) {
  554               throw new IOException("Illegal surrogate pair -- can only be output via character entities, which are not allowed in this content");
  555           }
  556           throw new IOException("Invalid XML character (0x"+Integer.toHexString(c)+") in text to output");
  557       }
  558   }

Save This Page
Home » wstx-sources » com.ctc.wstx.sw » [javadoc | source]