Save This Page
Home » commons-lang-2.5-src » org.apache.commons » lang » [javadoc | source]
    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    * 
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    * 
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   package org.apache.commons.lang;
   18   
   19   import java.io.IOException;
   20   import java.io.StringWriter;
   21   import java.io.Writer;
   22   import java.util.Locale;
   23   
   24   import org.apache.commons.lang.exception.NestableRuntimeException;
   25   
   26   /**
   27    * <p>Escapes and unescapes <code>String</code>s for
   28    * Java, Java Script, HTML, XML, and SQL.</p>
   29    *
   30    * @author Apache Software Foundation
   31    * @author Apache Jakarta Turbine
   32    * @author Purple Technology
   33    * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
   34    * @author Antony Riley
   35    * @author Helge Tesgaard
   36    * @author <a href="sean@boohai.com">Sean Brown</a>
   37    * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
   38    * @author Phil Steitz
   39    * @author Pete Gieser
   40    * @since 2.0
   41    * @version $Id: StringEscapeUtils.java 905636 2010-02-02 14:03:32Z niallp $
   42    */
   43   public class StringEscapeUtils {
   44   
   45       private static final char CSV_DELIMITER = ',';
   46       private static final char CSV_QUOTE = '"';
   47       private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
   48       private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
   49   
   50       /**
   51        * <p><code>StringEscapeUtils</code> instances should NOT be constructed in
   52        * standard programming.</p>
   53        *
   54        * <p>Instead, the class should be used as:
   55        * <pre>StringEscapeUtils.escapeJava("foo");</pre></p>
   56        *
   57        * <p>This constructor is public to permit tools that require a JavaBean
   58        * instance to operate.</p>
   59        */
   60       public StringEscapeUtils() {
   61         super();
   62       }
   63   
   64       // Java and JavaScript
   65       //--------------------------------------------------------------------------
   66       /**
   67        * <p>Escapes the characters in a <code>String</code> using Java String rules.</p>
   68        *
   69        * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
   70        *
   71        * <p>So a tab becomes the characters <code>'\\'</code> and
   72        * <code>'t'</code>.</p>
   73        *
   74        * <p>The only difference between Java strings and JavaScript strings
   75        * is that in JavaScript, a single quote must be escaped.</p>
   76        *
   77        * <p>Example:
   78        * <pre>
   79        * input string: He didn't say, "Stop!"
   80        * output string: He didn't say, \"Stop!\"
   81        * </pre>
   82        * </p>
   83        *
   84        * @param str  String to escape values in, may be null
   85        * @return String with escaped values, <code>null</code> if null string input
   86        */
   87       public static String escapeJava(String str) {
   88           return escapeJavaStyleString(str, false, false);
   89       }
   90   
   91       /**
   92        * <p>Escapes the characters in a <code>String</code> using Java String rules to
   93        * a <code>Writer</code>.</p>
   94        * 
   95        * <p>A <code>null</code> string input has no effect.</p>
   96        * 
   97        * @see #escapeJava(java.lang.String)
   98        * @param out  Writer to write escaped string into
   99        * @param str  String to escape values in, may be null
  100        * @throws IllegalArgumentException if the Writer is <code>null</code>
  101        * @throws IOException if error occurs on underlying Writer
  102        */
  103       public static void escapeJava(Writer out, String str) throws IOException {
  104           escapeJavaStyleString(out, str, false, false);
  105       }
  106   
  107       /**
  108        * <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p>
  109        * <p>Escapes any values it finds into their JavaScript String form.
  110        * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
  111        *
  112        * <p>So a tab becomes the characters <code>'\\'</code> and
  113        * <code>'t'</code>.</p>
  114        *
  115        * <p>The only difference between Java strings and JavaScript strings
  116        * is that in JavaScript, a single quote must be escaped.</p>
  117        *
  118        * <p>Example:
  119        * <pre>
  120        * input string: He didn't say, "Stop!"
  121        * output string: He didn\'t say, \"Stop!\"
  122        * </pre>
  123        * </p>
  124        *
  125        * @param str  String to escape values in, may be null
  126        * @return String with escaped values, <code>null</code> if null string input
  127        */
  128       public static String escapeJavaScript(String str) {
  129           return escapeJavaStyleString(str, true, true);
  130       }
  131   
  132       /**
  133        * <p>Escapes the characters in a <code>String</code> using JavaScript String rules
  134        * to a <code>Writer</code>.</p>
  135        * 
  136        * <p>A <code>null</code> string input has no effect.</p>
  137        * 
  138        * @see #escapeJavaScript(java.lang.String)
  139        * @param out  Writer to write escaped string into
  140        * @param str  String to escape values in, may be null
  141        * @throws IllegalArgumentException if the Writer is <code>null</code>
  142        * @throws IOException if error occurs on underlying Writer
  143        **/
  144       public static void escapeJavaScript(Writer out, String str) throws IOException {
  145           escapeJavaStyleString(out, str, true, true);
  146       }
  147   
  148       /**
  149        * <p>Worker method for the {@link #escapeJava(String)} and {@link #escapeJavaScript(String)} methods.</p>
  150        * 
  151        * @param str String to escape values in, may be null
  152        * @param escapeSingleQuotes escapes single quotes if <code>true</code>
  153        * @param escapeForwardSlash escapes forward slashes (<code>/</code>) if <code>true</code>
  154        * @return the escaped string
  155        */
  156       private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes, boolean escapeForwardSlash) {
  157           if (str == null) {
  158               return null;
  159           }
  160           try {
  161               StringWriter writer = new StringWriter(str.length() * 2);
  162               escapeJavaStyleString(writer, str, escapeSingleQuotes, escapeForwardSlash);
  163               return writer.toString();
  164           } catch (IOException ioe) {
  165               // this should never ever happen while writing to a StringWriter
  166               throw new UnhandledException(ioe);
  167           }
  168       }
  169   
  170       /**
  171        * <p>Worker method for the {@link #escapeJava(Writer, String)} and {@link #escapeJavaScript(Writer, String)} methods.</p>
  172        * 
  173        * @param out writer to receive the escaped string
  174        * @param str String to escape values in, may be null
  175        * @param escapeSingleQuote escapes single quotes if <code>true</code>
  176        * @param escapeForwardSlash escapes forward slashes (<code>/</code>) if <code>true</code>
  177        * @throws IOException if an IOException occurs
  178        */
  179       private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote,
  180               boolean escapeForwardSlash) throws IOException {
  181           if (out == null) {
  182               throw new IllegalArgumentException("The Writer must not be null");
  183           }
  184           if (str == null) {
  185               return;
  186           }
  187           int sz;
  188           sz = str.length();
  189           for (int i = 0; i < sz; i++) {
  190               char ch = str.charAt(i);
  191   
  192               // handle unicode
  193               if (ch > 0xfff) {
  194                   out.write("\\u" + hex(ch));
  195               } else if (ch > 0xff) {
  196                   out.write("\\u0" + hex(ch));
  197               } else if (ch > 0x7f) {
  198                   out.write("\\u00" + hex(ch));
  199               } else if (ch < 32) {
  200                   switch (ch) {
  201                       case '\b' :
  202                           out.write('\\');
  203                           out.write('b');
  204                           break;
  205                       case '\n' :
  206                           out.write('\\');
  207                           out.write('n');
  208                           break;
  209                       case '\t' :
  210                           out.write('\\');
  211                           out.write('t');
  212                           break;
  213                       case '\f' :
  214                           out.write('\\');
  215                           out.write('f');
  216                           break;
  217                       case '\r' :
  218                           out.write('\\');
  219                           out.write('r');
  220                           break;
  221                       default :
  222                           if (ch > 0xf) {
  223                               out.write("\\u00" + hex(ch));
  224                           } else {
  225                               out.write("\\u000" + hex(ch));
  226                           }
  227                           break;
  228                   }
  229               } else {
  230                   switch (ch) {
  231                       case '\'' :
  232                           if (escapeSingleQuote) {
  233                               out.write('\\');
  234                           }
  235                           out.write('\'');
  236                           break;
  237                       case '"' :
  238                           out.write('\\');
  239                           out.write('"');
  240                           break;
  241                       case '\\' :
  242                           out.write('\\');
  243                           out.write('\\');
  244                           break;
  245                       case '/' :
  246                           if (escapeForwardSlash) {
  247                               out.write('\\');
  248                           }
  249                           out.write('/');
  250                           break;
  251                       default :
  252                           out.write(ch);
  253                           break;
  254                   }
  255               }
  256           }
  257       }
  258   
  259       /**
  260        * <p>Returns an upper case hexadecimal <code>String</code> for the given
  261        * character.</p>
  262        * 
  263        * @param ch The character to convert.
  264        * @return An upper case hexadecimal <code>String</code>
  265        */
  266       private static String hex(char ch) {
  267           return Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
  268       }
  269   
  270       /**
  271        * <p>Unescapes any Java literals found in the <code>String</code>.
  272        * For example, it will turn a sequence of <code>'\'</code> and
  273        * <code>'n'</code> into a newline character, unless the <code>'\'</code>
  274        * is preceded by another <code>'\'</code>.</p>
  275        * 
  276        * @param str  the <code>String</code> to unescape, may be null
  277        * @return a new unescaped <code>String</code>, <code>null</code> if null string input
  278        */
  279       public static String unescapeJava(String str) {
  280           if (str == null) {
  281               return null;
  282           }
  283           try {
  284               StringWriter writer = new StringWriter(str.length());
  285               unescapeJava(writer, str);
  286               return writer.toString();
  287           } catch (IOException ioe) {
  288               // this should never ever happen while writing to a StringWriter
  289               throw new UnhandledException(ioe);
  290           }
  291       }
  292   
  293       /**
  294        * <p>Unescapes any Java literals found in the <code>String</code> to a
  295        * <code>Writer</code>.</p>
  296        *
  297        * <p>For example, it will turn a sequence of <code>'\'</code> and
  298        * <code>'n'</code> into a newline character, unless the <code>'\'</code>
  299        * is preceded by another <code>'\'</code>.</p>
  300        * 
  301        * <p>A <code>null</code> string input has no effect.</p>
  302        * 
  303        * @param out  the <code>Writer</code> used to output unescaped characters
  304        * @param str  the <code>String</code> to unescape, may be null
  305        * @throws IllegalArgumentException if the Writer is <code>null</code>
  306        * @throws IOException if error occurs on underlying Writer
  307        */
  308       public static void unescapeJava(Writer out, String str) throws IOException {
  309           if (out == null) {
  310               throw new IllegalArgumentException("The Writer must not be null");
  311           }
  312           if (str == null) {
  313               return;
  314           }
  315           int sz = str.length();
  316           StringBuffer unicode = new StringBuffer(4);
  317           boolean hadSlash = false;
  318           boolean inUnicode = false;
  319           for (int i = 0; i < sz; i++) {
  320               char ch = str.charAt(i);
  321               if (inUnicode) {
  322                   // if in unicode, then we're reading unicode
  323                   // values in somehow
  324                   unicode.append(ch);
  325                   if (unicode.length() == 4) {
  326                       // unicode now contains the four hex digits
  327                       // which represents our unicode character
  328                       try {
  329                           int value = Integer.parseInt(unicode.toString(), 16);
  330                           out.write((char) value);
  331                           unicode.setLength(0);
  332                           inUnicode = false;
  333                           hadSlash = false;
  334                       } catch (NumberFormatException nfe) {
  335                           throw new NestableRuntimeException("Unable to parse unicode value: " + unicode, nfe);
  336                       }
  337                   }
  338                   continue;
  339               }
  340               if (hadSlash) {
  341                   // handle an escaped value
  342                   hadSlash = false;
  343                   switch (ch) {
  344                       case '\\':
  345                           out.write('\\');
  346                           break;
  347                       case '\'':
  348                           out.write('\'');
  349                           break;
  350                       case '\"':
  351                           out.write('"');
  352                           break;
  353                       case 'r':
  354                           out.write('\r');
  355                           break;
  356                       case 'f':
  357                           out.write('\f');
  358                           break;
  359                       case 't':
  360                           out.write('\t');
  361                           break;
  362                       case 'n':
  363                           out.write('\n');
  364                           break;
  365                       case 'b':
  366                           out.write('\b');
  367                           break;
  368                       case 'u':
  369                           {
  370                               // uh-oh, we're in unicode country....
  371                               inUnicode = true;
  372                               break;
  373                           }
  374                       default :
  375                           out.write(ch);
  376                           break;
  377                   }
  378                   continue;
  379               } else if (ch == '\\') {
  380                   hadSlash = true;
  381                   continue;
  382               }
  383               out.write(ch);
  384           }
  385           if (hadSlash) {
  386               // then we're in the weird case of a \ at the end of the
  387               // string, let's output it anyway.
  388               out.write('\\');
  389           }
  390       }
  391   
  392       /**
  393        * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p>
  394        *
  395        * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
  396        * into a newline character, unless the <code>'\'</code> is preceded by another
  397        * <code>'\'</code>.</p>
  398        *
  399        * @see #unescapeJava(String)
  400        * @param str  the <code>String</code> to unescape, may be null
  401        * @return A new unescaped <code>String</code>, <code>null</code> if null string input
  402        */
  403       public static String unescapeJavaScript(String str) {
  404           return unescapeJava(str);
  405       }
  406   
  407       /**
  408        * <p>Unescapes any JavaScript literals found in the <code>String</code> to a
  409        * <code>Writer</code>.</p>
  410        *
  411        * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
  412        * into a newline character, unless the <code>'\'</code> is preceded by another
  413        * <code>'\'</code>.</p>
  414        *
  415        * <p>A <code>null</code> string input has no effect.</p>
  416        * 
  417        * @see #unescapeJava(Writer,String)
  418        * @param out  the <code>Writer</code> used to output unescaped characters
  419        * @param str  the <code>String</code> to unescape, may be null
  420        * @throws IllegalArgumentException if the Writer is <code>null</code>
  421        * @throws IOException if error occurs on underlying Writer
  422        */
  423       public static void unescapeJavaScript(Writer out, String str) throws IOException {
  424           unescapeJava(out, str);
  425       }
  426   
  427       // HTML and XML
  428       //--------------------------------------------------------------------------
  429       /**
  430        * <p>Escapes the characters in a <code>String</code> using HTML entities.</p>
  431        *
  432        * <p>
  433        * For example:
  434        * </p> 
  435        * <p><code>"bread" & "butter"</code></p>
  436        * becomes:
  437        * <p>
  438        * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
  439        * </p>
  440        *
  441        * <p>Supports all known HTML 4.0 entities, including funky accents.
  442        * Note that the commonly used apostrophe escape character (&amp;apos;)
  443        * is not a legal entity and so is not supported. </p>
  444        *
  445        * @param str  the <code>String</code> to escape, may be null
  446        * @return a new escaped <code>String</code>, <code>null</code> if null string input
  447        * 
  448        * @see #unescapeHtml(String)
  449        * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
  450        * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
  451        * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
  452        * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
  453        * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
  454        */
  455       public static String escapeHtml(String str) {
  456           if (str == null) {
  457               return null;
  458           }
  459           try {
  460               StringWriter writer = new StringWriter ((int)(str.length() * 1.5));
  461               escapeHtml(writer, str);
  462               return writer.toString();
  463           } catch (IOException ioe) {
  464               //should be impossible
  465               throw new UnhandledException(ioe);
  466           }
  467       }
  468   
  469       /**
  470        * <p>Escapes the characters in a <code>String</code> using HTML entities and writes
  471        * them to a <code>Writer</code>.</p>
  472        *
  473        * <p>
  474        * For example:
  475        * </p> 
  476        * <code>"bread" & "butter"</code>
  477        * <p>becomes:</p>
  478        * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
  479        *
  480        * <p>Supports all known HTML 4.0 entities, including funky accents.
  481        * Note that the commonly used apostrophe escape character (&amp;apos;)
  482        * is not a legal entity and so is not supported. </p>
  483        *
  484        * @param writer  the writer receiving the escaped string, not null
  485        * @param string  the <code>String</code> to escape, may be null
  486        * @throws IllegalArgumentException if the writer is null
  487        * @throws IOException when <code>Writer</code> passed throws the exception from
  488        *                                       calls to the {@link Writer#write(int)} methods.
  489        * 
  490        * @see #escapeHtml(String)
  491        * @see #unescapeHtml(String)
  492        * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
  493        * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
  494        * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
  495        * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
  496        * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
  497        */
  498       public static void escapeHtml(Writer writer, String string) throws IOException {
  499           if (writer == null ) {
  500               throw new IllegalArgumentException ("The Writer must not be null.");
  501           }
  502           if (string == null) {
  503               return;
  504           }
  505           Entities.HTML40.escape(writer, string);
  506       }
  507   
  508       //-----------------------------------------------------------------------
  509       /**
  510        * <p>Unescapes a string containing entity escapes to a string
  511        * containing the actual Unicode characters corresponding to the
  512        * escapes. Supports HTML 4.0 entities.</p>
  513        *
  514        * <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;"
  515        * will become "&lt;Fran&ccedil;ais&gt;"</p>
  516        *
  517        * <p>If an entity is unrecognized, it is left alone, and inserted
  518        * verbatim into the result string. e.g. "&amp;gt;&amp;zzzz;x" will
  519        * become "&gt;&amp;zzzz;x".</p>
  520        *
  521        * @param str  the <code>String</code> to unescape, may be null
  522        * @return a new unescaped <code>String</code>, <code>null</code> if null string input
  523        * @see #escapeHtml(Writer, String)
  524        */
  525       public static String unescapeHtml(String str) {
  526           if (str == null) {
  527               return null;
  528           }
  529           try {
  530               StringWriter writer = new StringWriter ((int)(str.length() * 1.5));
  531               unescapeHtml(writer, str);
  532               return writer.toString();
  533           } catch (IOException ioe) {
  534               //should be impossible
  535               throw new UnhandledException(ioe);
  536           }
  537       }
  538   
  539       /**
  540        * <p>Unescapes a string containing entity escapes to a string
  541        * containing the actual Unicode characters corresponding to the
  542        * escapes. Supports HTML 4.0 entities.</p>
  543        *
  544        * <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;"
  545        * will become "&lt;Fran&ccedil;ais&gt;"</p>
  546        *
  547        * <p>If an entity is unrecognized, it is left alone, and inserted
  548        * verbatim into the result string. e.g. "&amp;gt;&amp;zzzz;x" will
  549        * become "&gt;&amp;zzzz;x".</p>
  550        *
  551        * @param writer  the writer receiving the unescaped string, not null
  552        * @param string  the <code>String</code> to unescape, may be null
  553        * @throws IllegalArgumentException if the writer is null
  554        * @throws IOException if an IOException occurs
  555        * @see #escapeHtml(String)
  556        */
  557       public static void unescapeHtml(Writer writer, String string) throws IOException {
  558           if (writer == null ) {
  559               throw new IllegalArgumentException ("The Writer must not be null.");
  560           }
  561           if (string == null) {
  562               return;
  563           }
  564           Entities.HTML40.unescape(writer, string);
  565       }
  566   
  567       //-----------------------------------------------------------------------
  568       /**
  569        * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
  570        *
  571        * <p>For example: <tt>"bread" & "butter"</tt> =>
  572        * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
  573        * </p>
  574        *
  575        * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
  576        * Does not support DTDs or external entities.</p>
  577        *
  578        * <p>Note that unicode characters greater than 0x7f are currently escaped to 
  579        *    their numerical \\u equivalent. This may change in future releases. </p>
  580        *
  581        * @param writer  the writer receiving the escaped string, not null
  582        * @param str  the <code>String</code> to escape, may be null
  583        * @throws IllegalArgumentException if the writer is null
  584        * @throws IOException if there is a problem writing
  585        * @see #unescapeXml(java.lang.String)
  586        */
  587       public static void escapeXml(Writer writer, String str) throws IOException {
  588           if (writer == null ) {
  589               throw new IllegalArgumentException ("The Writer must not be null.");
  590           }
  591           if (str == null) {
  592               return;
  593           }
  594           Entities.XML.escape(writer, str);
  595       }
  596   
  597       /**
  598        * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
  599        *
  600        * <p>For example: <tt>"bread" & "butter"</tt> =>
  601        * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
  602        * </p>
  603        *
  604        * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
  605        * Does not support DTDs or external entities.</p>
  606        *
  607        * <p>Note that unicode characters greater than 0x7f are currently escaped to 
  608        *    their numerical \\u equivalent. This may change in future releases. </p>
  609        *
  610        * @param str  the <code>String</code> to escape, may be null
  611        * @return a new escaped <code>String</code>, <code>null</code> if null string input
  612        * @see #unescapeXml(java.lang.String)
  613        */
  614       public static String escapeXml(String str) {
  615           if (str == null) {
  616               return null;
  617           }
  618           return Entities.XML.escape(str);
  619       }
  620   
  621       //-----------------------------------------------------------------------
  622       /**
  623        * <p>Unescapes a string containing XML entity escapes to a string
  624        * containing the actual Unicode characters corresponding to the
  625        * escapes.</p>
  626        *
  627        * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
  628        * Does not support DTDs or external entities.</p>
  629        *
  630        * <p>Note that numerical \\u unicode codes are unescaped to their respective 
  631        *    unicode characters. This may change in future releases. </p>
  632        *
  633        * @param writer  the writer receiving the unescaped string, not null
  634        * @param str  the <code>String</code> to unescape, may be null
  635        * @throws IllegalArgumentException if the writer is null
  636        * @throws IOException if there is a problem writing
  637        * @see #escapeXml(String)
  638        */
  639       public static void unescapeXml(Writer writer, String str) throws IOException {
  640           if (writer == null ) {
  641               throw new IllegalArgumentException ("The Writer must not be null.");
  642           }
  643           if (str == null) {
  644               return;
  645           }
  646           Entities.XML.unescape(writer, str);
  647       }
  648   
  649       /**
  650        * <p>Unescapes a string containing XML entity escapes to a string
  651        * containing the actual Unicode characters corresponding to the
  652        * escapes.</p>
  653        *
  654        * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
  655        * Does not support DTDs or external entities.</p>
  656        *
  657        * <p>Note that numerical \\u unicode codes are unescaped to their respective 
  658        *    unicode characters. This may change in future releases. </p>
  659        *
  660        * @param str  the <code>String</code> to unescape, may be null
  661        * @return a new unescaped <code>String</code>, <code>null</code> if null string input
  662        * @see #escapeXml(String)
  663        */
  664       public static String unescapeXml(String str) {
  665           if (str == null) {
  666               return null;
  667           }
  668           return Entities.XML.unescape(str);
  669       }
  670   
  671       //-----------------------------------------------------------------------
  672       /**
  673        * <p>Escapes the characters in a <code>String</code> to be suitable to pass to
  674        * an SQL query.</p>
  675        *
  676        * <p>For example,
  677        * <pre>statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" + 
  678        *   StringEscapeUtils.escapeSql("McHale's Navy") + 
  679        *   "'");</pre>
  680        * </p>
  681        *
  682        * <p>At present, this method only turns single-quotes into doubled single-quotes
  683        * (<code>"McHale's Navy"</code> => <code>"McHale''s Navy"</code>). It does not
  684        * handle the cases of percent (%) or underscore (_) for use in LIKE clauses.</p>
  685        *
  686        * see http://www.jguru.com/faq/view.jsp?EID=8881
  687        * @param str  the string to escape, may be null
  688        * @return a new String, escaped for SQL, <code>null</code> if null string input
  689        */
  690       public static String escapeSql(String str) {
  691           if (str == null) {
  692               return null;
  693           }
  694           return StringUtils.replace(str, "'", "''");
  695       }
  696   
  697       //-----------------------------------------------------------------------
  698   
  699       /**
  700        * <p>Returns a <code>String</code> value for a CSV column enclosed in double quotes,
  701        * if required.</p>
  702        *
  703        * <p>If the value contains a comma, newline or double quote, then the
  704        *    String value is returned enclosed in double quotes.</p>
  705        * </p>
  706        *
  707        * <p>Any double quote characters in the value are escaped with another double quote.</p>
  708        *
  709        * <p>If the value does not contain a comma, newline or double quote, then the
  710        *    String value is returned unchanged.</p>
  711        * </p>
  712        *
  713        * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
  714        * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
  715        *
  716        * @param str the input CSV column String, may be null
  717        * @return the input String, enclosed in double quotes if the value contains a comma,
  718        * newline or double quote, <code>null</code> if null string input
  719        * @since 2.4
  720        */
  721       public static String escapeCsv(String str) {
  722           if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) {
  723               return str;
  724           }
  725           try {
  726               StringWriter writer = new StringWriter();
  727               escapeCsv(writer, str);
  728               return writer.toString();
  729           } catch (IOException ioe) {
  730               // this should never ever happen while writing to a StringWriter
  731               throw new UnhandledException(ioe);
  732           }
  733       }
  734   
  735       /**
  736        * <p>Writes a <code>String</code> value for a CSV column enclosed in double quotes,
  737        * if required.</p>
  738        *
  739        * <p>If the value contains a comma, newline or double quote, then the
  740        *    String value is written enclosed in double quotes.</p>
  741        * </p>
  742        *
  743        * <p>Any double quote characters in the value are escaped with another double quote.</p>
  744        *
  745        * <p>If the value does not contain a comma, newline or double quote, then the
  746        *    String value is written unchanged (null values are ignored).</p>
  747        * </p>
  748        *
  749        * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
  750        * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
  751        *
  752        * @param str the input CSV column String, may be null
  753        * @param out Writer to write input string to, enclosed in double quotes if it contains
  754        * a comma, newline or double quote
  755        * @throws IOException if error occurs on underlying Writer
  756        * @since 2.4
  757        */
  758       public static void escapeCsv(Writer out, String str) throws IOException {
  759           if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) {
  760               if (str != null) {
  761                   out.write(str);
  762               }
  763               return;
  764           }
  765           out.write(CSV_QUOTE);
  766           for (int i = 0; i < str.length(); i++) {
  767               char c = str.charAt(i);
  768               if (c == CSV_QUOTE) {
  769                   out.write(CSV_QUOTE); // escape double quote
  770               }
  771               out.write(c);
  772           }
  773           out.write(CSV_QUOTE);
  774       }
  775   
  776       /**
  777        * <p>Returns a <code>String</code> value for an unescaped CSV column. </p>
  778        *
  779        * <p>If the value is enclosed in double quotes, and contains a comma, newline 
  780        *    or double quote, then quotes are removed. 
  781        * </p>
  782        *
  783        * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 
  784        *    to just one double quote. </p>
  785        *
  786        * <p>If the value is not enclosed in double quotes, or is and does not contain a 
  787        *    comma, newline or double quote, then the String value is returned unchanged.</p>
  788        * </p>
  789        *
  790        * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
  791        * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
  792        *
  793        * @param str the input CSV column String, may be null
  794        * @return the input String, with enclosing double quotes removed and embedded double 
  795        * quotes unescaped, <code>null</code> if null string input
  796        * @since 2.4
  797        */
  798       public static String unescapeCsv(String str) {
  799           if (str == null) {
  800               return null;
  801           }
  802           try {
  803               StringWriter writer = new StringWriter();
  804               unescapeCsv(writer, str);
  805               return writer.toString();
  806           } catch (IOException ioe) {
  807               // this should never ever happen while writing to a StringWriter
  808               throw new UnhandledException(ioe);
  809           }
  810       }
  811   
  812       /**
  813        * <p>Returns a <code>String</code> value for an unescaped CSV column. </p>
  814        *
  815        * <p>If the value is enclosed in double quotes, and contains a comma, newline 
  816        *    or double quote, then quotes are removed. 
  817        * </p>
  818        *
  819        * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 
  820        *    to just one double quote. </p>
  821        *
  822        * <p>If the value is not enclosed in double quotes, or is and does not contain a 
  823        *    comma, newline or double quote, then the String value is returned unchanged.</p>
  824        * </p>
  825        *
  826        * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
  827        * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
  828        *
  829        * @param str the input CSV column String, may be null
  830        * @param out Writer to write the input String to, with enclosing double quotes 
  831        * removed and embedded double quotes unescaped, <code>null</code> if null string input
  832        * @throws IOException if error occurs on underlying Writer
  833        * @since 2.4
  834        */
  835       public static void unescapeCsv(Writer out, String str) throws IOException {
  836           if (str == null) {
  837               return;
  838           }
  839           if (str.length() < 2) {
  840               out.write(str);
  841               return;
  842           }
  843           if ( str.charAt(0) != CSV_QUOTE || str.charAt(str.length() - 1) != CSV_QUOTE ) {
  844               out.write(str);
  845               return;
  846           }
  847   
  848           // strip quotes
  849           String quoteless = str.substring(1, str.length() - 1);
  850   
  851           if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) {
  852               // deal with escaped quotes; ie) ""
  853               str = StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR);
  854           }
  855   
  856           out.write(str);
  857       }
  858   
  859   }

Save This Page
Home » commons-lang-2.5-src » org.apache.commons » lang » [javadoc | source]