Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/opencms/util/Encoder.java


1   /*
2   * File   : $Source: /usr/local/cvs/opencms/src/com/opencms/util/Encoder.java,v $
3   * Date   : $Date: 2003/05/05 07:50:52 $
4   * Version: $Revision: 1.27 $
5   *
6   * This library is part of OpenCms -
7   * the Open Source Content Management System
8   *
9   * Copyright (C) 2001  The OpenCms Group
10  *
11  * This library is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * This library is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * For further information about OpenCms, please see the
22  * OpenCms Website: http://www.opencms.org
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
27  */
28  
29  
30  package com.opencms.util;
31  
32  import com.opencms.core.A_OpenCms;
33  
34  import java.io.UnsupportedEncodingException;
35  import java.net.URLDecoder;
36  import java.net.URLEncoder;
37  import java.util.StringTokenizer;
38  
39  /**
40   * The OpenCms Encoder class provides static methods to decode and encode data.<p>
41   * 
42   * The methods in this class are substitutes for <code>java.net.URLEncoder.encode()</code> and
43   * <code>java.net.URLDecoder.decode()</code>. Use the methods from this class in all OpenCms 
44   * core classes to ensure the encoding is always handled the same way.<p>
45   * 
46   * The decoding and encoding use the same mechanism as JavaScript: special characters are
47   * replaced with <code>%hex</code> where hex is a two digit hex number.<p>
48   * 
49   * <b>Note:</b> On the client side (browser), prefer the <code>encodeURIComponent</code> and
50   * <code>decodeURIComponent</code> JavaScript functions over the corresponding <code>escape</code>
51   * and <code>unescape</code> functions, because they work properly with unicode characters.
52   * These functions are supported in IE 5.5+ and NS 6+ only.
53   *
54   * @author Michael Emmerich
55   * @author Alexander Kandzior (a.kandzior@alkacon.com)
56   */
57  public class Encoder {
58      
59      /** Flag indicating if URLEncoder.encode(String, String) (Java 1.4) is available; starts as true and is cleared by encode() once a NoSuchMethodError is caught */
60      private static boolean C_NEW_ENCODING_SUPPORTED = true;
61  
62      /** Flag indicating if URLDecoder.decode(String, String) (Java 1.4) is available; starts as true and is cleared by decode() once a NoSuchMethodError is caught */
63      private static boolean C_NEW_DECODING_SUPPORTED = true;
64      
65      /** Encoding ("UTF-8") used by the single-argument encode() and decode() methods and by redecodeUriComponent(); matches the JavaScript decodeURIComponent default */
66      public static final String C_URI_ENCODING = "UTF-8";
67  
68      /**
69       * Default constructor; it performs no initialization (every method visible in this class is static).
70       */
71      public Encoder() {}
72      
73      /**
74       * This method is a substitute for <code>URLEncoder.encode()</code>.
75       * Use this in all OpenCms core classes to ensure the encoding is
76       * always handled the same way.<p>
77       * 
78       * In case you don't know what encoding to use, set the value of 
79       * the <code>encoding</code> parameter to <code>null</code>. 
80       * This will use the default encoding, which is propably the right one.<p>
81       * 
82       * It also solves a backward compatiblity issue between Java 1.3 and 1.4,
83       * since 1.3 does not support an explicit encoding parameter and always uses
84       * the default system encoding.<p>
85       * 
86       * @param source the String to encode
87       * @param encoding the encoding to use (if null, the system default is used)
88       * @param fallbackToDefaultDecoding If true, the method will fallback to the default encoding (Java 1.3 style), 
89       * if false, the source String will be returned unencoded 
90       * @return the encoded source String
91       */
92      public static String encode(String source, String encoding, boolean fallbackToDefaultEncoding) {
93          if (source == null) return null;
94          if (encoding != null) {
95              if (C_NEW_ENCODING_SUPPORTED) {
96                  try {
97                      return URLEncoder.encode(source, encoding); 
98                  } 
99                  catch (java.io.UnsupportedEncodingException e) {}
100                 catch (java.lang.NoSuchMethodError n) {
101                     C_NEW_ENCODING_SUPPORTED = false;
102                 }
103             }
104             if (! fallbackToDefaultEncoding) return source;
105         }
106         // Fallback to default encoding
107         return URLEncoder.encode(source);
108     }
109     
110     /**
111      * Encodes a String using the default encoding.
112      * 
113      * @param source the String to encode
114      * @return String the encoded source String
115      */
116     public static String encode(String source) {
117         return encode(source, C_URI_ENCODING, true);
118     }
119 
120     /**
121      * This method is a substitute for <code>URLDecoder.decode()</code>.
122      * Use this in all OpenCms core classes to ensure the encoding is
123      * always handled the same way.<p>
124      * 
125      * In case you don't know what encoding to use, set the value of 
126      * the <code>encoding</code> parameter to <code>null</code>. 
127      * This will use the default encoding, which is propably the right one.<p>
128      * 
129      * It also solves a backward compatiblity issue between Java 1.3 and 1.4,
130      * since 1.3 does not support an explicit encoding parameter and always uses
131      * the default system encoding.<p>
132      * 
133      * @param source The string to decode
134      * @param encoding The encoding to use (if null, the system default is used)
135      * @param fallbackToDefaultDecoding If true, the method will fallback to the default encoding (Java 1.3 style), 
136      * if false, the source String will be returned undecoded
137      * @return The decoded source String
138      */
139     public static String decode(String source, String encoding, boolean fallbackToDefaultDecoding) {
140         if (source == null) return null;
141         if (encoding != null) {
142             if (C_NEW_DECODING_SUPPORTED) {
143                 try {
144                     return URLDecoder.decode(source, encoding); 
145                 } 
146                 catch (java.io.UnsupportedEncodingException e) {}
147                 catch (java.lang.NoSuchMethodError n) {
148                     C_NEW_DECODING_SUPPORTED = false;
149                 }
150             }
151             if (! fallbackToDefaultDecoding) return source;
152         }
153         // Fallback to default decoding
154         return URLDecoder.decode(source);        
155     }
156     
157     /**
158      * Decodes a String using the default encoding.
159      * 
160      * @param source the String to decode
161      * @return String the decoded source String
162      */
163     public static String decode(String source) {
164         return decode(source, C_URI_ENCODING, true);
165     }    
166 
167     /**
168      * Encodes a String in a way that is compatible with the JavaScript escape function.
169      * 
170      * @param Source The textstring to be encoded.
171      * @return The JavaScript escaped string.
172      */
173     public static String escape(String source, String encoding) {
174         StringBuffer ret = new StringBuffer();
175 
176         // URLEncode the text string. This produces a very similar encoding to JavaSscript
177         // encoding, except the blank which is not encoded into a %20.
178         String enc = encode(source, encoding, true);
179         StringTokenizer t = new StringTokenizer(enc, "+");
180         while(t.hasMoreTokens()) {
181             ret.append(t.nextToken());
182             if(t.hasMoreTokens()) {
183                 ret.append("%20");
184             }
185         }
186         return ret.toString();
187     }
188 
189     /**
190      * Encodes a String in a way that is compatible with the JavaScript escape function.
191      * Muliple blanks are encoded _multiply _with %20.
192      * 
193      * @param Source The textstring to be encoded.
194      * @return The JavaScript escaped string.
195      */
196     public static String escapeWBlanks(String source, String encoding) {
197         if(source == null) {
198             return null;
199         }
200         StringBuffer ret = new StringBuffer();
201 
202         // URLEncode the text string. This produces a very similar encoding to JavaSscript
203         // encoding, except the blank which is not encoded into a %20.
204         String enc = encode(source, encoding, true);
205         for(int z = 0;z < enc.length();z++) {
206             if(enc.charAt(z) == '+') {
207                 ret.append("%20");
208             }
209             else {
210                 ret.append(enc.charAt(z));
211             }
212         }
213         return ret.toString();
214     }
215 
216     /**
217      * Escapes a String so it may be printed as text content or attribute
218      * value in a HTML page or an XML file.<p>
219      * 
220      * This method replaces the following characters in a String:
221      * <ul>
222      * <li><b>&lt;</b> with &amp;lt;
223      * <li><b>&gt;</b> with &amp;gt;
224      * <li><b>&amp;</b> with &amp;amp;
225      * <li><b>&quot;</b> with &amp;quot;
226      * </ul>
227      * 
228      * @param source the string to escape
229      * @return the escaped string
230      * 
231      * @see #escapeHtml(String)
232      */
233     public static String escapeXml(String source) {
234         if (source == null) return null;
235         StringBuffer result = new StringBuffer(source.length()*2);
236         int terminatorIndex;
237         for(int i = 0;i < source.length(); ++i) {
238             char ch = source.charAt(i);
239             switch (ch) {
240                 case '<' :
241                     result.append("&lt;");
242                     break;
243                 case '>' :
244                     result.append("&gt;");
245                     break;
246                 case '&' :
247                     // Don't escape already escaped international and special characters
248                     if ((terminatorIndex = source.indexOf(";",i)) > 0)
249                         if(source.substring(i + 1, terminatorIndex).matches("#[0-9]+"))
250                             result.append(ch);
251                         else 
252                             result.append("&amp;");
253                     else
254                         result.append("&amp;");
255                     break;
256                 case '"' :
257                     result.append("&quot;");
258                     break;
259                 default :
260                     result.append(ch);
261             }
262         }
263         return new String(result);
264     }   
265 
266     /**
267      * Escapes special characters in a HTML-String with their number-based 
268      * entity representation, for example &amp; becomes &amp;#38;.<p>
269      * 
270      * A character <code>num</code> is replaced if<br>
271      * <code>((ch !=  32) && ((ch > 122) || (ch < 48) || (ch == 60) || (ch == 62)))</code><p>
272      * 
273      * @param source the String to escape
274      * @return String the escaped String
275      * 
276      * @see #escapeXml(String)
277      */
278     public static String escapeHtml(String source) {
279         int terminatorIndex;
280         if (source == null) return null;
281         StringBuffer result = new StringBuffer(source.length()*2);
282         for(int i = 0;i < source.length();i++) {
283             int ch = source.charAt(i);
284             // Avoid escaping already escaped characters;
285             if((ch == 38) && ((terminatorIndex = source.indexOf(";",i)) > 0)) {
286                 if(source.substring(i + 1, terminatorIndex).matches("#[0-9]+|lt|gt|amp|quote")) {
287                     result.append(source.substring(i, terminatorIndex + 1));
288                     // Skip remaining chars up to (and including) ";"
289                     i = terminatorIndex;
290                     continue;
291                 }
292             }
293             if((ch !=  32) && ((ch > 122) || (ch < 48) || (ch == 60) || (ch == 62))) {
294                 result.append("&#");
295                 result.append(ch);
296                 result.append(";");
297             }
298             else {
299                 result.append((char)ch);
300             }
301         }
302         return new String(result);
303     }
304 
305     /**
306      * Escapes non ASCII characters in a HTML-String with their number-based 
307      * entity representation, for example &amp; becomes &amp;#38;.<p>
308      * 
309      * A character <code>num</code> is replaced if<br>
310      * <code>(ch > 255)</code><p>
311      * 
312      * @param source the String to escape
313      * @return String the escaped String
314      * 
315      * @see #escapeXml(String)
316      */
317     public static String escapeNonAscii(String source) {
318         if (source == null) return null;
319         StringBuffer result = new StringBuffer(source.length()*2);
320         for(int i = 0;i < source.length();i++) {
321             int ch = source.charAt(i);
322             if(ch > 255) {
323                 result.append("&#");
324                 result.append(ch);
325                 result.append(";");
326             }
327             else {
328                 result.append((char)ch);
329             }
330         }
331         return new String(result);
332     }
333     
334     /**
335      * Decodes a String in a way that is compatible with the JavaScript 
336      * unescape function.
337      * 
338      * @param Source The String to be decoded.
339      * @return The JavaScript unescaped String.
340      */
341     public static String unescape(String source, String encoding) {
342         if(source == null){
343             return null;
344         }
345         int len = source.length();
346         // to use standard decoder we need to replace '+' with "%20" (space)
347         StringBuffer preparedSource = new StringBuffer(len);
348         for (int i = 0; i < len; i++) {
349             char c = source.charAt(i);
350             if (c == '+') {
351                 preparedSource.append("%20");
352             } else {
353                 preparedSource.append(c);
354             }
355         }
356         return decode(preparedSource.toString(), encoding, true);
357     }
358     
359     /**
360      * Changes the encoding of a byte array that represents a String.<p>
361      * 
362      * @param input the byte array to convert
363      * @param oldEncoding the current encoding of the byte array
364      * @param newEncoding the new encoding of the byte array
365      * @return byte[] the byte array encoded in the new encoding
366      */
367     public static byte[] changeEncoding(byte[] input, String oldEncoding, String newEncoding) {
368         if ((oldEncoding == null) || (newEncoding == null)) return input;
369         if (oldEncoding.trim().equalsIgnoreCase(newEncoding.trim())) return input;
370         byte[] result = input;
371         try {
372             result = (new String(input, oldEncoding)).getBytes(newEncoding);
373         } catch (UnsupportedEncodingException e) {
374             // return value will be input value
375         }
376         return result;
377     }
378     
379     /**
380      * Re-decodes a String that has not been correctly decoded and thus has scrambled
381      * character bytes.<p>
382      * 
383      * This is an equivalent to the JavaScript "decodeURIComponent" function.
384      * It converts from the default "UTF-8" to the currently selected system encoding.<p>
385      * 
386      * @param input the String to convert
387      * @return String the converted String
388      */
389     public static String redecodeUriComponent(String input) {
390        if (input == null) return input;
391        return new String(changeEncoding(input.getBytes(), C_URI_ENCODING, A_OpenCms.getDefaultEncoding())); 
392     }
393 }