Source code: com/opencms/util/Encoder.java
1 /*
2 * File : $Source: /usr/local/cvs/opencms/src/com/opencms/util/Encoder.java,v $
3 * Date : $Date: 2003/05/05 07:50:52 $
4 * Version: $Revision: 1.27 $
5 *
6 * This library is part of OpenCms -
7 * the Open Source Content Management System
8 *
9 * Copyright (C) 2001 The OpenCms Group
10 *
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * For further information about OpenCms, please see the
22 * OpenCms Website: http://www.opencms.org
23 *
24 * You should have received a copy of the GNU Lesser General Public
25 * License along with this library; if not, write to the Free Software
26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 */
28
29
30 package com.opencms.util;
31
32 import com.opencms.core.A_OpenCms;
33
34 import java.io.UnsupportedEncodingException;
35 import java.net.URLDecoder;
36 import java.net.URLEncoder;
37 import java.util.StringTokenizer;
38
39 /**
40 * The OpenCms Encoder class provides static methods to decode and encode data.<p>
41 *
42 * The methods in this class are substitutes for <code>java.net.URLEncoder.encode()</code> and
43 * <code>java.net.URLDecoder.decode()</code>. Use the methods from this class in all OpenCms
44 * core classes to ensure the encoding is always handled the same way.<p>
45 *
46 * The de- and encoding uses the same coding mechanism as JavaScript, special characters are
47 * replaxed with <code>%hex</code> where hex is a two digit hex number.<p>
48 *
49 * <b>Note:</b> On the client side (browser) instead of using corresponding <code>escape</code>
50 * and <code>unescape</code> JavaScript functions, better use <code>encodeURIComponent</code> and
51 * <code>decodeURIComponent</code> functions wich are work properly with unicode characters.
52 * These functions are supported in IE 5.5+ and NS 6+ only.
53 *
54 * @author Michael Emmerich
55 * @author Alexander Kandzior (a.kandzior@alkacon.com)
56 */
57 public class Encoder {
58
59 /** Flag to indicate if the Java 1.4 encoding method (with encoding parameter) is supported by the JVM */
60 private static boolean C_NEW_ENCODING_SUPPORTED = true;
61
62 /** Flag to indicate if the Java 1.4 decoding method (with encoding parameter) is supported by the JVM */
63 private static boolean C_NEW_DECODING_SUPPORTED = true;
64
65 /** Default encoding for JavaScript decodeUriComponent methods is UTF-8 by w3c standard */
66 public static final String C_URI_ENCODING = "UTF-8";
67
68 /**
69 * Constructor
70 */
71 public Encoder() {}
72
73 /**
74 * This method is a substitute for <code>URLEncoder.encode()</code>.
75 * Use this in all OpenCms core classes to ensure the encoding is
76 * always handled the same way.<p>
77 *
78 * In case you don't know what encoding to use, set the value of
79 * the <code>encoding</code> parameter to <code>null</code>.
80 * This will use the default encoding, which is propably the right one.<p>
81 *
82 * It also solves a backward compatiblity issue between Java 1.3 and 1.4,
83 * since 1.3 does not support an explicit encoding parameter and always uses
84 * the default system encoding.<p>
85 *
86 * @param source the String to encode
87 * @param encoding the encoding to use (if null, the system default is used)
88 * @param fallbackToDefaultDecoding If true, the method will fallback to the default encoding (Java 1.3 style),
89 * if false, the source String will be returned unencoded
90 * @return the encoded source String
91 */
92 public static String encode(String source, String encoding, boolean fallbackToDefaultEncoding) {
93 if (source == null) return null;
94 if (encoding != null) {
95 if (C_NEW_ENCODING_SUPPORTED) {
96 try {
97 return URLEncoder.encode(source, encoding);
98 }
99 catch (java.io.UnsupportedEncodingException e) {}
100 catch (java.lang.NoSuchMethodError n) {
101 C_NEW_ENCODING_SUPPORTED = false;
102 }
103 }
104 if (! fallbackToDefaultEncoding) return source;
105 }
106 // Fallback to default encoding
107 return URLEncoder.encode(source);
108 }
109
110 /**
111 * Encodes a String using the default encoding.
112 *
113 * @param source the String to encode
114 * @return String the encoded source String
115 */
116 public static String encode(String source) {
117 return encode(source, C_URI_ENCODING, true);
118 }
119
120 /**
121 * This method is a substitute for <code>URLDecoder.decode()</code>.
122 * Use this in all OpenCms core classes to ensure the encoding is
123 * always handled the same way.<p>
124 *
125 * In case you don't know what encoding to use, set the value of
126 * the <code>encoding</code> parameter to <code>null</code>.
127 * This will use the default encoding, which is propably the right one.<p>
128 *
129 * It also solves a backward compatiblity issue between Java 1.3 and 1.4,
130 * since 1.3 does not support an explicit encoding parameter and always uses
131 * the default system encoding.<p>
132 *
133 * @param source The string to decode
134 * @param encoding The encoding to use (if null, the system default is used)
135 * @param fallbackToDefaultDecoding If true, the method will fallback to the default encoding (Java 1.3 style),
136 * if false, the source String will be returned undecoded
137 * @return The decoded source String
138 */
139 public static String decode(String source, String encoding, boolean fallbackToDefaultDecoding) {
140 if (source == null) return null;
141 if (encoding != null) {
142 if (C_NEW_DECODING_SUPPORTED) {
143 try {
144 return URLDecoder.decode(source, encoding);
145 }
146 catch (java.io.UnsupportedEncodingException e) {}
147 catch (java.lang.NoSuchMethodError n) {
148 C_NEW_DECODING_SUPPORTED = false;
149 }
150 }
151 if (! fallbackToDefaultDecoding) return source;
152 }
153 // Fallback to default decoding
154 return URLDecoder.decode(source);
155 }
156
157 /**
158 * Decodes a String using the default encoding.
159 *
160 * @param source the String to decode
161 * @return String the decoded source String
162 */
163 public static String decode(String source) {
164 return decode(source, C_URI_ENCODING, true);
165 }
166
167 /**
168 * Encodes a String in a way that is compatible with the JavaScript escape function.
169 *
170 * @param Source The textstring to be encoded.
171 * @return The JavaScript escaped string.
172 */
173 public static String escape(String source, String encoding) {
174 StringBuffer ret = new StringBuffer();
175
176 // URLEncode the text string. This produces a very similar encoding to JavaSscript
177 // encoding, except the blank which is not encoded into a %20.
178 String enc = encode(source, encoding, true);
179 StringTokenizer t = new StringTokenizer(enc, "+");
180 while(t.hasMoreTokens()) {
181 ret.append(t.nextToken());
182 if(t.hasMoreTokens()) {
183 ret.append("%20");
184 }
185 }
186 return ret.toString();
187 }
188
189 /**
190 * Encodes a String in a way that is compatible with the JavaScript escape function.
191 * Muliple blanks are encoded _multiply _with %20.
192 *
193 * @param Source The textstring to be encoded.
194 * @return The JavaScript escaped string.
195 */
196 public static String escapeWBlanks(String source, String encoding) {
197 if(source == null) {
198 return null;
199 }
200 StringBuffer ret = new StringBuffer();
201
202 // URLEncode the text string. This produces a very similar encoding to JavaSscript
203 // encoding, except the blank which is not encoded into a %20.
204 String enc = encode(source, encoding, true);
205 for(int z = 0;z < enc.length();z++) {
206 if(enc.charAt(z) == '+') {
207 ret.append("%20");
208 }
209 else {
210 ret.append(enc.charAt(z));
211 }
212 }
213 return ret.toString();
214 }
215
216 /**
217 * Escapes a String so it may be printed as text content or attribute
218 * value in a HTML page or an XML file.<p>
219 *
220 * This method replaces the following characters in a String:
221 * <ul>
222 * <li><b><</b> with &lt;
223 * <li><b>></b> with &gt;
224 * <li><b>&</b> with &amp;
225 * <li><b>"</b> with &quot;
226 * </ul>
227 *
228 * @param source the string to escape
229 * @return the escaped string
230 *
231 * @see #escapeHtml(String)
232 */
233 public static String escapeXml(String source) {
234 if (source == null) return null;
235 StringBuffer result = new StringBuffer(source.length()*2);
236 int terminatorIndex;
237 for(int i = 0;i < source.length(); ++i) {
238 char ch = source.charAt(i);
239 switch (ch) {
240 case '<' :
241 result.append("<");
242 break;
243 case '>' :
244 result.append(">");
245 break;
246 case '&' :
247 // Don't escape already escaped international and special characters
248 if ((terminatorIndex = source.indexOf(";",i)) > 0)
249 if(source.substring(i + 1, terminatorIndex).matches("#[0-9]+"))
250 result.append(ch);
251 else
252 result.append("&");
253 else
254 result.append("&");
255 break;
256 case '"' :
257 result.append(""");
258 break;
259 default :
260 result.append(ch);
261 }
262 }
263 return new String(result);
264 }
265
266 /**
267 * Escapes special characters in a HTML-String with their number-based
268 * entity representation, for example & becomes &#38;.<p>
269 *
270 * A character <code>num</code> is replaced if<br>
271 * <code>((ch != 32) && ((ch > 122) || (ch < 48) || (ch == 60) || (ch == 62)))</code><p>
272 *
273 * @param source the String to escape
274 * @return String the escaped String
275 *
276 * @see #escapeXml(String)
277 */
278 public static String escapeHtml(String source) {
279 int terminatorIndex;
280 if (source == null) return null;
281 StringBuffer result = new StringBuffer(source.length()*2);
282 for(int i = 0;i < source.length();i++) {
283 int ch = source.charAt(i);
284 // Avoid escaping already escaped characters;
285 if((ch == 38) && ((terminatorIndex = source.indexOf(";",i)) > 0)) {
286 if(source.substring(i + 1, terminatorIndex).matches("#[0-9]+|lt|gt|amp|quote")) {
287 result.append(source.substring(i, terminatorIndex + 1));
288 // Skip remaining chars up to (and including) ";"
289 i = terminatorIndex;
290 continue;
291 }
292 }
293 if((ch != 32) && ((ch > 122) || (ch < 48) || (ch == 60) || (ch == 62))) {
294 result.append("&#");
295 result.append(ch);
296 result.append(";");
297 }
298 else {
299 result.append((char)ch);
300 }
301 }
302 return new String(result);
303 }
304
305 /**
306 * Escapes non ASCII characters in a HTML-String with their number-based
307 * entity representation, for example & becomes &#38;.<p>
308 *
309 * A character <code>num</code> is replaced if<br>
310 * <code>(ch > 255)</code><p>
311 *
312 * @param source the String to escape
313 * @return String the escaped String
314 *
315 * @see #escapeXml(String)
316 */
317 public static String escapeNonAscii(String source) {
318 if (source == null) return null;
319 StringBuffer result = new StringBuffer(source.length()*2);
320 for(int i = 0;i < source.length();i++) {
321 int ch = source.charAt(i);
322 if(ch > 255) {
323 result.append("&#");
324 result.append(ch);
325 result.append(";");
326 }
327 else {
328 result.append((char)ch);
329 }
330 }
331 return new String(result);
332 }
333
334 /**
335 * Decodes a String in a way that is compatible with the JavaScript
336 * unescape function.
337 *
338 * @param Source The String to be decoded.
339 * @return The JavaScript unescaped String.
340 */
341 public static String unescape(String source, String encoding) {
342 if(source == null){
343 return null;
344 }
345 int len = source.length();
346 // to use standard decoder we need to replace '+' with "%20" (space)
347 StringBuffer preparedSource = new StringBuffer(len);
348 for (int i = 0; i < len; i++) {
349 char c = source.charAt(i);
350 if (c == '+') {
351 preparedSource.append("%20");
352 } else {
353 preparedSource.append(c);
354 }
355 }
356 return decode(preparedSource.toString(), encoding, true);
357 }
358
359 /**
360 * Changes the encoding of a byte array that represents a String.<p>
361 *
362 * @param input the byte array to convert
363 * @param oldEncoding the current encoding of the byte array
364 * @param newEncoding the new encoding of the byte array
365 * @return byte[] the byte array encoded in the new encoding
366 */
367 public static byte[] changeEncoding(byte[] input, String oldEncoding, String newEncoding) {
368 if ((oldEncoding == null) || (newEncoding == null)) return input;
369 if (oldEncoding.trim().equalsIgnoreCase(newEncoding.trim())) return input;
370 byte[] result = input;
371 try {
372 result = (new String(input, oldEncoding)).getBytes(newEncoding);
373 } catch (UnsupportedEncodingException e) {
374 // return value will be input value
375 }
376 return result;
377 }
378
379 /**
380 * Re-decodes a String that has not been correctly decoded and thus has scrambled
381 * character bytes.<p>
382 *
383 * This is an equivalent to the JavaScript "decodeURIComponent" function.
384 * It converts from the default "UTF-8" to the currently selected system encoding.<p>
385 *
386 * @param input the String to convert
387 * @return String the converted String
388 */
389 public static String redecodeUriComponent(String input) {
390 if (input == null) return input;
391 return new String(changeEncoding(input.getBytes(), C_URI_ENCODING, A_OpenCms.getDefaultEncoding()));
392 }
393 }