Source code: com/jcorporate/expresso/core/security/filters/HtmlPlusURLFilter.java
1 /* ====================================================================
2 * The Jcorporate Apache Style Software License, Version 1.2 05-07-2002
3 *
4 * Copyright (c) 1995-2002 Jcorporate Ltd. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * 3. The end-user documentation included with the redistribution,
19 * if any, must include the following acknowledgment:
20 * "This product includes software developed by Jcorporate Ltd.
21 * (http://www.jcorporate.com/)."
22 * Alternately, this acknowledgment may appear in the software itself,
23 * if and wherever such third-party acknowledgments normally appear.
24 *
25 * 4. "Jcorporate" and product names such as "Expresso" must
26 * not be used to endorse or promote products derived from this
27 * software without prior written permission. For written permission,
28 * please contact info@jcorporate.com.
29 *
30 * 5. Products derived from this software may not be called "Expresso",
31 * or other Jcorporate product names; nor may "Expresso" or other
32 * Jcorporate product names appear in their name, without prior
33 * written permission of Jcorporate Ltd.
34 *
35 * 6. No product derived from this software may compete in the same
36 * market space, i.e. framework, without prior written permission
37 * of Jcorporate Ltd. For written permission, please contact
38 * partners@jcorporate.com.
39 *
40 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
41 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
43 * DISCLAIMED. IN NO EVENT SHALL JCORPORATE LTD OR ITS CONTRIBUTORS
44 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
45 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
46 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
47 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
48 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
49 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
50 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 * ====================================================================
53 *
54 * This software consists of voluntary contributions made by many
55 * individuals on behalf of the Jcorporate Ltd. Contributions back
56 * to the project(s) are encouraged when you make modifications.
57 * Please send them to support@jcorporate.com. For more information
58 * on Jcorporate Ltd. and its products, please see
59 * <http://www.jcorporate.com/>.
60 *
61 * Portions of this software are based upon other open source
62 * products and are subject to their respective licenses.
63 */
64
65 package com.jcorporate.expresso.core.security.filters;
66
67 import com.jcorporate.expresso.core.controller.ControllerRequest;
68 import com.jcorporate.expresso.core.controller.ServletControllerRequest;
69 import com.jcorporate.expresso.core.db.DBConnection;
70 import com.jcorporate.expresso.core.misc.StringUtil;
71 import com.jcorporate.expresso.services.dbobj.Setup;
72
73 import javax.servlet.http.HttpServletRequest;
74
75 /**
76 * This class provides a filter implementation for HTML output, protecting against
77 * XSS exploits, plus it creates anchor (<a>) tags for anything that starts
78 * with 'http://', 'www.', etc.
79 *
80 * @author Larry Hamel and Patti Schank
81 */
82 public class HtmlPlusURLFilter
83 extends HtmlFilter {
84
85 // URL types to search for (to add anchor tag)
86 public static final String[] URL_TYPES = {
87 "http://", "https://", "ftp://", "mailto:", "news:"
88 };
89
90 // These will be prepended with 'http://"
91 public static final String[] URL_INFORMAL_PREFIXES = {
92 "www.", "www2."
93 };
94
95 /**
96 * Name for Setup value which decides if we are limiting anchor labels
97 */
98 public static final String MAX_CHARS_IN_URL_LABEL = "MaxCharsURL_Label";
99
100
101 /**
102 * No-args constructor required
103 */
104 public HtmlPlusURLFilter()
105 throws IllegalArgumentException {
106 }
107
108 /**
109 * Constructor for passing strings and their replacements
110 *
111 * @param specialStringList Strings to replace
112 * @param replaceList The replacement strings
113 */
114 public HtmlPlusURLFilter(String[] specialStringList, String[] replaceList)
115 throws IllegalArgumentException {
116 super(specialStringList, replaceList);
117 }
118
119 /**
120 * This filter HTML encodes all special characters defined by the replacement
121 * list. If a particular character doesn't exist in the map, then the chracter
122 * is passed appended into the result set.
123 * If it does exist, then the value the special character maps to is appended
124 * into the list instead.
125 *
126 * @param data The string to encode.
127 * @return The filtered string
128 */
129 public String standardFilter(String data) {
130 String result = super.standardFilter(data);
131
132 return insertHrefTags(result);
133 } /* standardFilter(String) */
134
135
136 /**
137 * Given a url string, if it's null or equals "" then just return
138 * it as is. Otherwise check if it is valid form, that is, starts
139 * with http:// or ftp:// or some other valid url prefix. If not,
140 * prepend http://.
141 *
142 * @param url The url string
143 * @return The url with http:// prepended, if needed
144 */
145 public static String addHttpPrefixIfNeeded(String url) {
146 if (StringUtil.isBlankOrNull(url)) {
147 return url;
148 }
149 String validUrl = url;
150 if (!hasValidUrlPrefix(url)) {
151 validUrl = "http://" + url;
152 }
153 return validUrl;
154 }
155
156 /**
157 * Return true if the url has a valid prefix, like http://
158 *
159 * @param url
160 * @return
161 */
162 public static boolean hasValidUrlPrefix(String url) {
163 boolean valid = false;
164 for (int i = 0; i < URL_TYPES.length; i++) {
165 if (url.startsWith(URL_TYPES[i])) {
166 valid = true;
167 }
168 }
169 return valid;
170 }
171
172 /**
173 * Return true if the url is valid. Checks that it is not
174 * null, that it has a valid prefix, and that it contains
175 * a dot (must, to have a domain name) and at least 2 characters
176 * after the dot (the domain). Add more tests here as appropriate.
177 *
178 * @param url The candidate URL to check
179 * @return True if the URL is valid
180 */
181 public static boolean isValidUrl(String url) {
182 if (StringUtil.isBlankOrNull(url)) {
183 return false;
184 }
185 if (!hasValidUrlPrefix(url)) {
186 return false;
187 }
188 // does it have a domain name after a dot?
189 int dotIndex = url.indexOf(".");
190 if (dotIndex < 0) {
191 return false;
192 }
193 String domain = url.substring(dotIndex);
194 if (domain.length() < 2) {
195 return false;
196 }
197 return true;
198 }
199
200
201 /**
202 * Get web server address
203 *
204 * @return the address of this web server
205 */
206 public static String getWebHostPort(ControllerRequest request) {
207 ServletControllerRequest sreq = (ServletControllerRequest) request;
208 HttpServletRequest hreq = (HttpServletRequest) sreq.getServletRequest();
209 String serverDomainName = hreq.getServerName();
210 int serverPort = hreq.getServerPort();
211
212 if (serverPort != 80) {
213 serverDomainName = serverDomainName + ":" + serverPort;
214 }
215 return serverDomainName;
216 }
217
218 /**
219 * Insert a href tag around any http, https, www, or www2 strings
220 *
221 * @param s The string to search in and insert
222 * @return A String with <a href></a> tags and http:// if needed
223 */
224 public static String insertHrefTags(String s) {
225 boolean appendHttp = false;
226 String result = s;
227
228 int hIndex = -1;
229 for (int i = 0; i < URL_TYPES.length; i++) {
230 String urlType = URL_TYPES[i];
231 hIndex = s.indexOf(urlType);
232 if (hIndex != -1) {
233 break;
234 }
235 }
236
237 if (hIndex == -1) {
238 for (int i = 0; i < URL_INFORMAL_PREFIXES.length; i++) {
239 String urlType = URL_INFORMAL_PREFIXES[i];
240 hIndex = s.indexOf(urlType);
241 if (hIndex != -1) {
242 appendHttp = true;
243 break;
244 }
245
246 hIndex = s.indexOf(urlType.toUpperCase());
247 if (hIndex != -1) {
248 appendHttp = true;
249 break;
250 }
251 }
252
253 }
254
255 if (hIndex >= 0) {
256 int endIndex = findEndOfHref(s, hIndex);
257 String href = s.substring(hIndex, endIndex);
258
259 // XSS protection: cannot have '<', and we have already stripped for this
260 // in initial filtering. However, within anchor, cannot have equivalent
261 // hex or < within URL, so that <script> cannot be entered
262 // todo use String.replace() regexp in jdk1.4 after expresso raises requirements
263 href = StringUtil.replaceAll(href, "<", "");
264 href = StringUtil.replaceAll(href, "<", "");
265 href = StringUtil.replaceAll(href, "&lT;", "");
266 href = StringUtil.replaceAll(href, "≪", "");
267 href = StringUtil.replaceAll(href, "%3c", "");
268 href = StringUtil.replaceAll(href, "%3C", "");
269
270 StringBuffer link = new StringBuffer();
271 link.append(" <a href=\"");
272 if (appendHttp) {
273 link.append("http://");
274 }
275 link.append(href);
276 link.append("\" target=\"_blank\">");
277
278 // If string is long, use ellipses if this setup value is set
279 // Unfortunately, we don't have access to dbname here
280 String max = Setup.getValueUnrequired(DBConnection.DEFAULT_DB_CONTEXT_NAME, MAX_CHARS_IN_URL_LABEL);
281 if (max != null) {
282 try {
283 int maxchars = Integer.parseInt(max);
284 if (href.length() > maxchars) {
285 link.append(href.substring(0, maxchars));
286 link.append("…"); // ellipses
287 } else {
288 link.append(href);
289 }
290 } catch (Exception e) {
291 e.printStackTrace();
292 }
293 } else {
294 link.append(href);
295 }
296 link.append("</a>");
297
298 String linksBefore = "";
299 String linksAfter = "";
300
301 // recurse
302 if (hIndex > 5) {
303 linksBefore = insertHrefTags(s.substring(0, hIndex));
304 }
305 if (endIndex != s.length()) {
306 linksAfter = insertHrefTags(s.substring(endIndex));
307 }
308
309
310 return linksBefore + link.toString() + linksAfter;
311
312 } else {
313 return result;
314 }
315 }
316
317 /**
318 * Finds the end of a hyperlink
319 *
320 * @param s The string
321 * @param start The url's starting index
322 */
323 public static int findEndOfHref(String s, int start) {
324 char[] chars = s.toCharArray();
325 int end = s.length();
326
327 for (int i = start; i < end; i++) {
328 char c = chars[i];
329
330 if (Character.isLetterOrDigit(c)) {
331 continue;
332 }
333
334 /**
335 * Legal punctuation in URLs (see RFC 2396
336 * ftp://ftp.isi.edu/in-notes/rfc2396.txt)
337 ; / ? : @ & = +
338 $ , - _" . ! ~ * ' ( )
339 % #
340 */
341 switch (c) { // legal punctuation in URLS
342 case '.':
343 case ',':
344 case ')':
345 case '(':
346 case '@':
347 case '?':
348 case '&':
349 case '=':
350 case '-':
351 case '_':
352 case '/':
353 case '#':
354 case ':':
355 case '~':
356 case '+':
357 case ';':
358 case '!':
359 case '*':
360 case '\'':
361 case '$':
362 continue; // legal punctuation in URL
363
364 case '%':
365 // Special case, indicates a URL encoding follows
366 // Malicious XSS could abuse encoding to slip scripts
367 // through. Only allow encoding of safe hex characters
368 if ((i < (end - 2)) && (isSafeURLEncoding(chars[i + 1], chars[i + 2]))) {
369 continue;
370 } else {
371 return i; // unsafe URL encoding
372 }
373
374 default:
375 return i; // illegal punctuation--must be end of URL
376 }
377 }
378 return end;
379 }
380
381 /**
382 * Return true if we allow the given URL encoding (after a %).
383 * See http://www.blooberry.com/indexdot/html/topics/urlencoding.htm
384 * To stop XSS attacks, definitely don't allow:
385 * the less than symbol < (3C) and the greater than symbol > (3E)
386 * 00-1F hex (ascii control characters)
387 * 80-FF hex (non-ascii, by definition not legal)
388 * <p/>
389 * For extra safety, let's not allow the following (add later if needed)
390 * quote (%27), left paren (%28), right paren (%29)
391 * left bracket (7B), right bracket (7D)
392 * <p/>
393 * Okay to allow as encoded (might be misunderstood within URLS):s
394 * space (20), ! (21), " (22), # (23), $ (24)
395 * % (25), & (26), * (2A), + (2B), comma (2C)
396 * - (2D), period (2E), / (2F), : (3A), ; (3B),
397 * = (3D), ? (3F), @ (40)
398 * | (7C), \ (5C), ~ (7E)
399 * <p/>
400 * The following are also okay, but shouldn't be encoded anyway, so don't
401 * bother checking for these for now:
402 * digits: 30-39
403 * uppercase letters: 41 - 5A
404 * lowercase letters: 61 - 7A
405 */
406 private static boolean isSafeURLEncoding(char c1, char c2) {
407 String[] allowedEncodings = {"20", "21", "22", "23", "24", "25",
408 "26", "2A", "2B", "2C", "2D", "2E", "2F",
409 "3A", "3B", "3D", "3F", "40", "7C",
410 "5C", "7E"};
411
412 String encodedCharStr = String.valueOf(c1) + String.valueOf(c2);
413
414 // test for containment of safe encoding characters
415 for (int i = 0; i < allowedEncodings.length; i++) {
416 if (encodedCharStr.startsWith(allowedEncodings[i])) {
417 return true;
418 }
419 }
420 // otherwise assume encoded characters are unsafe
421 return false;
422 }
423 }