Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/jcorporate/expresso/core/security/filters/HtmlPlusURLFilter.java


1   /* ====================================================================
2    * The Jcorporate Apache Style Software License, Version 1.2 05-07-2002
3    *
4    * Copyright (c) 1995-2002 Jcorporate Ltd. All rights reserved.
5    *
6    * Redistribution and use in source and binary forms, with or without
7    * modification, are permitted provided that the following conditions
8    * are met:
9    *
10   * 1. Redistributions of source code must retain the above copyright
11   *    notice, this list of conditions and the following disclaimer.
12   *
13   * 2. Redistributions in binary form must reproduce the above copyright
14   *    notice, this list of conditions and the following disclaimer in
15   *    the documentation and/or other materials provided with the
16   *    distribution.
17   *
18   * 3. The end-user documentation included with the redistribution,
19   *    if any, must include the following acknowledgment:
20   *       "This product includes software developed by Jcorporate Ltd.
21   *        (http://www.jcorporate.com/)."
22   *    Alternately, this acknowledgment may appear in the software itself,
23   *    if and wherever such third-party acknowledgments normally appear.
24   *
25   * 4. "Jcorporate" and product names such as "Expresso" must
26   *    not be used to endorse or promote products derived from this
27   *    software without prior written permission. For written permission,
28   *    please contact info@jcorporate.com.
29   *
30   * 5. Products derived from this software may not be called "Expresso",
31   *    or other Jcorporate product names; nor may "Expresso" or other
32   *    Jcorporate product names appear in their name, without prior
33   *    written permission of Jcorporate Ltd.
34   *
35   * 6. No product derived from this software may compete in the same
36   *    market space, i.e. framework, without prior written permission
37   *    of Jcorporate Ltd. For written permission, please contact
38   *    partners@jcorporate.com.
39   *
40   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
41   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
43   * DISCLAIMED.  IN NO EVENT SHALL JCORPORATE LTD OR ITS CONTRIBUTORS
44   * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
45   * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
46   * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
47   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
48   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
49   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
50   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51   * SUCH DAMAGE.
52   * ====================================================================
53   *
54   * This software consists of voluntary contributions made by many
55   * individuals on behalf of the Jcorporate Ltd. Contributions back
56   * to the project(s) are encouraged when you make modifications.
57   * Please send them to support@jcorporate.com. For more information
58   * on Jcorporate Ltd. and its products, please see
59   * <http://www.jcorporate.com/>.
60   *
61   * Portions of this software are based upon other open source
62   * products and are subject to their respective licenses.
63   */
64  
65  package com.jcorporate.expresso.core.security.filters;
66  
67  import com.jcorporate.expresso.core.controller.ControllerRequest;
68  import com.jcorporate.expresso.core.controller.ServletControllerRequest;
69  import com.jcorporate.expresso.core.db.DBConnection;
70  import com.jcorporate.expresso.core.misc.StringUtil;
71  import com.jcorporate.expresso.services.dbobj.Setup;
72  
73  import javax.servlet.http.HttpServletRequest;
74  
75  /**
76   * This class provides a filter implementation for HTML output, protecting against
77   * XSS exploits, plus it creates anchor (<a>) tags for anything that starts
78   * with 'http://', 'www.', etc.
79   *
80   * @author Larry Hamel and Patti Schank
81   */
82  public class HtmlPlusURLFilter
83          extends HtmlFilter {
84  
85      // URL types to search for (to add anchor tag)
86      public static final String[] URL_TYPES = {
87          "http://", "https://", "ftp://", "mailto:", "news:"
88      };
89  
90      // These will be prepended with 'http://"
91      public static final String[] URL_INFORMAL_PREFIXES = {
92          "www.", "www2."
93      };
94  
95      /**
96       * Name for Setup value which decides if we are limiting anchor labels
97       */
98      public static final String MAX_CHARS_IN_URL_LABEL = "MaxCharsURL_Label";
99  
100 
101     /**
102      * No-args constructor required
103      */
104     public HtmlPlusURLFilter()
105             throws IllegalArgumentException {
106     }
107 
108     /**
109      * Constructor for passing strings and their replacements
110      *
111      * @param specialStringList Strings to replace
112      * @param replaceList       The replacement strings
113      */
114     public HtmlPlusURLFilter(String[] specialStringList, String[] replaceList)
115             throws IllegalArgumentException {
116         super(specialStringList, replaceList);
117     }
118 
119     /**
120      * This filter HTML encodes all special characters defined by the replacement
121      * list. If a particular character doesn't exist in the map, then the chracter
122      * is passed appended into the result set.
123      * If it does exist, then the value the special character maps to is appended
124      * into the list instead.
125      *
126      * @param data The string to encode.
127      * @return The filtered string
128      */
129     public String standardFilter(String data) {
130         String result = super.standardFilter(data);
131 
132         return insertHrefTags(result);
133     } /* standardFilter(String) */
134 
135 
136     /**
137      * Given a url string, if it's null or equals "" then just return
138      * it as is. Otherwise check if it is valid form, that is, starts
139      * with http:// or ftp:// or some other valid url prefix. If not,
140      * prepend http://.
141      *
142      * @param url The url string
143      * @return The url with http:// prepended, if needed
144      */
145     public static String addHttpPrefixIfNeeded(String url) {
146         if (StringUtil.isBlankOrNull(url)) {
147             return url;
148         }
149         String validUrl = url;
150         if (!hasValidUrlPrefix(url)) {
151             validUrl = "http://" + url;
152         }
153         return validUrl;
154     }
155 
156     /**
157      * Return true if the url has a valid prefix, like http://
158      *
159      * @param url
160      * @return
161      */
162     public static boolean hasValidUrlPrefix(String url) {
163         boolean valid = false;
164         for (int i = 0; i < URL_TYPES.length; i++) {
165             if (url.startsWith(URL_TYPES[i])) {
166                 valid = true;
167             }
168         }
169         return valid;
170     }
171 
172     /**
173      * Return true if the url is valid. Checks that it is not
174      * null, that it has a valid prefix, and that it contains
175      * a dot (must, to have a domain name) and at least 2 characters
176      * after the dot (the domain). Add more tests here as appropriate.
177      *
178      * @param url The candidate URL to check
179      * @return True if the URL is valid
180      */
181     public static boolean isValidUrl(String url) {
182         if (StringUtil.isBlankOrNull(url)) {
183             return false;
184         }
185         if (!hasValidUrlPrefix(url)) {
186             return false;
187         }
188         // does it have a domain name after a dot?
189         int dotIndex = url.indexOf(".");
190         if (dotIndex < 0) {
191             return false;
192         }
193         String domain = url.substring(dotIndex);
194         if (domain.length() < 2) {
195             return false;
196         }
197         return true;
198     }
199 
200 
201     /**
202      * Get web server address
203      *
204      * @return the address of this web server
205      */
206     public static String getWebHostPort(ControllerRequest request) {
207         ServletControllerRequest sreq = (ServletControllerRequest) request;
208         HttpServletRequest hreq = (HttpServletRequest) sreq.getServletRequest();
209         String serverDomainName = hreq.getServerName();
210         int serverPort = hreq.getServerPort();
211 
212         if (serverPort != 80) {
213             serverDomainName = serverDomainName + ":" + serverPort;
214         }
215         return serverDomainName;
216     }
217 
218     /**
219      * Insert a href tag around any http, https, www, or www2 strings
220      *
221      * @param s The string to search in and insert
222      * @return A String with <a href></a> tags and http:// if needed
223      */
224     public static String insertHrefTags(String s) {
225         boolean appendHttp = false;
226         String result = s;
227 
228         int hIndex = -1;
229         for (int i = 0; i < URL_TYPES.length; i++) {
230             String urlType = URL_TYPES[i];
231             hIndex = s.indexOf(urlType);
232             if (hIndex != -1) {
233                 break;
234             }
235         }
236 
237         if (hIndex == -1) {
238             for (int i = 0; i < URL_INFORMAL_PREFIXES.length; i++) {
239                 String urlType = URL_INFORMAL_PREFIXES[i];
240                 hIndex = s.indexOf(urlType);
241                 if (hIndex != -1) {
242                     appendHttp = true;
243                     break;
244                 }
245 
246                 hIndex = s.indexOf(urlType.toUpperCase());
247                 if (hIndex != -1) {
248                     appendHttp = true;
249                     break;
250                 }
251             }
252 
253         }
254 
255         if (hIndex >= 0) {
256             int endIndex = findEndOfHref(s, hIndex);
257             String href = s.substring(hIndex, endIndex);
258 
259             // XSS protection: cannot have '<', and we have already stripped for this
260             // in initial filtering. However, within anchor, cannot have equivalent
261             // hex or &lt; within URL, so that <script> cannot be entered
262             // todo use String.replace() regexp in jdk1.4 after expresso raises requirements
263             href = StringUtil.replaceAll(href, "&lt;", "");
264             href = StringUtil.replaceAll(href, "&LT;", "");
265             href = StringUtil.replaceAll(href, "&lT;", "");
266             href = StringUtil.replaceAll(href, "&Lt;", "");
267             href = StringUtil.replaceAll(href, "%3c", "");
268             href = StringUtil.replaceAll(href, "%3C", "");
269 
270             StringBuffer link = new StringBuffer();
271             link.append(" <a href=\"");
272             if (appendHttp) {
273                 link.append("http://");
274             }
275             link.append(href);
276             link.append("\" target=\"_blank\">");
277 
278             // If string is long, use ellipses if this setup value is set
279             // Unfortunately, we don't have access to dbname here
280             String max = Setup.getValueUnrequired(DBConnection.DEFAULT_DB_CONTEXT_NAME, MAX_CHARS_IN_URL_LABEL);
281             if (max != null) {
282                 try {
283                     int maxchars = Integer.parseInt(max);
284                     if (href.length() > maxchars) {
285                         link.append(href.substring(0, maxchars));
286                         link.append("&#133"); // ellipses
287                     } else {
288                         link.append(href);
289                     }
290                 } catch (Exception e) {
291                     e.printStackTrace();
292                 }
293             } else {
294                 link.append(href);
295             }
296             link.append("</a>");
297 
298             String linksBefore = "";
299             String linksAfter = "";
300 
301             // recurse
302             if (hIndex > 5) {
303                 linksBefore = insertHrefTags(s.substring(0, hIndex));
304             }
305             if (endIndex != s.length()) {
306                 linksAfter = insertHrefTags(s.substring(endIndex));
307             }
308 
309 
310             return linksBefore + link.toString() + linksAfter;
311 
312         } else {
313             return result;
314         }
315     }
316 
317     /**
318      * Finds the end of a hyperlink
319      *
320      * @param s     The string
321      * @param start The url's starting index
322      */
323     public static int findEndOfHref(String s, int start) {
324         char[] chars = s.toCharArray();
325         int end = s.length();
326 
327         for (int i = start; i < end; i++) {
328             char c = chars[i];
329 
330             if (Character.isLetterOrDigit(c)) {
331                 continue;
332             }
333 
334             /**
335              * Legal punctuation in URLs (see RFC 2396
336              * ftp://ftp.isi.edu/in-notes/rfc2396.txt)
337              ;  /  ?  :  @  &  =  +
338              $  ,  -  _"  .  !  ~  *  '  (  )
339              %  #
340              */
341             switch (c) {  // legal punctuation in URLS
342                 case '.':
343                 case ',':
344                 case ')':
345                 case '(':
346                 case '@':
347                 case '?':
348                 case '&':
349                 case '=':
350                 case '-':
351                 case '_':
352                 case '/':
353                 case '#':
354                 case ':':
355                 case '~':
356                 case '+':
357                 case ';':
358                 case '!':
359                 case '*':
360                 case '\'':
361                 case '$':
362                     continue; // legal punctuation in URL
363 
364                 case '%':
365                     // Special case, indicates a URL encoding follows
366                     // Malicious XSS could abuse encoding to slip scripts
367                     // through. Only allow encoding of safe hex characters
368                     if ((i < (end - 2)) && (isSafeURLEncoding(chars[i + 1], chars[i + 2]))) {
369                         continue;
370                     } else {
371                         return i; // unsafe URL encoding
372                     }
373 
374                 default:
375                     return i; // illegal punctuation--must be end of URL
376             }
377         }
378         return end;
379     }
380 
381     /**
382      * Return true if we allow the given URL encoding (after a %).
383      * See http://www.blooberry.com/indexdot/html/topics/urlencoding.htm
384      * To stop XSS attacks, definitely don't allow:
385      * the less than symbol < (3C) and the greater than symbol > (3E)
386      * 00-1F hex (ascii control characters)
387      * 80-FF hex (non-ascii, by definition not legal)
388      * <p/>
389      * For extra safety, let's not allow the following (add later if needed)
390      * quote (%27), left paren (%28), right paren (%29)
391      * left bracket (7B), right bracket (7D)
392      * <p/>
393      * Okay to allow as encoded (might be misunderstood within URLS):s
394      * space (20), ! (21), " (22), # (23), $ (24)
395      * % (25), & (26), * (2A), + (2B), comma (2C)
396      * - (2D), period (2E), / (2F), : (3A), ; (3B),
397      * = (3D), ? (3F), @ (40)
398      * | (7C), \ (5C), ~ (7E)
399      * <p/>
400      * The following are also okay, but shouldn't be encoded anyway, so don't
401      * bother checking for these for now:
402      * digits: 30-39
403      * uppercase letters: 41 - 5A
404      * lowercase letters: 61 - 7A
405      */
406     private static boolean isSafeURLEncoding(char c1, char c2) {
407         String[] allowedEncodings = {"20", "21", "22", "23", "24", "25",
408                                      "26", "2A", "2B", "2C", "2D", "2E", "2F",
409                                      "3A", "3B", "3D", "3F", "40", "7C",
410                                      "5C", "7E"};
411 
412         String encodedCharStr = String.valueOf(c1) + String.valueOf(c2);
413 
414         // test for containment of safe encoding characters
415         for (int i = 0; i < allowedEncodings.length; i++) {
416             if (encodedCharStr.startsWith(allowedEncodings[i])) {
417                 return true;
418             }
419         }
420         // otherwise assume encoded characters are unsafe
421         return false;
422     }
423 }