1 /**
2 * Licensed under the Artistic License; you may not use this file
3 * except in compliance with the License.
4 * You may obtain a copy of the License at
5 *
6 * http://displaytag.sourceforge.net/license.html
7 *
8 * THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
9 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
10 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
11 */
12 package org.displaytag.util;
13
14 import java.util.ArrayList;
15 import java.util.List;
16
17
18 /**
19 * Utility methods for dealing with html tags.
20 * @author Fabrizio Giustina
21 * @version $Revision: 1081 $ ($Author: fgiust $)
22 */
23 public final class HtmlTagUtil
24 {
25
26 /**
27 * don't instantiate a new HtmlTagUtil.
28 */
29 private HtmlTagUtil()
30 {
31 // unused
32 }
33
34 /**
35 * costruct a tag from a name and a collection of attributes.
36 * @param tagName String tag name
37 * @param attributes HtmlAttributeMap containing all the tag attributes
38 * @return String open tag with attributes
39 */
40 public static String createOpenTagString(String tagName, HtmlAttributeMap attributes)
41 {
42
43 StringBuffer buffer = new StringBuffer();
44
45 buffer.append(TagConstants.TAG_OPEN).append(tagName);
46
47 if (attributes != null)
48 {
49 buffer.append(attributes.toString());
50 }
51 buffer.append(TagConstants.TAG_CLOSE);
52
53 return buffer.toString();
54
55 }
56
57 /**
58 * Strips html tags from a String.
59 * @param str input string containing html tags (<code>null</code> is <strong>not </strong> handled)
60 * @return input message without tags
61 */
62 public static String stripHTMLTags(String str)
63 {
64 // operate on chars to avoid heavy string operations on jdk 1.3
65 int len = str.length();
66 char[] value = str.toCharArray();
67 StringBuffer dest = new StringBuffer(len + 16);
68 boolean intag = false;
69
70 for (int j = 0; j < len; j++)
71 {
72 char c = value[j];
73 if (intag)
74 {
75 if (c == '>')
76 {
77 intag = false;
78 }
79 }
80 else
81 {
82 switch (c)
83 {
84 case '"' :
85 dest.append("""); // encode quotes, this could be used as a tag attribute value
86 break;
87 case '<' :
88 intag = true;
89 break;
90 default :
91 dest.append(c);
92 break;
93 }
94 }
95 }
96
97 return dest.toString();
98 }
99
100 /**
101 * Abbreviates a String which can contain html tags. Html tags are not counted in String length. It also try to
102 * handle open tags and html entities.
103 * @param str full String. <code>null</code> is handled by returning <code>null</code>
104 * @param maxLength maximum number of characters (excluding tags)
105 * @param byNumberOfWords if <code>true</code> maxLength will be the number of words returned, elsewhere will
106 * represent the number of characters.
107 * @return abbreviated String
108 */
109 public static String abbreviateHtmlString(String str, int maxLength, boolean byNumberOfWords)
110 {
111 if (str == null || str.length() <= maxLength)
112 {
113 // quick exit to avoid useless creation of a Stringbuffer
114 return str;
115 }
116
117 int sz = str.length();
118 StringBuffer buffer = new StringBuffer(sz);
119
120 // some spaghetti code for quick & dirty tag handling and entity detection
121 boolean inTag = false; // parsing a tag
122 boolean inTagName = false; // parsing a tag name
123 boolean endingTag = false; // parsing an ending tag
124 int count = 0; // chars/words added
125 boolean chopped = false; // result has been chopped?
126 int entityChars = 0; // number of chars in parsed entity
127
128 StringBuffer currentTag = new StringBuffer(5); // will contain a tag name
129
130 List openTags = new ArrayList(5); // lit of unclosed tags found in the string
131
132 int i;
133 for (i = 0; i < sz; i++)
134 {
135 if (count >= maxLength)
136 {
137 chopped = true;
138 break;
139 }
140
141 char c = str.charAt(i);
142
143 if (c == '<')
144 {
145 inTag = true;
146 inTagName = true;
147 }
148 else if (inTag)
149 {
150 if (inTagName && c == '/')
151 {
152
153 if (currentTag.length() == 0)
154 {
155 // end tag found
156 endingTag = true;
157 }
158 else
159 {
160 // empty tag, reset and don't save
161 inTagName = false;
162 }
163
164 currentTag = new StringBuffer(5);
165 }
166 else if (inTagName && (c == ' ' || c == '>'))
167 {
168 inTagName = false;
169
170 if (!endingTag)
171 {
172 openTags.add(currentTag.toString());
173 }
174 else
175 {
176 openTags.remove(currentTag.toString());
177 }
178 currentTag = new StringBuffer(5);
179 if (c == '>')
180 {
181 inTag = false;
182 }
183 }
184 else if (c == '>')
185 {
186 inTag = false;
187 }
188 else if (inTagName)
189 {
190 currentTag.append(c);
191 }
192
193 }
194 else
195 {
196
197 if (byNumberOfWords)
198 {
199 if (Character.isWhitespace(c))
200 {
201 count++;
202 }
203 }
204 else
205 {
206 // handle entities
207 if (c == '&')
208 {
209 entityChars = 1;
210 }
211 else if (entityChars == 0)
212 {
213 count++;
214 }
215 else
216 {
217 // end entity
218 if (entityChars > 0 && c == ';')
219 {
220 entityChars = 0;
221 count++;
222 }
223 else
224 {
225 entityChars++;
226 }
227 if (entityChars > 5)
228 {
229 // assume an unescaped & if entity doesn't close after max 5 chars
230 count += entityChars;
231 entityChars = 0;
232 }
233 }
234 }
235
236 }
237
238 if (inTag || (!byNumberOfWords || count < maxLength))
239 {
240 buffer.append(c);
241 }
242 }
243
244 if (chopped)
245 {
246 buffer.append("...");
247 }
248
249 if (openTags.size() > 0)
250 {
251 // quickly fixes closed tags
252 String remainingToken = str.substring(i);
253
254 for (int j = openTags.size() - 1; j >= 0; j--)
255 {
256 String closingTag = "</" + openTags.get(j) + ">";
257
258 // we only add closing tags that exists in the original String, so we don't have to understand
259 // html/xhtml differences and keep a list of html unclosed tags
260 if (remainingToken.indexOf(closingTag) > -1)
261 {
262 buffer.append(closingTag);
263 }
264 }
265 }
266
267 return buffer.toString();
268 }
269
270 }