Source code: org/roller/presentation/velocity/plugins/textile/Textile.java
1 /**
2 * Copyright (c) 2003, David A. Czarnecki
3 * All rights reserved.
4 *
5 * Portions Copyright (c) 2003 by Mark Lussier
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
14 * Neither the name of the "David A. Czarnecki" and "blojsom" nor the names of
15 * its contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 * Products derived from this software may not be called "blojsom",
18 * nor may "blojsom" appear in their name, without prior written permission of
19 * David A. Czarnecki.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
22 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
23 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
25 * EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
33 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 package org.roller.presentation.velocity.plugins.textile;
37
38 import java.util.ArrayList;
39 import java.util.List;
40 import java.util.StringTokenizer;
41 import java.util.regex.Matcher;
42 import java.util.regex.Pattern;
43
44 /**
45 * Textile
46 *
47 * @author Mark Lussier
48 * @version $Id: Textile.java,v 1.2 2003/09/03 21:21:54 lavandowska Exp $
49 */
50 public class Textile implements TextileConstants {
51
52 /**
53 *
54 */
55 public Textile() {
56 }
57
58
59 public String process(String content) {
60
61 /**
62 * Turn any incoming ampersands into a dummy character for now.
63 * This uses a negative lookahead for alphanumerics followed by a semicolon,
64 * implying an incoming html entity, to be skipped
65 */
66 //text = preg_replace("&(?![#a-zA-Z0-9]+;)","x%x%",text);
67 content = content.replaceAll(EXP_AMPERSAND, EXP_AMPERSAND_REPLACE);
68
69 /**
70 * unentify angle brackets and ampersands
71 */
72 content = replace(content, ">", ">");
73 content = replace(content, "<", "<");
74 content = replace(content, "&", "&");
75
76
77
78
79 /**
80 * zap carriage returns
81 * @todo optimize
82 */
83 content = replace(content, "\r\n", "\n");
84
85 /**
86 * zap tabs
87 * @todo optimize
88 */
89 content = replace(content, "\t", "");
90
91
92 /**
93 * trim each line
94 */
95 StringBuffer splitBuffer = new StringBuffer();
96 StringTokenizer tokenizer = new StringTokenizer(content, "\n", true);
97 while (tokenizer.hasMoreTokens()) {
98 splitBuffer.append(tokenizer.nextToken().trim());
99 splitBuffer.append("\n");
100 }
101
102 content = splitBuffer.toString();
103
104 //### Find and replace quick tags
105
106
107 /**
108 * double equal signs mean <notextile>
109 */
110 content = content.replaceAll(EXP_DOUBLEQUOTE_MATCH, EXP_DOUBLEQUOTE_REPLACE);
111
112
113 /**
114 * image qtag
115 */
116 content = content.replaceAll(EXP_IMAGE_QTAG_MATCH, EXP_IMAGE_QTAG_REPLACE);
117
118 //# image with hyperlink
119 //text = preg_replace("(<img.+ \\/>):(\\S+)","<a href=\"$2\">$1</a>",text);
120
121 /**
122 * hyperlink qtag
123 */
124 content = content.replaceAll(EXP_HREF_QTAG_MATCH, EXP_HREF_QTAG_REPLACE);
125
126
127 /**
128 * loop through the array, replacing qtags with html
129 */
130
131 for (int x = 0; x < EXP_PHRASE_MODIFIER_SOURCETAGS.length; x++) {
132 content.replaceAll("(^|\\s|>)" + EXP_PHRASE_MODIFIER_SOURCETAGS[x] + "\\b(.+?)\\b([^\\w\\s]*?)"
133 + EXP_PHRASE_MODIFIER_SOURCETAGS[x] + "([^\\w\\s]{0,2})(\\s|$)?"
134 , "$1<" + EXP_PHRASE_MODIFIER_REPLACETAGS[x] + ">$2$3</" + EXP_PHRASE_MODIFIER_REPLACETAGS[x] + ">$4");
135 }
136
137
138
139
140
141
142
143 /**
144 * From the Origional Docs:
145 * "some weird bs with underscores and \b word boundaries,
146 * so we'll do those on their own"
147 */
148 content = content.replaceAll(EXP_EMPHASIS_MATCH, EXP_EMPHASIS_REPLACE);
149 content = content.replaceAll(EXP_ITALICS_MATCH, EXP_ITALICS_REPLACE);
150 content = content.replaceAll(EXP_SUPERSCRIPT_MATCH, EXP_SUPERSCRIPT_REPLACE);
151
152
153 /**
154 * small problem with double quotes at the end of a string
155 */
156 content = content.replaceAll(EXP_EOL_DBL_QUOTES, " ");
157
158
159 String[] glyphMatches = {EXP_SINGLE_CLOSING,
160 EXP_SINGLE_OPENING,
161 EXP_DOUBLE_CLOSING,
162 EXP_DOUBLE_OPENING,
163 EXP_ELLIPSES,
164 EXP_3UPPER_ACCRONYM,
165 EXP_3UPPERCASE_CAPS,
166 EXP_EM_DASH,
167 EXP_EN_DASH,
168 EXP_EN_DECIMAL_DASH,
169 EXP_DIMENSION_SIGN,
170 EXP_TRADEMARK,
171 EXP_REGISTERED,
172 EXP_COPYRIGHT};
173
174
175 String[] glyphReplacement = {REPLACE_SINGLE_CLOSING,
176 REPLACE_SINGLE_OPENING,
177 REPLACE_DOUBLE_CLOSING,
178 REPLACE_DOUBLE_OPENING,
179 REPLACE_ELLIPSES,
180 REPLACE_3UPPER_ACCRONYM,
181 REPLACE_3UPPERCASE_CAPS,
182 REPLACE_EM_DASH,
183 REPLACE_EN_DASH,
184 REPLACE_EN_DECIMAL_DASH,
185 REPLACE_DIMENSION_SIGN,
186 REPLACE_TRADEMARK,
187 REPLACE_REGISTERED,
188 REPLACE_COPYRIGHT};
189
190
191 boolean ishtml = Pattern.compile(EXP_ISHTML).matcher(content).find();
192 boolean inpreservation = false;
193
194 if (!ishtml) {
195 content = arrayReplaceAll(content, glyphMatches, glyphReplacement);
196 } else {
197 String[] segments = splitContent(EXP_ISHTML, content);
198
199 StringBuffer segmentBuffer = new StringBuffer();
200 for (int x = 0; x < segments.length; x++) {
201 // # matches are off if we're between <code>, <pre> etc.
202 if (segments[x].toLowerCase().matches(EXP_STARTPRESERVE)) {
203 inpreservation = true;
204 } else if (segments[x].toLowerCase().matches(EXP_ENDPRESERVE)) {
205 inpreservation = false;
206 }
207
208 if (!Pattern.compile(EXP_ISHTML).matcher(segments[x]).find() && !inpreservation) {
209 segments[x] = arrayReplaceAll(segments[x], glyphMatches, glyphReplacement);
210 }
211
212 //# convert htmlspecial if between <code>
213 if (inpreservation) {
214 segments[x] = htmlSpecialChars(segments[x], MODE_ENT_NOQUOTES);
215 segments[x] = replace(segments[x], "<pre>", "<pre>");
216 segments[x] = replace(segments[x], "<code>", "<code>");
217 segments[x] = replace(segments[x], "<notextile>", "<notextile>");
218 }
219
220 segmentBuffer.append(segments[x]);
221
222 }
223
224 content = segmentBuffer.toString();
225
226 }
227
228
229 //### Block level formatting
230
231 //# deal with forced breaks; this is going to be a problem between
232 //# <pre> tags, but we'll clean them later
233
234 content = content.replaceAll(EXP_FORCESLINEBREAKS, REPLACE_FORCESLINEBREAK);
235
236 //# might be a problem with lists
237 content = replace(content, "l><br />", "l>\n");
238
239
240 String[] blockMatches = {EXP_BULLETED_LIST,
241 EXP_NUMERIC_LIST,
242 EXP_BLOCKQUOTE,
243 EXP_HEADER_WITHCLASS,
244 EXP_HEADER,
245 EXP_PARA_WITHCLASS,
246 EXP_PARA,
247 EXP_REMAINING_PARA};
248
249 String[] blockReplace = {REPLACE_BULLETED_LIST,
250 REPLACE_NUMERIC_LIST,
251 REPLACE_BLOCKQUOTE,
252 REPLACE_HEADER_WITHCLASS,
253 REPLACE_HEADER,
254 REPLACE_PARA_WITHCLASS,
255 REPLACE_PARA,
256 REPLACE_REMAINING_PARA};
257
258
259 StringBuffer blockBuffer = new StringBuffer();
260 String list = "";
261 content += " \n";
262
263 boolean inpre = false;
264 //# split the text into an array by newlines
265 StringTokenizer blockTokenizer = new StringTokenizer(content, "\n", false);
266
267 while (blockTokenizer.hasMoreTokens()) {
268 String line = blockTokenizer.nextToken();
269
270 //#make sure the line isn't blank
271 if (!line.matches("^$")) {
272
273 //# matches are off if we're between <pre> or <code> tags
274 if (line.toLowerCase().indexOf("<pre>") > -1) {
275 inpre = true;
276 }
277
278 //# deal with block replacements first, then see if we're in a list
279 if (!inpre) {
280 line = arrayReplaceAll(line, blockMatches, blockReplace);
281 }
282
283 //# kill any br tags that slipped in earlier
284 if (inpre) {
285 line = replace(line, "<br />", "\n");
286 line = replace(line, "<br/>", "\n");
287 }
288 //# matches back on after </pre>
289 if (line.toLowerCase().indexOf("</pre>") > -1) {
290 inpre = false;
291 }
292
293 //# at the beginning of a list, $line switches to a value
294 boolean islist = Pattern.compile(EXP_LISTSTART).matcher(line).find();
295 boolean islistline = Pattern.compile(EXP_LISTSTART + list).matcher(line).find();
296 if (list.length() == 0 && islist) {
297 line = line.replaceAll(EXP_MATCHLIST, REPLACE_MATCHLIST);
298 list = line.substring(2, 3);
299
300 //# at the end of a list, $line switches to empty
301 } else if (list.length() > 0 && !islistline) {
302 line = line.replaceAll(EXP_ENDMATCHLIST, "</" + list + REPLACE_ENDMATCHLIST);
303 list = "";
304 }
305 }
306 // push each line to a new array once it's processed
307 blockBuffer.append(line);
308 blockBuffer.append("\n");
309
310 }
311
312 content = blockBuffer.toString();
313
314
315 //#clean up <notextile>
316 content = content.replaceAll("<\\/?notextile>", "");
317
318 //# clean up liu and lio
319 content = content.replaceAll("<(\\/?)li(u|o)>", "<$1li>");
320
321 //# turn the temp char back to an ampersand entity
322 content = replace(content,"x%x%","&");
323
324 //# Newline linebreaks, just for markup tidiness
325 content= replace(content,"<br />","<br />\n");
326
327
328 return content;
329
330 }
331
332 /**
333 * An implementation of the PHP htmlspecialchars()
334 * @param content
335 * @param mode
336 * @return
337 */
338 private String htmlSpecialChars(String content, int mode) {
339
340 content = replace(content, "&", "&");
341
342
343 if (mode != MODE_ENT_NOQUOTES) {
344 content = replace(content, "\"", """);
345 }
346 if (mode == MODE_ENT_QUOTES) {
347 content = replace(content, "'", "'");
348 }
349 content = replace(content, "<", "<");
350 content = replace(content, ">", ">");
351 return content;
352
353 }
354
355
356 private String[] splitContent(String matchexp, String content) {
357
358 int startAt = 0;
359 List tempList = new ArrayList();
360
361 Pattern pattern = Pattern.compile(matchexp);
362
363 Matcher matcher = pattern.matcher(content);
364
365 while (matcher.find()) {
366 tempList.add(content.substring(startAt, matcher.start()));
367 tempList.add(matcher.group());
368 startAt = matcher.end();
369 }
370
371 tempList.add(content.substring(startAt));
372
373 String[] result = new String[tempList.size()];
374
375 for (int i = 0; i < result.length; i++) {
376 result[i] = (String) tempList.get(i);
377 }
378
379 return result;
380
381 }
382
383
384 /**
385 *
386 * @param content
387 * @param matches
388 * @param replaces
389 * @return
390 */
391 private String arrayReplaceAll(String content, String[] matches, String[] replaces) {
392
393 String result = content;
394
395 for (int x = 0; x < matches.length; x++) {
396 result = result.replaceAll(matches[x], replaces[x]);
397 }
398
399 return result;
400 }
401
402
403 /**
404 * Replace any occurances of a string pattern within a string with a different string.
405 *
406 * @param str The source string. This is the string that will be searched and have the replacements
407 * @param pattern The pattern to look for in str
408 * @param replace The string to insert in the place of <i>pattern</i>
409 * @return String with replace occurences
410 */
411 private static String replace(String str, String pattern, String replace) {
412 if (str == null || "".equals(str)) {
413 return str;
414 }
415
416 if (replace == null) {
417 return str;
418 }
419
420 if ("".equals(pattern)) {
421 return str;
422 }
423
424 int s = 0;
425 int e = 0;
426 StringBuffer result = new StringBuffer();
427
428 while ((e = str.indexOf(pattern, s)) >= 0) {
429 result.append(str.substring(s, e));
430 result.append(replace);
431 s = e + pattern.length();
432 }
433 result.append(str.substring(s));
434 return result.toString();
435 }
436
437
438 }