Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: org/roller/presentation/velocity/plugins/textile/Textile.java


1   /**
2    * Copyright (c) 2003, David A. Czarnecki
3    * All rights reserved.
4    *
5    * Portions Copyright (c) 2003 by Mark Lussier
6    *
7    * Redistribution and use in source and binary forms, with or without
8    * modification, are permitted provided that the following conditions are met:
9    *
10   * Redistributions of source code must retain the above copyright notice,
11   *      this list of conditions and the following disclaimer.
12   * Redistributions in binary form must reproduce the above copyright notice,
13   *      this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
14   * Neither the name of the "David A. Czarnecki" and "blojsom" nor the names of
15   * its contributors may be used to endorse or promote products derived from
16   * this software without specific prior written permission.
17   * Products derived from this software may not be called "blojsom",
18   * nor may "blojsom" appear in their name, without prior written permission of
19   * David A. Czarnecki.
20   *
21   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
22   * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
23   * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24   * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
25   * EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
26   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28   * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29   * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30   * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
33   * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34   */
35  
36  package org.roller.presentation.velocity.plugins.textile;
37  
38  import java.util.ArrayList;
39  import java.util.List;
40  import java.util.StringTokenizer;
41  import java.util.regex.Matcher;
42  import java.util.regex.Pattern;
43  
44  /**
45   * Textile
46   *
47   * @author Mark Lussier
48   * @version $Id: Textile.java,v 1.2 2003/09/03 21:21:54 lavandowska Exp $
49   */
50  public class Textile implements TextileConstants {
51  
52      /**
53       *
54       */
55      public Textile() {
56      }
57  
58  
59      public String process(String content) {
60  
61          /**
62           * Turn any incoming ampersands into a dummy character for now.
63           * This uses a negative lookahead for alphanumerics followed by a semicolon,
64           * implying an incoming html entity, to be skipped
65           */
66          //text = preg_replace("&(?![#a-zA-Z0-9]+;)","x%x%",text);
67          content = content.replaceAll(EXP_AMPERSAND, EXP_AMPERSAND_REPLACE);
68  
69          /**
70           * unentify angle brackets and ampersands
71           */
72          content = replace(content, ">", ">");
73          content = replace(content, "&lt;", "<");
74          content = replace(content, "&amp;", "&");
75  
76  
77  
78  
79          /**
80           *  zap carriage returns
81           * @todo optimize
82           */
83          content = replace(content, "\r\n", "\n");
84  
85          /**
86           * zap tabs
87           * @todo optimize
88           */
89          content = replace(content, "\t", "");
90  
91  
92          /**
93           * trim each line
94           */
95          StringBuffer splitBuffer = new StringBuffer();
96          StringTokenizer tokenizer = new StringTokenizer(content, "\n", true);
97          while (tokenizer.hasMoreTokens()) {
98              splitBuffer.append(tokenizer.nextToken().trim());
99              splitBuffer.append("\n");
100         }
101 
102         content = splitBuffer.toString();
103 
104         //### Find and replace quick tags
105 
106 
107         /**
108          * double equal signs mean <notextile>
109          */
110         content = content.replaceAll(EXP_DOUBLEQUOTE_MATCH, EXP_DOUBLEQUOTE_REPLACE);
111 
112 
113         /**
114          * image qtag
115          */
116         content = content.replaceAll(EXP_IMAGE_QTAG_MATCH, EXP_IMAGE_QTAG_REPLACE);
117 
118         //# image with hyperlink
119         //text = preg_replace("(<img.+ \\/>):(\\S+)","<a href=\"$2\">$1</a>",text);
120 
121         /**
122          *  hyperlink qtag
123          */
124         content = content.replaceAll(EXP_HREF_QTAG_MATCH, EXP_HREF_QTAG_REPLACE);
125 
126 
127         /**
128          * loop through the array, replacing qtags with html
129          */
130 
131         for (int x = 0; x < EXP_PHRASE_MODIFIER_SOURCETAGS.length; x++) {
132             content.replaceAll("(^|\\s|>)" + EXP_PHRASE_MODIFIER_SOURCETAGS[x] + "\\b(.+?)\\b([^\\w\\s]*?)"
133                                + EXP_PHRASE_MODIFIER_SOURCETAGS[x] + "([^\\w\\s]{0,2})(\\s|$)?"
134                                , "$1<" + EXP_PHRASE_MODIFIER_REPLACETAGS[x] + ">$2$3</" + EXP_PHRASE_MODIFIER_REPLACETAGS[x] + ">$4");
135         }
136 
137 
138 
139 
140 
141 
142 
143         /**
144          * From the Origional Docs:
145          * "some weird bs with underscores and \b word boundaries,
146          * so we'll do those on their own"
147          */
148         content = content.replaceAll(EXP_EMPHASIS_MATCH, EXP_EMPHASIS_REPLACE);
149         content = content.replaceAll(EXP_ITALICS_MATCH, EXP_ITALICS_REPLACE);
150         content = content.replaceAll(EXP_SUPERSCRIPT_MATCH, EXP_SUPERSCRIPT_REPLACE);
151 
152 
153         /**
154          * small problem with double quotes at the end of a string
155          */
156         content = content.replaceAll(EXP_EOL_DBL_QUOTES, " ");
157 
158 
159         String[] glyphMatches = {EXP_SINGLE_CLOSING,
160                                  EXP_SINGLE_OPENING,
161                                  EXP_DOUBLE_CLOSING,
162                                  EXP_DOUBLE_OPENING,
163                                  EXP_ELLIPSES,
164                                  EXP_3UPPER_ACCRONYM,
165                                  EXP_3UPPERCASE_CAPS,
166                                  EXP_EM_DASH,
167                                  EXP_EN_DASH,
168                                  EXP_EN_DECIMAL_DASH,
169                                  EXP_DIMENSION_SIGN,
170                                  EXP_TRADEMARK,
171                                  EXP_REGISTERED,
172                                  EXP_COPYRIGHT};
173 
174 
175         String[] glyphReplacement = {REPLACE_SINGLE_CLOSING,
176                                      REPLACE_SINGLE_OPENING,
177                                      REPLACE_DOUBLE_CLOSING,
178                                      REPLACE_DOUBLE_OPENING,
179                                      REPLACE_ELLIPSES,
180                                      REPLACE_3UPPER_ACCRONYM,
181                                      REPLACE_3UPPERCASE_CAPS,
182                                      REPLACE_EM_DASH,
183                                      REPLACE_EN_DASH,
184                                      REPLACE_EN_DECIMAL_DASH,
185                                      REPLACE_DIMENSION_SIGN,
186                                      REPLACE_TRADEMARK,
187                                      REPLACE_REGISTERED,
188                                      REPLACE_COPYRIGHT};
189 
190 
191         boolean ishtml = Pattern.compile(EXP_ISHTML).matcher(content).find();
192         boolean inpreservation = false;
193 
194         if (!ishtml) {
195             content = arrayReplaceAll(content, glyphMatches, glyphReplacement);
196         } else {
197             String[] segments = splitContent(EXP_ISHTML, content);
198 
199             StringBuffer segmentBuffer = new StringBuffer();
200             for (int x = 0; x < segments.length; x++) {
201                 //  # matches are off if we're between <code>, <pre> etc.
202                 if (segments[x].toLowerCase().matches(EXP_STARTPRESERVE)) {
203                     inpreservation = true;
204                 } else if (segments[x].toLowerCase().matches(EXP_ENDPRESERVE)) {
205                     inpreservation = false;
206                 }
207 
208                 if (!Pattern.compile(EXP_ISHTML).matcher(segments[x]).find() && !inpreservation) {
209                     segments[x] = arrayReplaceAll(segments[x], glyphMatches, glyphReplacement);
210                 }
211 
212                 //# convert htmlspecial if between <code>
213                 if (inpreservation) {
214                     segments[x] = htmlSpecialChars(segments[x], MODE_ENT_NOQUOTES);
215                     segments[x] = replace(segments[x], "&lt;pre&gt;", "<pre>");
216                     segments[x] = replace(segments[x], "&lt;code&gt;", "<code>");
217                     segments[x] = replace(segments[x], "&lt;notextile&gt;", "<notextile>");
218                 }
219 
220                 segmentBuffer.append(segments[x]);
221 
222             }
223 
224             content = segmentBuffer.toString();
225 
226         }
227 
228 
229         //### Block level formatting
230 
231         //# deal with forced breaks; this is going to be a problem between
232         //#  <pre> tags, but we'll clean them later
233 
234         content = content.replaceAll(EXP_FORCESLINEBREAKS, REPLACE_FORCESLINEBREAK);
235 
236         //# might be a problem with lists
237         content = replace(content, "l><br />", "l>\n");
238 
239 
240         String[] blockMatches = {EXP_BULLETED_LIST,
241                                  EXP_NUMERIC_LIST,
242                                  EXP_BLOCKQUOTE,
243                                  EXP_HEADER_WITHCLASS,
244                                  EXP_HEADER,
245                                  EXP_PARA_WITHCLASS,
246                                  EXP_PARA,
247                                  EXP_REMAINING_PARA};
248 
249         String[] blockReplace = {REPLACE_BULLETED_LIST,
250                                  REPLACE_NUMERIC_LIST,
251                                  REPLACE_BLOCKQUOTE,
252                                  REPLACE_HEADER_WITHCLASS,
253                                  REPLACE_HEADER,
254                                  REPLACE_PARA_WITHCLASS,
255                                  REPLACE_PARA,
256                                  REPLACE_REMAINING_PARA};
257 
258 
259         StringBuffer blockBuffer = new StringBuffer();
260         String list = "";
261         content += " \n";
262 
263         boolean inpre = false;
264         //# split the text into an array by newlines
265         StringTokenizer blockTokenizer = new StringTokenizer(content, "\n", false);
266 
267         while (blockTokenizer.hasMoreTokens()) {
268             String line = blockTokenizer.nextToken();
269 
270             //#make sure the line isn't blank
271             if (!line.matches("^$")) {
272 
273                 //# matches are off if we're between <pre> or <code> tags
274                 if (line.toLowerCase().indexOf("<pre>") > -1) {
275                     inpre = true;
276                 }
277 
278                 //# deal with block replacements first, then see if we're in a list
279                 if (!inpre) {
280                     line = arrayReplaceAll(line, blockMatches, blockReplace);
281                 }
282 
283                 //# kill any br tags that slipped in earlier
284                 if (inpre) {
285                     line = replace(line, "<br />", "\n");
286                     line = replace(line, "<br/>", "\n");
287                 }
288                 //# matches back on after </pre>
289                 if (line.toLowerCase().indexOf("</pre>") > -1) {
290                     inpre = false;
291                 }
292 
293                 //# at the beginning of a list, $line switches to a value
294                 boolean islist = Pattern.compile(EXP_LISTSTART).matcher(line).find();
295                 boolean islistline = Pattern.compile(EXP_LISTSTART + list).matcher(line).find();
296                 if (list.length() == 0 && islist) {
297                     line = line.replaceAll(EXP_MATCHLIST, REPLACE_MATCHLIST);
298                     list = line.substring(2, 3);
299 
300                     //# at the end of a list, $line switches to empty
301                 } else if (list.length() > 0 && !islistline) {
302                     line = line.replaceAll(EXP_ENDMATCHLIST, "</" + list + REPLACE_ENDMATCHLIST);
303                     list = "";
304                 }
305             }
306             // push each line to a new array once it's processed
307             blockBuffer.append(line);
308             blockBuffer.append("\n");
309 
310         }
311 
312         content = blockBuffer.toString();
313 
314 
315         //#clean up <notextile>
316         content = content.replaceAll("<\\/?notextile>", "");
317 
318         //# clean up liu and lio
319         content = content.replaceAll("<(\\/?)li(u|o)>", "<$1li>");
320 
321         //# turn the temp char back to an ampersand entity
322         content = replace(content,"x%x%","&#38;");
323 
324         //# Newline linebreaks, just for markup tidiness
325         content= replace(content,"<br />","<br />\n");
326 
327 
328         return content;
329 
330     }
331 
332     /**
333      * An implementation of the PHP htmlspecialchars()
334      * @param content
335      * @param mode
336      * @return
337      */
338     private String htmlSpecialChars(String content, int mode) {
339 
340         content = replace(content, "&", "&amp;");
341 
342 
343         if (mode != MODE_ENT_NOQUOTES) {
344             content = replace(content, "\"", "&quot;");
345         }
346         if (mode == MODE_ENT_QUOTES) {
347             content = replace(content, "'", "&#039;");
348         }
349         content = replace(content, "<", "&lt;");
350         content = replace(content, ">", "&gt;");
351         return content;
352 
353     }
354 
355 
356     private String[] splitContent(String matchexp, String content) {
357 
358         int startAt = 0;
359         List tempList = new ArrayList();
360 
361         Pattern pattern = Pattern.compile(matchexp);
362 
363         Matcher matcher = pattern.matcher(content);
364 
365         while (matcher.find()) {
366             tempList.add(content.substring(startAt, matcher.start()));
367             tempList.add(matcher.group());
368             startAt = matcher.end();
369         }
370 
371         tempList.add(content.substring(startAt));
372 
373         String[] result = new String[tempList.size()];
374 
375         for (int i = 0; i < result.length; i++) {
376             result[i] = (String) tempList.get(i);
377         }
378 
379         return result;
380 
381     }
382 
383 
384     /**
385      *
386      * @param content
387      * @param matches
388      * @param replaces
389      * @return
390      */
391     private String arrayReplaceAll(String content, String[] matches, String[] replaces) {
392 
393         String result = content;
394 
395         for (int x = 0; x < matches.length; x++) {
396             result = result.replaceAll(matches[x], replaces[x]);
397         }
398 
399         return result;
400     }
401 
402 
403     /**
404      * Replace any occurances of a string pattern within a string with a different string.
405      *
406      * @param str The source string.  This is the string that will be searched and have the replacements
407      * @param pattern The pattern to look for in str
408      * @param replace The string to insert in the place of <i>pattern</i>
409      * @return String with replace occurences
410      */
411     private static String replace(String str, String pattern, String replace) {
412         if (str == null || "".equals(str)) {
413             return str;
414         }
415 
416         if (replace == null) {
417             return str;
418         }
419 
420         if ("".equals(pattern)) {
421             return str;
422         }
423 
424         int s = 0;
425         int e = 0;
426         StringBuffer result = new StringBuffer();
427 
428         while ((e = str.indexOf(pattern, s)) >= 0) {
429             result.append(str.substring(s, e));
430             result.append(replace);
431             s = e + pattern.length();
432         }
433         result.append(str.substring(s));
434         return result.toString();
435     }
436 
437 
438 }