Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/tecnick/htmlutils/htmlstrings/HTMLStrings.java


1   package com.tecnick.htmlutils.htmlstrings;
2   
import java.nio.*;
import java.nio.charset.*;
import java.util.regex.Pattern;
5   
/**
 * Collection of static utility methods to manipulate HTML strings.<br/><br/>
 * Copyright (c) 2004-2005 Tecnick.com S.r.l (www.tecnick.com) Via Ugo Foscolo
 * n.19 - 09045 Quartu Sant'Elena (CA) - ITALY - www.tecnick.com -
 * info@tecnick.com <br/>
 * Project homepage: <a href="http://htmlstrings.sourceforge.net" target="_blank">http://htmlstrings.sourceforge.net</a><br/>
 * License: http://www.gnu.org/copyleft/lesser.html LGPL
 * 
 * @author Nicola Asuni [www.tecnick.com].
 * @version 1.0.003
 */
public class HTMLStrings {

  /** Charset name for which the conversion methods return their input unchanged. */
  private static final String UTF8 = "UTF-8";

  /** One or more of: tab, line feed, carriage return, NUL, form feed, space. */
  private static final Pattern WHITESPACE_RUN = Pattern.compile("[\t\n\r\0\f ]+");

  /** One or more line feed / carriage return characters. */
  private static final Pattern NEWLINE_RUN = Pattern.compile("[\n\r]+");

  /**
   * Void Constructor. All methods of this class are static; the constructor is
   * kept public so that existing code instantiating the class keeps compiling.
   */
  public HTMLStrings() {
  }

  /**
   * Converts byte-oriented character set such as ISO-8859-1 to UTF-8 Unicode.
   * <p>
   * The source string is encoded to bytes with the named charset and those
   * bytes are decoded again with the same charset. If <code>encoding</code> is
   * "UTF-8" (case-insensitive) the source is returned as is.
   * <p>
   * NOTE(review): encoding and decoding with the same charset presumably yields
   * a string equal to the input whenever the conversion succeeds; confirm
   * whether a different decoding step was intended.
   * 
   * @param source String source string to convert (may be null)
   * @param encoding String input encoding (name of a supported charset)
   * @return String converted string, or original string in case of error
   *         (including a null, illegal or unsupported encoding name)
   */
  public static String charsetToUnicode(String source, String encoding) {
    // Constant-first comparison: a null encoding must not throw here, it is
    // reported inside the try block like any other invalid charset name.
    if (source == null || UTF8.equalsIgnoreCase(encoding)) {
      return source;
    }
    try {
      Charset charset = Charset.forName(encoding);
      CharsetDecoder decoder = charset.newDecoder();
      CharsetEncoder encoder = charset.newEncoder();
      // Convert the string to bytes in a ByteBuffer
      ByteBuffer bytes = encoder.encode(CharBuffer.wrap(source));
      // Convert the bytes back to characters and then to a string
      return decoder.decode(bytes).toString();
    } catch (Exception e) {
      // Best effort: report the problem and fall back to the original string
      System.err.println(e);
      return source;
    }
  }

  /**
   * Converts UTF-8 Unicode strings to byte-oriented character set such as ISO-8859-1.
   * <p>
   * The source string is encoded to bytes with the named charset, and a new
   * string is built from exactly those bytes using the platform default
   * charset. If <code>encoding</code> is "UTF-8" (case-insensitive) the source
   * is returned as is.
   * 
   * @param source String source string to convert (may be null)
   * @param encoding String output encoding (name of a supported charset)
   * @return String converted string, or original string in case of error
   *         (including a null, illegal or unsupported encoding name)
   */
  public static String unicodeToCharset(String source, String encoding) {
    if (source == null || UTF8.equalsIgnoreCase(encoding)) {
      return source;
    }
    try {
      Charset charset = Charset.forName(encoding);
      CharsetEncoder encoder = charset.newEncoder();
      // encodes Unicode characters into bytes in this charset
      ByteBuffer bytes = encoder.encode(CharBuffer.wrap(source));
      // The backing array may be larger than the encoded content, so only the
      // region between position and limit is used; reading the whole array
      // would append unused trailing bytes to the result.
      return new String(bytes.array(),
          bytes.arrayOffset() + bytes.position(),
          bytes.remaining());
    } catch (Exception e) {
      // Best effort: report the problem and fall back to the original string
      System.err.println(e);
      return source;
    }
  }

  /**
   * Convert string to the requested encoding by applying
   * {@link #charsetToUnicode(String, String)} with the input encoding and then
   * {@link #unicodeToCharset(String, String)} with the output encoding.
   * 
   * @param source String HTML source code to convert
   * @param encoding_in String input encoding (name of a supported charset)
   * @param encoding_out String output encoding (name of a supported charset)
   * @return String converted string, or original string in case of error
   */
  public static String getEncodedString(String source, String encoding_in, String encoding_out) {
    return unicodeToCharset(charsetToUnicode(source, encoding_in), encoding_out);
  }

  /**
   * Replace every run of the following characters with a single blank space:<ul>
   * <li>"\t" (ASCII 9 (0x09)), a tab</li>
   * <li>"\n" (ASCII 10 (0x0A)), a new line (line feed)</li>
   * <li>"\r" (ASCII 13 (0x0D)), a carriage return</li>
   * <li>"\0" (ASCII 0 (0x00)), the NUL-byte</li>
   * <li>"\f" (ASCII 12 (0x0C)), a form feed</li>
   * <li>" " (ASCII 32 (0x20)), a blank space</li>
   * </ul>
   * 
   * @param str the input string (may be null)
   * @return compacted string, or null if the input is null
   */
  public static String compactString(String str) {
    if (str == null) {
      return null;
    }
    return WHITESPACE_RUN.matcher(str).replaceAll(" ");
  }

  /**
   * Replace newlines characters sequences with &lt;br/&gt; element.
   * Each run of consecutive line feed / carriage return characters is replaced
   * by one &lt;br/&gt; followed by a single line feed.
   * 
   * @param str String text to change (may be null)
   * @return String original string with replaced newlines, or null if the input is null
   */
  public static String autoBR(String str) {
    if (str == null) {
      return null;
    }
    return NEWLINE_RUN.matcher(str).replaceAll("<br/>\n");
  }

}
117 }