Source code: com/tecnick/htmlutils/htmlstrings/HTMLStrings.java
1 package com.tecnick.htmlutils.htmlstrings;
2
3 import java.nio.*;
4 import java.nio.charset.*;
5
6 /**
7 * Collection of static utility methods to manipulate HTML strings.<br/><br/>
8 * Copyright (c) 2004-2005 Tecnick.com S.r.l (www.tecnick.com) Via Ugo Foscolo
9 * n.19 - 09045 Quartu Sant'Elena (CA) - ITALY - www.tecnick.com -
10 * info@tecnick.com <br/>
11 * Project homepage: <a href="http://htmlstrings.sourceforge.net" target="_blank">http://htmlstrings.sourceforge.net</a><br/>
12 * License: http://www.gnu.org/copyleft/lesser.html LGPL
13 *
14 * @author Nicola Asuni [www.tecnick.com].
15 * @version 1.0.003
16 */
public class HTMLStrings {

    /**
     * Matches one or more consecutive tab, line feed, carriage return, NUL,
     * form feed or space characters. Compiled once because the expression
     * never changes between calls of {@link #compactString(String)}.
     */
    private static final java.util.regex.Pattern COMPACT_PATTERN =
            java.util.regex.Pattern.compile("[\t\n\r\0\f ]+");

    /**
     * Matches one or more consecutive line feed / carriage return characters.
     * Compiled once for {@link #autoBR(String)}.
     */
    private static final java.util.regex.Pattern NEWLINE_PATTERN =
            java.util.regex.Pattern.compile("[\n\r]+");

    /**
     * Void Constructor. Kept public for backward compatibility; every method
     * of this class is static, so no instance is required.
     */
    public HTMLStrings() {
    }

    /**
     * Converts byte-oriented character set such as ISO-8859-1 to UTF-8 Unicode.
     *
     * <p>The source is encoded to bytes with the given charset and those bytes
     * are decoded again with the same charset. NOTE(review): because both
     * steps use the same charset, the result normally equals the source for
     * every character the charset can represent; confirm whether a different
     * decoding step was intended.</p>
     *
     * <p>Any failure (null or unknown charset name, a charset that cannot
     * encode, characters that cannot be mapped, null source) is reported on
     * {@code System.err} and the original string is returned.</p>
     *
     * @param source String source string to convert
     * @param encoding String input encoding (name of a supported charset)
     * @return String converted string, or original string in case of error
     */
    public static String charsetToUnicode(String source, String encoding) {
        // Constant-first comparison: a null encoding must reach the error
        // path below instead of throwing NullPointerException here.
        if ("UTF-8".equalsIgnoreCase(encoding)) {
            return source;
        }
        String str = source;
        try {
            Charset charset = Charset.forName(encoding);
            CharsetDecoder decoder = charset.newDecoder();
            CharsetEncoder encoder = charset.newEncoder();
            // Convert the string to bytes in a ByteBuffer.
            ByteBuffer bbuf = encoder.encode(CharBuffer.wrap(source));
            // Convert the bytes back to characters and then to a string.
            CharBuffer cbuf = decoder.decode(bbuf);
            str = cbuf.toString();
        } catch (Exception e) {
            // Deliberately broad: the documented contract is "return the
            // original string on any error", which includes unchecked
            // exceptions raised by the charset lookup and coder creation.
            System.err.println(e);
        }
        return str;
    }

    /**
     * Converts UTF-8 Unicode strings to byte-oriented character set such as ISO-8859-1.
     *
     * <p>The source is encoded to bytes with the given charset; the resulting
     * bytes are then turned back into a string using the platform default
     * charset, so the returned value depends on the platform default.</p>
     *
     * <p>Only the bytes actually produced by the encoder are used. The array
     * backing the encoder's buffer may be larger than its content, and using
     * it whole would append spurious NUL characters to the result.</p>
     *
     * <p>Any failure (null or unknown charset name, a charset that cannot
     * encode, characters that cannot be mapped, null source) is reported on
     * {@code System.err} and the original string is returned.</p>
     *
     * @param source String source string to convert
     * @param encoding String output encoding (name of a supported charset)
     * @return String converted string, or original string in case of error
     */
    public static String unicodeToCharset(String source, String encoding) {
        // Constant-first comparison: a null encoding must reach the error
        // path below instead of throwing NullPointerException here.
        if ("UTF-8".equalsIgnoreCase(encoding)) {
            return source;
        }
        String str = source;
        try {
            Charset charset = Charset.forName(encoding);
            CharsetEncoder encoder = charset.newEncoder();
            // Encode the Unicode characters into bytes of the target charset.
            ByteBuffer bbuf = encoder.encode(CharBuffer.wrap(source));
            // Copy exactly the encoded bytes (position..limit), not the whole
            // backing array, which can carry unused trailing capacity.
            byte[] encoded = new byte[bbuf.remaining()];
            bbuf.get(encoded);
            // Decoded with the platform default charset, as before.
            str = new String(encoded);
        } catch (Exception e) {
            // Deliberately broad: see the contract in the Javadoc above.
            System.err.println(e);
        }
        return str;
    }

    /**
     * Convert string to the requested encoding.
     *
     * <p>Applies {@link #charsetToUnicode(String, String)} with the input
     * encoding and then {@link #unicodeToCharset(String, String)} with the
     * output encoding.</p>
     *
     * @param source String HTML source code to convert
     * @param encoding_in String input encoding (name of a supported charset)
     * @param encoding_out String output encoding (name of a supported charset)
     * @return String converted string, or original string in case of error
     */
    public static String getEncodedString(String source, String encoding_in, String encoding_out) {
        String unicode = charsetToUnicode(source, encoding_in);
        return unicodeToCharset(unicode, encoding_out);
    }

    /**
     * Replace every run of the following characters with a single blank space:<ul>
     * <li>tab (ASCII 9, 0x09)</li>
     * <li>new line / line feed (ASCII 10, 0x0A)</li>
     * <li>carriage return (ASCII 13, 0x0D)</li>
     * <li>the NUL byte (ASCII 0, 0x00)</li>
     * <li>form feed (U+000C)</li>
     * <li>blank space</li>
     * </ul>
     *
     * @param str the input string
     * @return compacted string
     * @throws NullPointerException if {@code str} is null
     */
    public static String compactString(String str) {
        return COMPACT_PATTERN.matcher(str).replaceAll(" ");
    }

    /**
     * Replace each run of newline characters (line feed and/or carriage
     * return) with a single &lt;br/&gt; element followed by a line feed.
     *
     * @param str String text to change
     * @return String original string with replaced newlines
     * @throws NullPointerException if {@code str} is null
     */
    public static String autoBR(String str) {
        return NEWLINE_PATTERN.matcher(str).replaceAll("<br/>\n");
    }

}