1 /*--
2
3 Copyright (C) 2000 Brett McLaughlin & Jason Hunter.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9
10 1. Redistributions of source code must retain the above copyright
11 notice, this list of conditions, and the following disclaimer.
12
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions, and the disclaimer that follows
15 these conditions in the documentation and/or other materials
16 provided with the distribution.
17
18 3. The name "JDOM" must not be used to endorse or promote products
19 derived from this software without prior written permission. For
20 written permission, please contact license@jdom.org.
21
22 4. Products derived from this software may not be called "JDOM", nor
23 may "JDOM" appear in their name, without prior written permission
24 from the JDOM Project Management (pm@jdom.org).
25
26 In addition, we request (but do not require) that you include in the
27 end-user documentation provided with the redistribution and/or in the
28 software itself an acknowledgement equivalent to the following:
29 "This product includes software developed by the
30 JDOM Project (http://www.jdom.org/)."
31 Alternatively, the acknowledgment may be graphical using the logos
32 available at http://www.jdom.org/images/logos.
33
34 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
35 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
36 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
37 DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
38 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
41 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
42 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
43 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
44 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
45 SUCH DAMAGE.
46
47 This software consists of voluntary contributions made by many
48 individuals on behalf of the JDOM Project and was originally
49 created by Brett McLaughlin <brett@jdom.org> and
50 Jason Hunter <jhunter@jdom.org>. For more information on the
51 JDOM Project, please see <http://www.jdom.org/>.
52
53 */
54 package sax;
55
56 import java.util.Stack;
57
58 import org.xml.sax.Attributes;
59 import org.xml.sax.SAXException;
60 import org.xml.sax.XMLReader;
61
62
63 /**
64 * Filter for removing formatting from data- or field-oriented XML.
65 *
66 * <i>Code and comments adapted from DataWriter-0.2, written
67 * by David Megginson and released into the public domain,
68 * without warranty.</i>
69 *
70 * <p>This filter removes leading and trailing whitespace from
71 * field-oriented XML without mixed content. Note that this class will
72 * likely not yield appropriate results for document-oriented XML like
73 * XHTML pages, which mix character data and elements together.</p>
74 *
75 * @see DataFormatFilter
76 */
77 public class DataUnformatFilter extends XMLFilterBase
78 {
79
80
81
82 ////////////////////////////////////////////////////////////////////
83 // Constructors.
84 ////////////////////////////////////////////////////////////////////
85
86
87 /**
88 * Create a new filter.
89 */
90 public DataUnformatFilter()
91 {
92 }
93
94
95 /**
96 * Create a new filter.
97 *
98 * <p>Use the XMLReader provided as the source of events.</p>
99 *
100 * @param xmlreader The parent in the filter chain.
101 */
102 public DataUnformatFilter(XMLReader xmlreader)
103 {
104 super(xmlreader);
105 }
106
107
108
109 ////////////////////////////////////////////////////////////////////
110 // Public methods.
111 ////////////////////////////////////////////////////////////////////
112
113
114 /**
115 * Reset the filter so that it can be reused.
116 *
117 * <p>This method is especially useful if the filter failed
118 * with an exception the last time through.</p>
119 */
120 public void reset ()
121 {
122 state = SEEN_NOTHING;
123 stateStack = new Stack();
124 whitespace = new StringBuffer();
125 }
126
127
128
129 ////////////////////////////////////////////////////////////////////
130 // Methods from org.xml.sax.ContentHandler.
131 ////////////////////////////////////////////////////////////////////
132
133
134 /**
135 * Filter a start document event.
136 *
137 * <p>Reset state and pass the event on for further processing.</p>
138 *
139 * @exception org.xml.sax.SAXException If a filter
140 * further down the chain raises an exception.
141 * @see org.xml.sax.ContentHandler#startDocument
142 */
143 public void startDocument ()
144 throws SAXException
145 {
146 reset();
147 super.startDocument();
148 }
149
150
151 /**
152 * Filter a start element event.
153 *
154 * @param uri The element's Namespace URI.
155 * @param localName The element's local name.
156 * @param qName The element's qualified (prefixed) name.
157 * @param atts The element's attribute list.
158 * @exception org.xml.sax.SAXException If a filter
159 * further down the chain raises an exception.
160 * @see org.xml.sax.ContentHandler#startElement
161 */
162 public void startElement (String uri, String localName,
163 String qName, Attributes atts)
164 throws SAXException
165 {
166 clearWhitespace();
167 stateStack.push(SEEN_ELEMENT);
168 state = SEEN_NOTHING;
169 super.startElement(uri, localName, qName, atts);
170 }
171
172
173 /**
174 * Filter an end element event.
175 *
176 * @param uri The element's Namespace URI.
177 * @param localName The element's local name.
178 * @param qName The element's qualified (prefixed) name.
179 * @exception org.xml.sax.SAXException If a filter
180 * further down the chain raises an exception.
181 * @see org.xml.sax.ContentHandler#endElement
182 */
183 public void endElement (String uri, String localName, String qName)
184 throws SAXException
185 {
186 if (state == SEEN_ELEMENT) {
187 clearWhitespace();
188 } else {
189 emitWhitespace();
190 }
191 state = stateStack.pop();
192 super.endElement(uri, localName, qName);
193 }
194
195
196 /**
197 * Filter a character data event.
198 *
199 * @param ch The characters to write.
200 * @param start The starting position in the array.
201 * @param length The number of characters to use.
202 * @exception org.xml.sax.SAXException If a filter
203 * further down the chain raises an exception.
204 * @see org.xml.sax.ContentHandler#characters
205 */
206 public void characters (char ch[], int start, int length)
207 throws SAXException
208 {
209 if (state != SEEN_DATA) {
210
211 /* Look for non-whitespace. */
212
213 int end = start + length;
214 while (end-- > start) {
215 if (!isXMLWhitespace(ch[end]))
216 break;
217 }
218
219 /*
220 * If all the characters are whitespace, save them for later.
221 * If we've got some data, emit any saved whitespace and update
222 * our state to show we've seen data.
223 */
224
225 if (end < start) {
226 saveWhitespace(ch, start, length);
227 } else {
228 state = SEEN_DATA;
229 emitWhitespace();
230 }
231 }
232
233 /* Pass on everything inside a data field. */
234
235 if (state == SEEN_DATA) {
236 super.characters(ch, start, length);
237 }
238 }
239
240
241 /**
242 * Filter an ignorable whitespace event.
243 *
244 * @param ch The array of characters to write.
245 * @param start The starting position in the array.
246 * @param length The number of characters to write.
247 * @exception org.xml.sax.SAXException If a filter
248 * further down the chain raises an exception.
249 * @see org.xml.sax.ContentHandler#ignorableWhitespace
250 */
251 public void ignorableWhitespace (char ch[], int start, int length)
252 throws SAXException
253 {
254 emitWhitespace();
255 // ignore
256 }
257
258
259 /**
260 * Filter a processing instruction event.
261 *
262 * @param target The PI target.
263 * @param data The PI data.
264 * @exception org.xml.sax.SAXException If a filter
265 * further down the chain raises an exception.
266 * @see org.xml.sax.ContentHandler#processingInstruction
267 */
268 public void processingInstruction (String target, String data)
269 throws SAXException
270 {
271 emitWhitespace();
272 super.processingInstruction(target, data);
273 }
274
275
276
277 ////////////////////////////////////////////////////////////////////
278 // Internal methods.
279 ////////////////////////////////////////////////////////////////////
280
281
282 /**
283 * Saves trailing whitespace.
284 */
285 protected void saveWhitespace (char[] ch, int start, int length) {
286 whitespace.append(ch, start, length);
287 }
288
289
290 /**
291 * Passes saved whitespace down the filter chain.
292 */
293 protected void emitWhitespace ()
294 throws SAXException
295 {
296 char[] data = new char[whitespace.length()];
297 whitespace.getChars(0, data.length, data, 0);
298 whitespace.setLength(0);
299 super.characters(data, 0, data.length);
300 }
301
302
303 /**
304 * Discards saved whitespace.
305 */
306 protected void clearWhitespace () {
307 whitespace.setLength(0);
308 }
309
310
311 /**
312 * Returns <var>true</var> if character is XML whitespace.
313 */
314 private boolean isXMLWhitespace (char c)
315 {
316 return c == ' ' || c == '\t' || c == '\r' || c == '\n';
317 }
318
319
320
321
322 ////////////////////////////////////////////////////////////////////
323 // Constants.
324 ////////////////////////////////////////////////////////////////////
325
326 private static final Object SEEN_NOTHING = new Object();
327 private static final Object SEEN_ELEMENT = new Object();
328 private static final Object SEEN_DATA = new Object();
329
330
331 ////////////////////////////////////////////////////////////////////
332 // Internal state.
333 ////////////////////////////////////////////////////////////////////
334
335 private Object state = SEEN_NOTHING;
336 private Stack stateStack = new Stack();
337
338 private StringBuffer whitespace = new StringBuffer();
339
340 }
341
342 // end of DataUnformatFilter.java