Source code: com/telefonicasoluciones/search/server/parser/xml/XMLParser.java
1 package com.telefonicasoluciones.search.server.parser.xml;
2
3 /*
4 * XMLParser.java
5 *
6 * Created on 12 November 2001, 09:37
7 */
8
9 import java.io.*;
10 import java.util.*;
11
12
13 public class XMLParser {
14 private ArrayList tokens;
15 private String encoding = null;
16
17 public XMLParser(String xmlContent) throws XMLParseException {
18 super();
19 tokens = new ArrayList();
20 try {
21 parse(xmlContent.toCharArray());
22 } catch(Exception e) {
23 throw new XMLParseException(e.getMessage());
24 }
25 }
26 public Reader getContentReader() throws XMLParseException {
27 try {
28 StringBuffer sb = new StringBuffer();
29 for(int i = 0; i < tokens.size(); i++) {
30 Object tok = tokens.get(i);
31 if(tok instanceof TagToken) {
32 sb.append(((TagToken)tokens.get(i)).getName().concat(" "));
33 } else if(tok instanceof TextToken) {
34 sb.append(((TextToken)tokens.get(i)).getText().concat(" "));
35 }
36 }
37 Reader reader = null;
38 if(encoding!=null) {
39 reader = new InputStreamReader(new ByteArrayInputStream(sb.toString().getBytes(encoding)));
40 } else {
41 reader = new StringReader(sb.toString());
42 }
43 return reader;
44 } catch(UnsupportedEncodingException e) {
45 throw new XMLParseException("Unsuported encoding "+encoding);
46 }
47 }
48 public String getContent() throws XMLParseException {
49 StringBuffer sb = new StringBuffer();
50 for(int i = 0; i < tokens.size(); i++) {
51 Object tok = tokens.get(i);
52 if(tok instanceof TagToken) {
53 sb.append(((TagToken)tokens.get(i)).getName().concat(" "));
54 } else if(tok instanceof TextToken) {
55 sb.append(((TextToken)tokens.get(i)).getText().concat(" "));
56 }
57 }
58 if(encoding!=null) {
59 try {
60 return new String(sb.toString().getBytes(),encoding);
61 } catch(UnsupportedEncodingException e) {
62 throw new XMLParseException("Unsuported encoding "+encoding);
63 }
64 } else {
65 return sb.toString();
66 }
67 }
68 public ArrayList getTokens() {
69 return tokens;
70 }
71 private int indexOf(char c, char array[], int start) throws Exception {
72 for(int i = start; i < array.length; i++) {
73 if(array[i] == c) {
74 return i;
75 }
76 }
77 return -1;
78 }
79 private char[] substring(char array[], int start, int end) throws Exception {
80 char string[] = new char[(end-start)+1];
81 int j = 0;
82 for(int i = start; i < end && i < array.length; i++) {
83 string[j] = array[i];
84 j++;
85 }
86 return string;
87 }
88 private void parse(char[] data) throws XMLParseException {
89 char separator = '>';
90 String token = new String();
91 int offset = -1;
92 int index = -1;
93 try {
94 while(true) {
95 offset = index + 1;
96 index = indexOf(separator, data, offset);
97 if(index>0) {
98 if(index < data.length)
99 token = String.valueOf(substring(data, offset, index));
100 } else {
101 break;
102 }
103 token = token.trim().intern();
104 if(separator == '<') {
105 if(token.length() > 0) {
106 TextToken tt = new TextToken();
107 tt.setText(token);
108 tokens.add(tt);
109 }
110 } else {
111 TagToken tt = new TagToken(token);
112 if(token.startsWith("?")&&token.endsWith("?")) {
113 if(tt.getAttribute("encoding")!=null) {
114 encoding = tt.getAttribute("encoding");
115 }
116 } else {
117 tokens.add(tt);
118 }
119 }
120 if(separator == '<')
121 separator = '>';
122 else
123 separator = '<';
124 }
125 } catch(Exception e) {
126 throw new XMLParseException(e.getMessage());
127 }
128 }
129 }