Source code: org/media/mn8/util/textparser/TextParser.java
1 /*
2 * $COPYRIGHT$
3 * $Id: TextParser.java,v 1.2 2002/10/24 05:37:30 neuro Exp $
4 *
5 * Date Author Changes
6 * Jul 03 2001 Remus Pereni Created
7 */
8
9 package org.media.mn8.util.textparser;
10
11 import java.io.*;
12
13 /**
14 * @author <a href="mailto:remus@nolimits.ro">Remus Pereni</a>
15 * @version $Revision: 1.2 $ $Date: 2002/10/24 05:37:30 $
16 */
17 public class TextParser {
18
19 protected Token getParagraphs(String text) {
20 Token currentToken = null;
21 Token result = null;
22 int currentLineNr = 0;
23 int currentParagraphStartLine = -1;
24 if(text == null)
25 throw new RuntimeException("null text on Text Parser input");
26 char buffer[] = text.toCharArray();
27 StringBuffer line = new StringBuffer();
28 StringBuffer para = new StringBuffer();
29 for(int i = 0; i < buffer.length; i++) {
30 if(buffer[i] == '\n' || buffer[i] == '\r') {
31 currentLineNr++;
32 line.append(buffer[i]);
33 if(buffer[i] == '\r' && i + 1 < buffer.length && buffer[i + 1] != '\n'){
34 i++;
35 line.append(buffer[i]);
36 }
37
38 if("".equals(line.toString().trim())) {
39 if(!"".equals(para.toString().trim())) {
40 Paragraph newToken = new Paragraph(para, currentParagraphStartLine, 0, this);
41 newToken.setPreviousToken(currentToken);
42 if(currentToken != null) {
43 currentToken.setNextToken(newToken);
44 currentToken = newToken;
45 } else {
46 result = currentToken = newToken;
47 }
48 para = new StringBuffer();
49 currentParagraphStartLine = -1;
50 }
51 EmptyLine emptyLineToken = new EmptyLine(line, currentLineNr, 0);
52 emptyLineToken.setPreviousToken(currentToken);
53 if(currentToken != null) {
54 currentToken.setNextToken(emptyLineToken);
55 currentToken = emptyLineToken;
56 } else {
57 result = currentToken = emptyLineToken;
58 }
59 line = new StringBuffer();
60 } else {
61 if(para.length() <= 0) {
62 currentParagraphStartLine = currentLineNr;
63 }
64 para.append(line);
65 if(line.length() > maximColumnNr) {
66 maximColumnNr = line.length();
67 }
68 line = new StringBuffer();
69 }
70 } else {
71 line.append(buffer[i]);
72 }
73
74 }
75 if(para.length() > 0) {
76 Paragraph newToken = new Paragraph(para, currentParagraphStartLine, 0, this);
77 newToken.setPreviousToken(currentToken);
78 if(currentToken != null) {
79 currentToken.setNextToken(newToken);
80 currentToken = newToken;
81 } else {
82 result = currentToken = newToken;
83 }
84 }
85 totalLineNr = currentLineNr;
86 return result;
87 }
88
89
90 public String toString() {
91 if(_rootToken != null) {
92 StringBuffer buffer = new StringBuffer();
93 for(Token currentToken = _rootToken; currentToken != null; currentToken = currentToken.getNextToken())
94 buffer.append(currentToken.toString());
95
96 return buffer.toString();
97 } else {
98 return "No items in parser";
99 }
100 }
101
102
103 public String toXML() {
104 StringBuffer result = new StringBuffer();
105 result.append("<?xml version=\"1.0\" encoding=\"ISO-8859-2\" ?>\n");
106 result.append("<document>\n");
107 for(Token node = _rootToken; node.getNextToken() != null; node = node.getNextToken()){
108 result.append( node.toXML());
109 }
110 result.append("</document>");
111 return result.toString();
112 }
113
114 protected void cleanEmptyLines() {
115 Token pilon;
116 for(pilon = _rootToken; pilon.getNextToken() != null && pilon.getTokenType() == 4; pilon = pilon.getNextToken());
117 _rootToken = pilon;
118 for(Token avans = pilon.getNextToken(); avans != null; avans = pilon.getNextToken()) {
119 while(avans.getNextToken() != null && avans.getTokenType() == 4)
120 avans = avans.getNextToken();
121 if(avans != null && pilon.getNextToken() != avans) {
122 avans.setPreviousToken(pilon);
123 pilon.setNextToken(avans);
124 }
125 pilon = avans;
126 }
127
128 }
129
130 public static void main(String args[]){
131 StringBuffer sis = new StringBuffer();
132 try {
133 BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
134 String line;
135 if(System.in.available() > 0)
136 while((line = in.readLine()) != null)
137 sis.append(line + '\n');
138 TextParser parser = new TextParser(sis.toString());
139 parser.cleanEmptyLines();
140 System.out.println(parser.toXML());
141 } catch(Exception ex)
142 {
143 ex.printStackTrace();
144 }
145 }
146
147 private final void _mththis()
148 {
149 totalLineNr = -1;
150 maximColumnNr = -1;
151 }
152
153 public TextParser(String text) {
154 _mththis();
155 _rootToken = getParagraphs(text);
156 }
157
158 public static String EOL = System.getProperty("line.separator");
159 private StringBuffer _text;
160 int totalLineNr;
161 int maximColumnNr;
162 private Token _rootToken;
163
164 }