Source code: org/htmlparser/tests/BenchmarkTidy.java
1 // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/tests/BenchmarkTidy.java,v 1.2 2004/02/10 13:41:08 woolfel Exp $
2 /*
3 * ====================================================================
4 * Copyright 2002-2004 The Apache Software Foundation.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 */
19
20 // The developers of JMeter and Apache are grateful to the developers
21 // of HTMLParser for giving the Apache Software Foundation a non-exclusive
22 // license. The performance benefits of HTMLParser are clear and the
23 // users of JMeter will benefit from the hard work of the HTMLParser
24 // team. For detailed information about HTMLParser, the project is
25 // hosted on SourceForge at http://htmlparser.sourceforge.net/.
26 //
27 // HTMLParser was originally created by Somik Raha in 2000. Since then
28 // a healthy community of users has formed and helped refine the
29 // design so that it is able to tackle the difficult task of parsing
30 // dirty HTML. Derrick Oswald is the current lead developer and was kind
31 // enough to assist JMeter.
32
33 package org.htmlparser.tests;
34
35 import java.io.BufferedReader;
36 import java.io.ByteArrayInputStream;
37 import java.io.File;
38 import java.io.FileReader;
39 import java.io.IOException;
40 import java.io.UnsupportedEncodingException;
41
42 import org.w3c.dom.Document;
43 import org.w3c.dom.NamedNodeMap;
44 import org.w3c.dom.Node;
45 import org.w3c.dom.NodeList;
46 import org.w3c.tidy.Tidy;
47 import org.xml.sax.SAXException;
48
49 /**
50 * Title: Apache Jakarta JMeter<br>
51 * Copyright: Copyright (c) Apache<br>
52 * Company: Apache<br>
53 * License:<br>
54 * <br>
55 * The license is at the top!<br>
56 * <br>
57 * Description:<br>
58 * <br>
59 * This is a quick class to benchmark tidy against htmlparser.
60 * It is pretty basic and uses the same process as the original
61 * image parsing code in JMeter 1.9.0 and earlier.
62 * <p>
63 * Author: pete<br>
64 * Version: 0.1<br>
65 * Created on: Sep 30, 2003<br>
66 * Last Modified: 7:41:39 AM<br>
67 */
68 public class BenchmarkTidy
69 {
70
71 protected static String utfEncodingName;
72
73 /**
74 *
75 */
76 public BenchmarkTidy(String data)
77 {
78 try
79 {
80 Document doc = (Document) getDOM(data);
81 parseNodes(doc, "img", false, "src");
82 }
83 catch (SAXException e)
84 {
85 e.printStackTrace();
86 }
87 }
88
89 protected void parseNodes(
90 Document html,
91 String htmlTag,
92 boolean type,
93 String srcTag)
94 {
95
96 NodeList nodeList = html.getElementsByTagName(htmlTag);
97 boolean uniqueBinary;
98
99 for (int i = 0; i < nodeList.getLength(); i++)
100 {
101 uniqueBinary = true;
102 Node tempNode = nodeList.item(i);
103
104 // get the url of the Binary
105 NamedNodeMap nnm = tempNode.getAttributes();
106 Node namedItem = null;
107
108 if (type)
109 {
110 // if type is set, we need 'type=image'
111 namedItem = nnm.getNamedItem("type");
112 if (namedItem == null)
113 {
114 break;
115 }
116 String inputType = namedItem.getNodeValue();
117
118 if (inputType != null && inputType.equalsIgnoreCase("image"))
119 {
120 // then we need to download the binary
121 }
122 else
123 {
124 break;
125 }
126 }
127 namedItem = nnm.getNamedItem(srcTag);
128 System.out.println("Image Tag: " + htmlTag + " src=" + namedItem);
129 }
130 }
131
132 protected static Tidy getParser()
133 {
134 Tidy tidy = new Tidy();
135 tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
136 tidy.setQuiet(true);
137 tidy.setShowWarnings(false);
138
139 return tidy;
140 }
141
142 protected static Node getDOM(String text) throws SAXException
143 {
144
145 try
146 {
147 Node node =
148 getParser().parseDOM(
149 new ByteArrayInputStream(
150 text.getBytes(getUTFEncodingName())),
151 null);
152
153 return node;
154 }
155 catch (UnsupportedEncodingException e)
156 {
157
158 throw new RuntimeException("UTF-8 encoding failed - " + e);
159 }
160 }
161
162 protected static String getUTFEncodingName()
163 {
164 if (utfEncodingName == null)
165 {
166 String versionNum = System.getProperty("java.version");
167 if (versionNum.startsWith("1.1"))
168 {
169 utfEncodingName = "UTF8";
170 }
171 else
172 {
173 utfEncodingName = "UTF-8";
174 }
175 }
176 return utfEncodingName;
177 }
178
179 public static void main(String[] args)
180 {
181 if (args != null && args.length > 0)
182 {
183 try
184 {
185 File input = new File(args[0]);
186
187 StringBuffer buff = new StringBuffer();
188 BufferedReader reader =
189 new BufferedReader(new FileReader(input));
190 String line = null;
191 while ((line = reader.readLine()) != null)
192 {
193 buff.append(line);
194 }
195 long start = System.currentTimeMillis();
196 BenchmarkTidy test = new BenchmarkTidy(buff.toString());
197 System.out.println(
198 "Elapsed time ms: " + (System.currentTimeMillis() - start));
199 }
200 catch (IOException e)
201 {
202 e.printStackTrace();
203 }
204 }
205 else
206 {
207 System.out.println("Please provide a filename");
208 }
209 }
210 }