Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: org/htmlparser/tests/BenchmarkTidy.java


1   // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/tests/BenchmarkTidy.java,v 1.2 2004/02/10 13:41:08 woolfel Exp $
2   /*
3    * ====================================================================
4    * Copyright 2002-2004 The Apache Software Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   * 
18   */
19  
20  // The developers of JMeter and Apache are grateful to the developers
21  // of HTMLParser for giving Apache Software Foundation a non-exclusive
22  // license. The performance benefits of HTMLParser are clear and the
23  // users of JMeter will benefit from the hard work of the HTMLParser
24  // team. For detailed information about HTMLParser, the project is
25  // hosted on sourceforge at http://htmlparser.sourceforge.net/.
26  //
27  // HTMLParser was originally created by Somik Raha in 2000. Since then
28  // a healthy community of users has formed and helped refine the
29  // design so that it is able to tackle the difficult task of parsing
30  // dirty HTML. Derrick Oswald is the current lead developer and was kind
31  // enough to assist JMeter.
32  
33  package org.htmlparser.tests;
34  
35  import java.io.BufferedReader;
36  import java.io.ByteArrayInputStream;
37  import java.io.File;
38  import java.io.FileReader;
39  import java.io.IOException;
40  import java.io.UnsupportedEncodingException;
41  
42  import org.w3c.dom.Document;
43  import org.w3c.dom.NamedNodeMap;
44  import org.w3c.dom.Node;
45  import org.w3c.dom.NodeList;
46  import org.w3c.tidy.Tidy;
47  import org.xml.sax.SAXException;
48  
49  /**
50   * Title:    Apache Jakarta JMeter<br>
51   * Copyright:  Copyright (c) Apache<br>
52   * Company:    Apache<br>
53   * License:<br>
54   * <br>
55   * The license is at the top!<br>
56   * <br>
57   * Description:<br>
58   * <br>
59   * This is a quick class to benchmark tidy against htmlparser.
60   * It is pretty basic and uses the same process as the original
61   * image parsing code in JMeter 1.9.0 and earlier.
62   * <p>
63   * Author:  pete<br>
64   * Version:   0.1<br>
65   * Created on:  Sep 30, 2003<br>
66   * Last Modified:  7:41:39 AM<br>
67   */
68  public class BenchmarkTidy
69  {
70  
71      protected static String utfEncodingName;
72  
73      /**
74       * 
75       */
76      public BenchmarkTidy(String data)
77      {
78          try
79          {
80              Document doc = (Document) getDOM(data);
81              parseNodes(doc, "img", false, "src");
82          }
83          catch (SAXException e)
84          {
85              e.printStackTrace();
86          }
87      }
88  
89      protected void parseNodes(
90          Document html,
91          String htmlTag,
92          boolean type,
93          String srcTag)
94      {
95  
96          NodeList nodeList = html.getElementsByTagName(htmlTag);
97          boolean uniqueBinary;
98  
99          for (int i = 0; i < nodeList.getLength(); i++)
100         {
101             uniqueBinary = true;
102             Node tempNode = nodeList.item(i);
103 
104             // get the url of the Binary
105             NamedNodeMap nnm = tempNode.getAttributes();
106             Node namedItem = null;
107 
108             if (type)
109             {
110                 // if type is set, we need 'type=image'
111                 namedItem = nnm.getNamedItem("type");
112                 if (namedItem == null)
113                 {
114                     break;
115                 }
116                 String inputType = namedItem.getNodeValue();
117 
118                 if (inputType != null && inputType.equalsIgnoreCase("image"))
119                 {
120                     // then we need to download the binary
121                 }
122                 else
123                 {
124                     break;
125                 }
126             }
127             namedItem = nnm.getNamedItem(srcTag);
128             System.out.println("Image Tag: " + htmlTag + " src=" + namedItem);
129         }
130     }
131 
132     protected static Tidy getParser()
133     {
134         Tidy tidy = new Tidy();
135         tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
136         tidy.setQuiet(true);
137         tidy.setShowWarnings(false);
138 
139         return tidy;
140     }
141 
142     protected static Node getDOM(String text) throws SAXException
143     {
144 
145         try
146         {
147             Node node =
148                 getParser().parseDOM(
149                     new ByteArrayInputStream(
150                         text.getBytes(getUTFEncodingName())),
151                     null);
152 
153             return node;
154         }
155         catch (UnsupportedEncodingException e)
156         {
157 
158             throw new RuntimeException("UTF-8 encoding failed - " + e);
159         }
160     }
161 
162     protected static String getUTFEncodingName()
163     {
164         if (utfEncodingName == null)
165         {
166             String versionNum = System.getProperty("java.version");
167             if (versionNum.startsWith("1.1"))
168             {
169                 utfEncodingName = "UTF8";
170             }
171             else
172             {
173                 utfEncodingName = "UTF-8";
174             }
175         }
176         return utfEncodingName;
177     }
178 
179     public static void main(String[] args)
180     {
181         if (args != null && args.length > 0)
182         {
183             try
184             {
185                 File input = new File(args[0]);
186 
187                 StringBuffer buff = new StringBuffer();
188                 BufferedReader reader =
189                     new BufferedReader(new FileReader(input));
190                 String line = null;
191                 while ((line = reader.readLine()) != null)
192                 {
193                     buff.append(line);
194                 }
195                 long start = System.currentTimeMillis();
196                 BenchmarkTidy test = new BenchmarkTidy(buff.toString());
197                 System.out.println(
198                     "Elapsed time ms: " + (System.currentTimeMillis() - start));
199             }
200             catch (IOException e)
201             {
202                 e.printStackTrace();
203             }
204         }
205         else
206         {
207             System.out.println("Please provide a filename");
208         }
209     }
210 }