Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: org/htmlparser/parserapplications/StringExtractor.java


1   // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/parserapplications/StringExtractor.java,v 1.2 2004/02/10 13:41:07 woolfel Exp $
2   /*
3    * ====================================================================
4    * Copyright 2002-2004 The Apache Software Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   * 
18   */
19  
20  // The developers of JMeter and Apache are grateful to the developers
21  // of HTMLParser for giving Apache Software Foundation a non-exclusive
22  // license. The performance benefits of HTMLParser are clear and the
23  // users of JMeter will benefit from the hard work of the HTMLParser
24  // team. For detailed information about HTMLParser, the project is
25  // hosted on sourceforge at http://htmlparser.sourceforge.net/.
26  //
27  // HTMLParser was originally created by Somik Raha in 2000. Since then
28  // a healthy community of users has formed and helped refine the
29  // design so that it is able to tackle the difficult task of parsing
30  // dirty HTML. Derrick Oswald is the current lead developer and was kind
31  // enough to assist JMeter.
32  
33  package org.htmlparser.parserapplications;
34  
35  import org.htmlparser.beans.StringBean;
36  import org.htmlparser.util.ParserException;
37  
38  public class StringExtractor
39  {
40      private String resource;
41  
42      /**
43       * Construct a StringExtractor to read from the given resource.
44       * @param resource Either a URL or a file name.
45       */
46      public StringExtractor(String resource)
47      {
48          this.resource = resource;
49      }
50  
51      /**
52       * Extract the text from a page.
53       * @param links if <code>true</code> include hyperlinks in output.
54       * @return The textual contents of the page.
55       */
56      public String extractStrings(boolean links) throws ParserException
57      {
58          StringBean sb;
59  
60          sb = new StringBean();
61          sb.setLinks(links);
62          sb.setURL(resource);
63  
64          return (sb.getStrings());
65      }
66  
67      /**
68       * Mainline.
69       * @param args The command line arguments.
70       */
71      public static void main(String[] args)
72      {
73          boolean links;
74          String url;
75          StringExtractor se;
76  
77          links = false;
78          url = null;
79          for (int i = 0; i < args.length; i++)
80              if (args[i].equalsIgnoreCase("-links"))
81                  links = true;
82              else
83                  url = args[i];
84          if (null != url)
85          {
86              se = new StringExtractor(url);
87              try
88              {
89                  System.out.println(se.extractStrings(links));
90              }
91              catch (ParserException e)
92              {
93                  e.printStackTrace();
94              }
95          }
96          else
97              System.out.println(
98                  "Usage: java -classpath htmlparser.jar org.htmlparser.parserapplications.StringExtractor [-links] url");
99      }
100 }