Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: org/embl/ebi/escience/scuflworkers/java/ExtractImageLinks.java


1   /**
2    * This file is a component of the Taverna project,
3    * and is licensed under the GNU LGPL.
4    * Copyright Tom Oinn, EMBL-EBI
5    */
6   package org.embl.ebi.escience.scuflworkers.java;
7   
8   import uk.ac.soton.itinnovation.taverna.enactor.entities.TaskExecutionException;
9   import java.util.*;
10  import java.io.*;
11  import java.net.*;
12  import org.embl.ebi.escience.baclava.*;
13  
14  /**
15   * Extract a list of all image links in the supplied html document
16   * @author Tom Oinn
17   */
18  public class ExtractImageLinks implements LocalWorker {
19      
20      private static final String NEWLINE = System.getProperty("line.separator");
21  
22      public String[] inputNames() {
23    return new String[]{"document"};
24      }
25      public String[] inputTypes() {
26    return new String[]{"'text/html'"};
27      }
28      public String[] outputNames() {
29    return new String[]{"imagelinks"};
30      }
31      public String[] outputTypes() {
32    return new String[]{"l('text/x-taverna-web-url')"};
33      }
34      
35      /**
36       * Fetch the web page pointed to by the URL supplied as the 'url'
37       * parameter into the service, the 'base' parameter specifies a 
38       * URL to use as the base for relative URL resolution.
39       */
40      public Map execute(Map inputs) throws TaskExecutionException {
41    String content = (String)((DataThing)(inputs.get("document"))).getDataObject();
42    String lowerCaseContent = content.toLowerCase();
43    int index = 0;
44    List urlList = new ArrayList();
45    while ((index = lowerCaseContent.indexOf("<img", index)) != -1) {
46        if ((index = lowerCaseContent.indexOf("src", index)) == -1) 
47      break;
48        if ((index = lowerCaseContent.indexOf("=", index)) == -1) 
49      break;
50        index++;
51        String remaining = content.substring(index);
52        StringTokenizer st = new StringTokenizer(remaining, "\t\n\r\">#");
53        String strLink = st.nextToken();
54        urlList.add(strLink);
55    }
56    Map outputs = new HashMap();
57    outputs.put("imagelinks",new DataThing(urlList));
58    return outputs;
59      }
60  
61  
62  }