Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: cvebrowser/dictionary/data/parser/CSVToXMLParser.java


1   package cvebrowser.dictionary.data.parser;
2   
3   import java.io.IOException;
4   import java.io.File;
5   import java.io.FileOutputStream;
6   import java.io.LineNumberReader;
7   import java.io.ObjectOutputStream;
8   import java.io.ObjectInputStream;
9   
10  // SAX classes.
11  import org.xml.sax.SAXException;
12  import org.xml.sax.helpers.AttributesImpl;
13  
14  //JAXP 1.1
15  import javax.xml.transform.OutputKeys;
16  import javax.xml.transform.Transformer;
17  import javax.xml.transform.stream.StreamResult;
18  import javax.xml.transform.sax.SAXTransformerFactory;
19  import javax.xml.transform.sax.TransformerHandler;
20  import javax.xml.transform.TransformerConfigurationException;
21  
22  import java.util.ResourceBundle;
23  import java.util.Properties;
24  import java.util.StringTokenizer;
25  import java.util.Locale;
26  import java.util.regex.Pattern;
27  import java.util.regex.Matcher;
28  import java.util.zip.GZIPOutputStream;
29  
30  import gnu.getopt.Getopt;
31  
32  import cvebrowser.util.CommandLine;
33  import cvebrowser.util.parser.FileType;
34  import cvebrowser.dictionary.data.parser.CSVMitreFileFactory;
35  import cvebrowser.dictionary.data.parser.DataParserException;
36  import cvebrowser.dictionary.data.parser.CSVFile;
37  import cvebrowser.util.parser.Types;
38  
39  /**
40   * CSVToXMLParser  - A CVE CSV data file parser that produces a CVEbrowser compressed XML data file.
41   * @author Jose Vicente Nunez Zuleta (josevnz@users.sourceforge.net)
42   * @version 0.2 - 07/31/2003
43   */
44  public final class CSVToXMLParser implements DataParser {
45    
46    private static ResourceBundle _bundle = null;
47    private Properties _properties;
48    
49    /**
50     * The order of the main tokens go like this:
51     * "Name","Description","References"
52     */
53    public static final String CVE_LINE_PATTERN = "\"(CVE-\\d{4}+-\\d{4}+)\",\"(.+)\",\"(.+)\"";
54    
55    /**
56     * The order of the main tokens go like this:
57     * "CANDIDATE","DESCRIPTION","PHASE","REFERENCES","VOTES","COMMENTS"
58     */
59    public static final String CAN_LINE_PATTERN = "\"(CAN-\\d{4}+-\\d{4}+)\",\"(.+)\",\"(.+)\",\"(.+)\",\"(.+)\",\"(.+)\"";
60    
61    /**
62     * References pattern (better than a simple StringTokenizer).
63     * "SGI:19981006-01-I,CERT:CA-98.12.mountd,CIAC:J-006,BID:121,XF:linux-mountd-bo"
64     * There are also special cases like this 'CISCO:http://www.cisco.com/warp/public/770/land-pub.shtml'
65     */
66    public static final String REFERENCES_LINE_PATTERN = "([a-zA-Z0-9-]*):(.+)";
67    
68    /**
69     * Contains a list of XML tags used on the cvebrowser XML document.
70     * This list was taken from the CVEBROWSER DTD.
71     *
72     */
73    public static final String [] XML_TAG_LIST = {
74      "cvebrowser",
75      "vulnerability",
76      "candidate",
77      "reference"
78    };
79    
80    /**
81     * Default output buffer size for the XML files.
82     */
83    public static final int DEFAULT_OUTPUT_BUFFER_SIZE = 8192;
84    
85    /**
86     * Number of required expected parameters for the command line
87     */
88    public static final int EXPECTED_PARAMETERS = 3;
89    
90    /**
91     * Parametric constructor.
92     * Required keys:
93     * <ul>
94     * <li> cvebrowser.dictionary.data.parser.CSVToXMLParser.dtd - Location of the output DTD
95     * </ul>
96     * @param prop_ Configuration parameters for the parser.
97     * @throws IllegalArgumentException
98     */
99    public CSVToXMLParser(Properties prop_) throws IllegalArgumentException {
100     if (prop_ == null) {
101       throw new IllegalArgumentException(_bundle.getString("cvebrowser.dictionary.data.parser.CSVToXMLParser.CSVToXMLParser.error.argumentIsNull") + ": prop_");
102     }
103     if (prop_.getProperty("cvebrowser.dictionary.data.parser.CSVToXMLParser.dtd") == null) {
104       throw new IllegalArgumentException(_bundle.getString("cvebrowser.dictionary.data.parser.CSVToXMLParser.CSVToXMLParser.error.propertyIsMissing") + ": 'cvebrowser.dictionary.data.parser.CSVToXMLParser.dtd'");
105     }
106     _properties = prop_;
107   }
108 
109   /**
110    * Takes a Mitre data file and creates an XML output format used to load the CVE / CAN data into the local database. The file is compressed to save space.
111    * @param mitreDataFile_ mitreDataFile_ The full path to the CVE data file
112    * @param outputfile_ The full path to the output file
113    * @throws DataParserException If there are errors parsing the file.
114    * @throws IOExceptionIf there are errors parsing the file.
115    * @return boolean
116    * @since 0.1
117    */
118   public boolean parse(String mitreDataFile_, String outputfile_) throws DataParserException, IOException {
119     if (mitreDataFile_ == null) {
120       throw new NullPointerException(_bundle.getString("cvebrowser.dictionary.data.parser.CSVToXMLParser.CSVToXMLParser.error.argumentIsNull") + ": mitreDataFile_");
121     }
122     if (outputfile_ == null) {
123       throw new NullPointerException(_bundle.getString("cvebrowser.dictionary.data.parser.CSVToXMLParser.CSVToXMLParser.error.argumentIsNull") + ": outputfile_");
124     }
125     boolean status = false;
126     GZIPOutputStream out = null;
127     FileOutputStream fout = null;
128     CSVFile CSVfile = null;
129     LineNumberReader lineReader = null;
130     String line = null;
131     String version = null;
132     Pattern linePattern = null;
133     Matcher lineMatcher = null;
134     Pattern refPattern = Pattern.compile(REFERENCES_LINE_PATTERN);
135     Matcher refMatcher = null;
136     int groups = 0;
137     try {
138       CSVfile = CSVMitreFileFactory.getCSVFile(mitreDataFile_);
139       lineReader = CSVfile.getLineNumberReader();
140       fout = new FileOutputStream(outputfile_);
141       out = new GZIPOutputStream(fout, DEFAULT_OUTPUT_BUFFER_SIZE);
142       StreamResult streamResult = new StreamResult(out);
143       SAXTransformerFactory tf = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
144       // SAX2.0 ContentHandler.
145       TransformerHandler transHandler = tf.newTransformerHandler();
146       Transformer serializer = transHandler.getTransformer();
147       serializer.setOutputProperty(OutputKeys.ENCODING,"ISO-8859-1");
148       serializer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM,_properties.getProperty("cvebrowser.dictionary.data.parser.CSVToXMLParser.dtd"));
149       serializer.setOutputProperty(OutputKeys.INDENT,"yes");
150       serializer.setOutputProperty(OutputKeys.STANDALONE,"no");
151       
152       transHandler.setResult(streamResult);
153       transHandler.startDocument();
154       AttributesImpl atts = new AttributesImpl();
155       AttributesImpl refatts = new AttributesImpl(); // Used to delay the reference attribute writing until the end on for a CAN type
156       version = CSVfile.getVersion(); // The version appears only once on teh CSV file, but is repeated through the resulting XML file
157       transHandler.startElement("","",XML_TAG_LIST[0],atts);
158       /*
159        * Depending of the type of the file, choose a parsing strategy
160        */
161       switch (CSVfile.getType()) {
162         case Types.DATA_TYPE_CVE:// CVE file
163           groups = 3;
164           // Ignore the first two lines, they're headers
165           line = lineReader.readLine();
166           linePattern = Pattern.compile(CVE_LINE_PATTERN);
167           while (line != null) {
168             atts.clear();
169             lineMatcher = linePattern.matcher(line);
170             // Ignore any line that doesn't start with a "CVE-"
171             if (lineMatcher.matches()) {
172               for (int idx = 1; idx <= groups; idx++) {
173                 switch (idx) {
174                   case 1: // Get the CVE code attribute and write the opening 'vulnerability' tag
175                     atts.clear();
176                     atts.addAttribute("","","version","NMTOKEN",version);
177                     /*
178                      * Break the original code. original code = TYPE-YYYY-NNNN.
179                      * Luckyly the CVE code has a fixed length (13 chars), which makes it easier to decompose.
180                      * Breaking the code, also makes it easier to get the values from the database, index creation.
181                      */
182                     atts.addAttribute("","","type","NMTOKEN",lineMatcher.group(idx).substring(0,3));
183                     atts.addAttribute("","","code","NMTOKEN",lineMatcher.group(idx).substring(4,13)); // Ignore the '-'
184                     break;
185                   case 2: // Get the CVE description
186                     atts.addAttribute("","","description","CDATA",lineMatcher.group(idx));
187                     transHandler.startElement("","",XML_TAG_LIST[1],atts);
188                     break;
189                   case 3: // Get the CVE references
190                     StringTokenizer refTok = new StringTokenizer(lineMatcher.group(idx), ",");
191                     for (int idx2 = 1; idx2 <= refTok.countTokens(); idx2++) {
192                       refMatcher = refPattern.matcher(refTok.nextToken());
193                       if (refMatcher.matches()) {
194                         refatts.clear();
195                         refatts.addAttribute("","","organization","CDATA",refMatcher.group(1));
196                         refatts.addAttribute("","","name","CDATA",refMatcher.group(2));
197                         transHandler.startElement("","",XML_TAG_LIST[3],refatts);
198                         transHandler.endElement("","",XML_TAG_LIST[3]);
199                       }
200                     }
201                     break;
202                 } // end switch
203               } // end for token
204               transHandler.endElement("","",XML_TAG_LIST[1]); // close the vulnerability tag
205             } // end if
206             line = lineReader.readLine();
207             if (! status) {
208               status = true;
209             }
210           } // end while
211           break;
212         case Types.DATA_TYPE_CAN:// CAN file
213           groups = 6;
214           line = lineReader.readLine();
215           linePattern = Pattern.compile(CAN_LINE_PATTERN);
216           while (line != null) {
217             atts.clear();
218             lineMatcher = linePattern.matcher(line);
219             // Ignore any line that doesn't start with a "CAN-"
220             if (lineMatcher.matches()) {
221               for (int idx = 1; idx <= groups; idx++) {
222                 switch (idx) {
223                   case 1:// Get the CAN code attribute and write the opening 'vulnerability' tag
224                     atts.clear();
225                     atts.addAttribute("","","version","NMTOKEN",version);
226                     atts.addAttribute("","","type","NMTOKEN",lineMatcher.group(idx).substring(0,3));
227                     atts.addAttribute("","","code","NMTOKEN",lineMatcher.group(idx).substring(4,13)); // Ignore the '-'
228                     break;
229                   case 2: // Get the CAN description
230                     atts.addAttribute("","","description","CDATA",lineMatcher.group(idx));
231                     transHandler.startElement("","",XML_TAG_LIST[1],atts);
232                     break;
233                   case 3: // Get the phase attribute (Lazzy write: don't write it yet, wait until all the references are written)
234                     atts.clear();
235                     atts.addAttribute("","","phase","CDATA",lineMatcher.group(idx));
236                     break;
237                   case 4: // Get the CAN references and write the elements rigth away, before the candidate element.
238                     StringTokenizer refTok = new StringTokenizer(lineMatcher.group(idx), "   |");
239                     for (int idx2 = 1; idx2 <= refTok.countTokens(); idx2++) {
240                       refMatcher = refPattern.matcher(refTok.nextToken());
241                       if (refMatcher.matches()) {
242                         refatts.clear();
243                         refatts.addAttribute("","","organization","CDATA",refMatcher.group(1));
244                         refatts.addAttribute("","","name","CDATA",refMatcher.group(2));
245                         transHandler.startElement("","",XML_TAG_LIST[3], refatts);
246                         transHandler.endElement("","",XML_TAG_LIST[3]);
247                       }
248                     }
249                     break;
250                   case 5:// Get the CAN votes as a single attribute
251                     // Replace the "|" with carriage return
252                     String votes = lineMatcher.group(idx).replaceAll("\\s{1,}+\\|", System.getProperty("line.separator"));
253                     atts.addAttribute("","","votes","CDATA", votes);
254                     break;
255                   case 6:// Get the CAN comments. The candidate tag writing is written here, after all the attributes are gathered.
256                     // Replace the "|" with carriage return
257                     String comments = lineMatcher.group(idx).replaceAll("\\s{1,}+\\|", System.getProperty("line.separator"));
258                     atts.addAttribute("","","comments","CDATA", comments);
259                     transHandler.startElement("","",XML_TAG_LIST[2], atts);
260                     transHandler.endElement("","",XML_TAG_LIST[2]);
261                     break;
262                 } // end switch
263               } // end for token
264               transHandler.endElement("","",XML_TAG_LIST[1]); // close the vulnerability tag
265             } // end if
266             line = lineReader.readLine();
267             if (! status) {
268               status = true;
269             }
270           } // end while
271           break;
272         default:// Unkown file
273           throw new DataParserException(_bundle.getString("cvebrowser.dictionary.data.parser.CSVToXMLParser.parse.error.typeUnknown"));
274       }
275       transHandler.endElement("","",XML_TAG_LIST[0]);
276       transHandler.endDocument();
277     } catch (Throwable throwbl) {
278       status = false;
279       throw new DataParserException(throwbl.toString(), throwbl);
280     } finally {
281       if (CSVfile != null) {
282         out.finish();
283         fout.flush();
284         CSVfile.close(); // this closes also the inputstream
285       }
286     }
287     return status;
288   }
289   
290   /**
291    * Command line entry point.
292    * Accepted parameters:
293    * <ul>
294    * <li> -s : Mitre CVS source file
295    * <li> -d : Destination full path for the ouput XML data file
296    * <li> -x : Location of the DTD required for the XML validation
297    * </ul>
298    * Optional parameters:
299    * <ul>
300    * <li> -l : Languaje to use
301    * <li> -k : Country to use
302    * <li> -h : Show the localized mini help.
303    * </ul>
304    * @param argv_
305    * @throws DataParserException
306    * @since 0.1
307    */
308   public static void main(String [] argv_) throws DataParserException {
309     Getopt optList = new Getopt(CSVToXMLParser.class.getName(), argv_, "s:d:x:l:k:h");
310     CSVToXMLParser install = null;
311     String mitreFile = null;
312     String outputFile = null;
313     Properties prop = new Properties();
314     String country = null;
315     String languaje = null;
316     boolean getHelp = false;
317     int requiredParametersCounter = 0;
318     try {
319       int option;
320       while ((option = optList.getopt()) != -1) {
321         switch(option) {
322           case 's':
323             mitreFile = optList.getOptarg().trim();
324             requiredParametersCounter++;
325             break;
326           case 'd':
327             outputFile = optList.getOptarg().trim();
328             requiredParametersCounter++;
329             break;
330           case 'x':
331             prop.setProperty("cvebrowser.dictionary.data.parser.CSVToXMLParser.dtd", optList.getOptarg().trim());
332             requiredParametersCounter++;
333             break;
334           case 'h':
335             getHelp = true;
336             break;
337           case 'l':
338             languaje = optList.getOptarg().trim();
339             break;
340           case 'k':
341             country = optList.getOptarg().trim();
342             break;
343           case '?':
344             break; // getopt() already printed an error
345             default:
346         }
347       }
348       CommandLine.setDefaultLanguaje(languaje, country);
349       _bundle = ResourceBundle.getBundle(CSVToXMLParser.class.getName(), Locale.getDefault());
350       if (getHelp) {
351         System.out.println(_bundle.getString("cvebrowser.dictionary.data.parser.CSVToXMLParser.main.info.usageMode"));
352         return;
353       }
354       CommandLine.checkAllTheParameters(requiredParametersCounter, EXPECTED_PARAMETERS, true);
355       install = new CSVToXMLParser(prop);
356       if (! install.parse(mitreFile, outputFile)) {
357         throw new DataParserException(_bundle.getString("cvebrowser.dictionary.data.parser.CSVToXMLParser.main.error.parseError"));
358       }
359     } catch (Exception exp) {
360       throw new DataParserException(exp.toString(), exp);
361     }
362   }
363   
364   /**
365    * Make this class uncloneable. Anyone who wants to use this class must use the constructor.
366    * @throws CloneNotSupportedException
367    */
368    public final Object clone() throws java.lang.CloneNotSupportedException {
369      throw new java.lang.CloneNotSupportedException();
370    }
371    
372    /**
373     * Make this class unserializable. Any attempt to serialize will throw an exception.
374     * @param out_
375     * @throws IOException
376     */
377    private final void writeObject(ObjectOutputStream out_) throws java.io.IOException {
378       throw new java.io.IOException();
379    }
380    
381   /**
382    * Make this class undeserializeable. Throw an exception if this method is ever called.
383    * @param in_
384    * @throws IOException
385    */
386    private final void readObject(ObjectInputStream in_) throws java.io.IOException {
387      throw new java.io.IOException();
388    }
389 }