Source code: cvebrowser/dictionary/data/parser/CSVToXMLParser.java
1 package cvebrowser.dictionary.data.parser;
2
3 import java.io.IOException;
4 import java.io.File;
5 import java.io.FileOutputStream;
6 import java.io.LineNumberReader;
7 import java.io.ObjectOutputStream;
8 import java.io.ObjectInputStream;
9
10 // SAX classes.
11 import org.xml.sax.SAXException;
12 import org.xml.sax.helpers.AttributesImpl;
13
14 //JAXP 1.1
15 import javax.xml.transform.OutputKeys;
16 import javax.xml.transform.Transformer;
17 import javax.xml.transform.stream.StreamResult;
18 import javax.xml.transform.sax.SAXTransformerFactory;
19 import javax.xml.transform.sax.TransformerHandler;
20 import javax.xml.transform.TransformerConfigurationException;
21
22 import java.util.ResourceBundle;
23 import java.util.Properties;
24 import java.util.StringTokenizer;
25 import java.util.Locale;
26 import java.util.regex.Pattern;
27 import java.util.regex.Matcher;
28 import java.util.zip.GZIPOutputStream;
29
30 import gnu.getopt.Getopt;
31
32 import cvebrowser.util.CommandLine;
33 import cvebrowser.util.parser.FileType;
34 import cvebrowser.dictionary.data.parser.CSVMitreFileFactory;
35 import cvebrowser.dictionary.data.parser.DataParserException;
36 import cvebrowser.dictionary.data.parser.CSVFile;
37 import cvebrowser.util.parser.Types;
38
39 /**
40 * CSVToXMLParser - A CVE CSV data file parser that produces a CVEbrowser compressed XML data file.
41 * @author Jose Vicente Nunez Zuleta (josevnz@users.sourceforge.net)
42 * @version 0.2 - 07/31/2003
43 */
44 public final class CSVToXMLParser implements DataParser {
45
46 private static ResourceBundle _bundle = null;
47 private Properties _properties;
48
49 /**
50 * The order of the main tokens go like this:
51 * "Name","Description","References"
52 */
53 public static final String CVE_LINE_PATTERN = "\"(CVE-\\d{4}+-\\d{4}+)\",\"(.+)\",\"(.+)\"";
54
55 /**
56 * The order of the main tokens go like this:
57 * "CANDIDATE","DESCRIPTION","PHASE","REFERENCES","VOTES","COMMENTS"
58 */
59 public static final String CAN_LINE_PATTERN = "\"(CAN-\\d{4}+-\\d{4}+)\",\"(.+)\",\"(.+)\",\"(.+)\",\"(.+)\",\"(.+)\"";
60
61 /**
62 * References pattern (better than a simple StringTokenizer).
63 * "SGI:19981006-01-I,CERT:CA-98.12.mountd,CIAC:J-006,BID:121,XF:linux-mountd-bo"
64 * There are also special cases like this 'CISCO:http://www.cisco.com/warp/public/770/land-pub.shtml'
65 */
66 public static final String REFERENCES_LINE_PATTERN = "([a-zA-Z0-9-]*):(.+)";
67
68 /**
69 * Contains a list of XML tags used on the cvebrowser XML document.
70 * This list was taken from the CVEBROWSER DTD.
71 *
72 */
73 public static final String [] XML_TAG_LIST = {
74 "cvebrowser",
75 "vulnerability",
76 "candidate",
77 "reference"
78 };
79
80 /**
81 * Default output buffer size for the XML files.
82 */
83 public static final int DEFAULT_OUTPUT_BUFFER_SIZE = 8192;
84
85 /**
86 * Number of required expected parameters for the command line
87 */
88 public static final int EXPECTED_PARAMETERS = 3;
89
90 /**
91 * Parametric constructor.
92 * Required keys:
93 * <ul>
94 * <li> cvebrowser.dictionary.data.parser.CSVToXMLParser.dtd - Location of the output DTD
95 * </ul>
96 * @param prop_ Configuration parameters for the parser.
97 * @throws IllegalArgumentException
98 */
99 public CSVToXMLParser(Properties prop_) throws IllegalArgumentException {
100 if (prop_ == null) {
101 throw new IllegalArgumentException(_bundle.getString("cvebrowser.dictionary.data.parser.CSVToXMLParser.CSVToXMLParser.error.argumentIsNull") + ": prop_");
102 }
103 if (prop_.getProperty("cvebrowser.dictionary.data.parser.CSVToXMLParser.dtd") == null) {
104 throw new IllegalArgumentException(_bundle.getString("cvebrowser.dictionary.data.parser.CSVToXMLParser.CSVToXMLParser.error.propertyIsMissing") + ": 'cvebrowser.dictionary.data.parser.CSVToXMLParser.dtd'");
105 }
106 _properties = prop_;
107 }
108
109 /**
110 * Takes a Mitre data file and creates an XML output format used to load the CVE / CAN data into the local database. The file is compressed to save space.
111 * @param mitreDataFile_ mitreDataFile_ The full path to the CVE data file
112 * @param outputfile_ The full path to the output file
113 * @throws DataParserException If there are errors parsing the file.
114 * @throws IOExceptionIf there are errors parsing the file.
115 * @return boolean
116 * @since 0.1
117 */
118 public boolean parse(String mitreDataFile_, String outputfile_) throws DataParserException, IOException {
119 if (mitreDataFile_ == null) {
120 throw new NullPointerException(_bundle.getString("cvebrowser.dictionary.data.parser.CSVToXMLParser.CSVToXMLParser.error.argumentIsNull") + ": mitreDataFile_");
121 }
122 if (outputfile_ == null) {
123 throw new NullPointerException(_bundle.getString("cvebrowser.dictionary.data.parser.CSVToXMLParser.CSVToXMLParser.error.argumentIsNull") + ": outputfile_");
124 }
125 boolean status = false;
126 GZIPOutputStream out = null;
127 FileOutputStream fout = null;
128 CSVFile CSVfile = null;
129 LineNumberReader lineReader = null;
130 String line = null;
131 String version = null;
132 Pattern linePattern = null;
133 Matcher lineMatcher = null;
134 Pattern refPattern = Pattern.compile(REFERENCES_LINE_PATTERN);
135 Matcher refMatcher = null;
136 int groups = 0;
137 try {
138 CSVfile = CSVMitreFileFactory.getCSVFile(mitreDataFile_);
139 lineReader = CSVfile.getLineNumberReader();
140 fout = new FileOutputStream(outputfile_);
141 out = new GZIPOutputStream(fout, DEFAULT_OUTPUT_BUFFER_SIZE);
142 StreamResult streamResult = new StreamResult(out);
143 SAXTransformerFactory tf = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
144 // SAX2.0 ContentHandler.
145 TransformerHandler transHandler = tf.newTransformerHandler();
146 Transformer serializer = transHandler.getTransformer();
147 serializer.setOutputProperty(OutputKeys.ENCODING,"ISO-8859-1");
148 serializer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM,_properties.getProperty("cvebrowser.dictionary.data.parser.CSVToXMLParser.dtd"));
149 serializer.setOutputProperty(OutputKeys.INDENT,"yes");
150 serializer.setOutputProperty(OutputKeys.STANDALONE,"no");
151
152 transHandler.setResult(streamResult);
153 transHandler.startDocument();
154 AttributesImpl atts = new AttributesImpl();
155 AttributesImpl refatts = new AttributesImpl(); // Used to delay the reference attribute writing until the end on for a CAN type
156 version = CSVfile.getVersion(); // The version appears only once on teh CSV file, but is repeated through the resulting XML file
157 transHandler.startElement("","",XML_TAG_LIST[0],atts);
158 /*
159 * Depending of the type of the file, choose a parsing strategy
160 */
161 switch (CSVfile.getType()) {
162 case Types.DATA_TYPE_CVE:// CVE file
163 groups = 3;
164 // Ignore the first two lines, they're headers
165 line = lineReader.readLine();
166 linePattern = Pattern.compile(CVE_LINE_PATTERN);
167 while (line != null) {
168 atts.clear();
169 lineMatcher = linePattern.matcher(line);
170 // Ignore any line that doesn't start with a "CVE-"
171 if (lineMatcher.matches()) {
172 for (int idx = 1; idx <= groups; idx++) {
173 switch (idx) {
174 case 1: // Get the CVE code attribute and write the opening 'vulnerability' tag
175 atts.clear();
176 atts.addAttribute("","","version","NMTOKEN",version);
177 /*
178 * Break the original code. original code = TYPE-YYYY-NNNN.
179 * Luckyly the CVE code has a fixed length (13 chars), which makes it easier to decompose.
180 * Breaking the code, also makes it easier to get the values from the database, index creation.
181 */
182 atts.addAttribute("","","type","NMTOKEN",lineMatcher.group(idx).substring(0,3));
183 atts.addAttribute("","","code","NMTOKEN",lineMatcher.group(idx).substring(4,13)); // Ignore the '-'
184 break;
185 case 2: // Get the CVE description
186 atts.addAttribute("","","description","CDATA",lineMatcher.group(idx));
187 transHandler.startElement("","",XML_TAG_LIST[1],atts);
188 break;
189 case 3: // Get the CVE references
190 StringTokenizer refTok = new StringTokenizer(lineMatcher.group(idx), ",");
191 for (int idx2 = 1; idx2 <= refTok.countTokens(); idx2++) {
192 refMatcher = refPattern.matcher(refTok.nextToken());
193 if (refMatcher.matches()) {
194 refatts.clear();
195 refatts.addAttribute("","","organization","CDATA",refMatcher.group(1));
196 refatts.addAttribute("","","name","CDATA",refMatcher.group(2));
197 transHandler.startElement("","",XML_TAG_LIST[3],refatts);
198 transHandler.endElement("","",XML_TAG_LIST[3]);
199 }
200 }
201 break;
202 } // end switch
203 } // end for token
204 transHandler.endElement("","",XML_TAG_LIST[1]); // close the vulnerability tag
205 } // end if
206 line = lineReader.readLine();
207 if (! status) {
208 status = true;
209 }
210 } // end while
211 break;
212 case Types.DATA_TYPE_CAN:// CAN file
213 groups = 6;
214 line = lineReader.readLine();
215 linePattern = Pattern.compile(CAN_LINE_PATTERN);
216 while (line != null) {
217 atts.clear();
218 lineMatcher = linePattern.matcher(line);
219 // Ignore any line that doesn't start with a "CAN-"
220 if (lineMatcher.matches()) {
221 for (int idx = 1; idx <= groups; idx++) {
222 switch (idx) {
223 case 1:// Get the CAN code attribute and write the opening 'vulnerability' tag
224 atts.clear();
225 atts.addAttribute("","","version","NMTOKEN",version);
226 atts.addAttribute("","","type","NMTOKEN",lineMatcher.group(idx).substring(0,3));
227 atts.addAttribute("","","code","NMTOKEN",lineMatcher.group(idx).substring(4,13)); // Ignore the '-'
228 break;
229 case 2: // Get the CAN description
230 atts.addAttribute("","","description","CDATA",lineMatcher.group(idx));
231 transHandler.startElement("","",XML_TAG_LIST[1],atts);
232 break;
233 case 3: // Get the phase attribute (Lazzy write: don't write it yet, wait until all the references are written)
234 atts.clear();
235 atts.addAttribute("","","phase","CDATA",lineMatcher.group(idx));
236 break;
237 case 4: // Get the CAN references and write the elements rigth away, before the candidate element.
238 StringTokenizer refTok = new StringTokenizer(lineMatcher.group(idx), " |");
239 for (int idx2 = 1; idx2 <= refTok.countTokens(); idx2++) {
240 refMatcher = refPattern.matcher(refTok.nextToken());
241 if (refMatcher.matches()) {
242 refatts.clear();
243 refatts.addAttribute("","","organization","CDATA",refMatcher.group(1));
244 refatts.addAttribute("","","name","CDATA",refMatcher.group(2));
245 transHandler.startElement("","",XML_TAG_LIST[3], refatts);
246 transHandler.endElement("","",XML_TAG_LIST[3]);
247 }
248 }
249 break;
250 case 5:// Get the CAN votes as a single attribute
251 // Replace the "|" with carriage return
252 String votes = lineMatcher.group(idx).replaceAll("\\s{1,}+\\|", System.getProperty("line.separator"));
253 atts.addAttribute("","","votes","CDATA", votes);
254 break;
255 case 6:// Get the CAN comments. The candidate tag writing is written here, after all the attributes are gathered.
256 // Replace the "|" with carriage return
257 String comments = lineMatcher.group(idx).replaceAll("\\s{1,}+\\|", System.getProperty("line.separator"));
258 atts.addAttribute("","","comments","CDATA", comments);
259 transHandler.startElement("","",XML_TAG_LIST[2], atts);
260 transHandler.endElement("","",XML_TAG_LIST[2]);
261 break;
262 } // end switch
263 } // end for token
264 transHandler.endElement("","",XML_TAG_LIST[1]); // close the vulnerability tag
265 } // end if
266 line = lineReader.readLine();
267 if (! status) {
268 status = true;
269 }
270 } // end while
271 break;
272 default:// Unkown file
273 throw new DataParserException(_bundle.getString("cvebrowser.dictionary.data.parser.CSVToXMLParser.parse.error.typeUnknown"));
274 }
275 transHandler.endElement("","",XML_TAG_LIST[0]);
276 transHandler.endDocument();
277 } catch (Throwable throwbl) {
278 status = false;
279 throw new DataParserException(throwbl.toString(), throwbl);
280 } finally {
281 if (CSVfile != null) {
282 out.finish();
283 fout.flush();
284 CSVfile.close(); // this closes also the inputstream
285 }
286 }
287 return status;
288 }
289
290 /**
291 * Command line entry point.
292 * Accepted parameters:
293 * <ul>
294 * <li> -s : Mitre CVS source file
295 * <li> -d : Destination full path for the ouput XML data file
296 * <li> -x : Location of the DTD required for the XML validation
297 * </ul>
298 * Optional parameters:
299 * <ul>
300 * <li> -l : Languaje to use
301 * <li> -k : Country to use
302 * <li> -h : Show the localized mini help.
303 * </ul>
304 * @param argv_
305 * @throws DataParserException
306 * @since 0.1
307 */
308 public static void main(String [] argv_) throws DataParserException {
309 Getopt optList = new Getopt(CSVToXMLParser.class.getName(), argv_, "s:d:x:l:k:h");
310 CSVToXMLParser install = null;
311 String mitreFile = null;
312 String outputFile = null;
313 Properties prop = new Properties();
314 String country = null;
315 String languaje = null;
316 boolean getHelp = false;
317 int requiredParametersCounter = 0;
318 try {
319 int option;
320 while ((option = optList.getopt()) != -1) {
321 switch(option) {
322 case 's':
323 mitreFile = optList.getOptarg().trim();
324 requiredParametersCounter++;
325 break;
326 case 'd':
327 outputFile = optList.getOptarg().trim();
328 requiredParametersCounter++;
329 break;
330 case 'x':
331 prop.setProperty("cvebrowser.dictionary.data.parser.CSVToXMLParser.dtd", optList.getOptarg().trim());
332 requiredParametersCounter++;
333 break;
334 case 'h':
335 getHelp = true;
336 break;
337 case 'l':
338 languaje = optList.getOptarg().trim();
339 break;
340 case 'k':
341 country = optList.getOptarg().trim();
342 break;
343 case '?':
344 break; // getopt() already printed an error
345 default:
346 }
347 }
348 CommandLine.setDefaultLanguaje(languaje, country);
349 _bundle = ResourceBundle.getBundle(CSVToXMLParser.class.getName(), Locale.getDefault());
350 if (getHelp) {
351 System.out.println(_bundle.getString("cvebrowser.dictionary.data.parser.CSVToXMLParser.main.info.usageMode"));
352 return;
353 }
354 CommandLine.checkAllTheParameters(requiredParametersCounter, EXPECTED_PARAMETERS, true);
355 install = new CSVToXMLParser(prop);
356 if (! install.parse(mitreFile, outputFile)) {
357 throw new DataParserException(_bundle.getString("cvebrowser.dictionary.data.parser.CSVToXMLParser.main.error.parseError"));
358 }
359 } catch (Exception exp) {
360 throw new DataParserException(exp.toString(), exp);
361 }
362 }
363
364 /**
365 * Make this class uncloneable. Anyone who wants to use this class must use the constructor.
366 * @throws CloneNotSupportedException
367 */
368 public final Object clone() throws java.lang.CloneNotSupportedException {
369 throw new java.lang.CloneNotSupportedException();
370 }
371
372 /**
373 * Make this class unserializable. Any attempt to serialize will throw an exception.
374 * @param out_
375 * @throws IOException
376 */
377 private final void writeObject(ObjectOutputStream out_) throws java.io.IOException {
378 throw new java.io.IOException();
379 }
380
381 /**
382 * Make this class undeserializeable. Throw an exception if this method is ever called.
383 * @param in_
384 * @throws IOException
385 */
386 private final void readObject(ObjectInputStream in_) throws java.io.IOException {
387 throw new java.io.IOException();
388 }
389 }