Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/port80/html/tidy/Configuration.java


1   /*
2    * @(#)Configuration.java   1.11 2000/08/16
3    *
4    */
5   
6   package com.port80.html.tidy;
7   
8   /**
9    *
10   * Read configuration file and manage configuration properties.
11   *
12   * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
13   * See Tidy.java for the copyright notice.
14   * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
15   * HTML Tidy Release 4 Aug 2000</a>
16   *
17   * @author  Dave Raggett <dsr@w3.org>
18   * @author  Andy Quick <ac.quick@sympatico.ca> (translation to Java)
19   * @version 1.0, 1999/05/22
20   * @version 1.0.1, 1999/05/29
21   * @version 1.1, 1999/06/18 Java Bean
22   * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
23   * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
24   * @version 1.4, 1999/09/04 DOM support
25   * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
26   * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
27   * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
28   * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
29   * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
30   * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
31   * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
32   */
33  
34  /*
35    Configuration files associate a property name with a value.
36    The format is that of a Java .properties file.
37  */
38  
39  import java.io.FileInputStream;
40  import java.io.IOException;
41  import java.util.Enumeration;
42  import java.util.Properties;
43  import java.util.StringTokenizer;
44  
45  public class Configuration implements java.io.Serializable {
46  
47    ////////////////////////////////////////////////////////////////////////////////////
48  
49    /* character encodings */
50    public static final int RAW = 0;
51    public static final int ASCII = 1;
52    public static final int LATIN1 = 2;
53    public static final int UTF8 = 3;
54    public static final int ISO2022 = 4;
55    public static final int MACROMAN = 5;
56  
57    /* mode controlling treatment of doctype */
58    public static final int DOCTYPE_OMIT = 0;
59    public static final int DOCTYPE_AUTO = 1;
60    public static final int DOCTYPE_STRICT = 2;
61    public static final int DOCTYPE_LOOSE = 3;
62    public static final int DOCTYPE_USER = 4;
63  
64    ////////////////////////////////////////////////////////////////////////////////////
65  
66    protected int indent = 4; /* default indentation */
67    protected int tabsize = 4;
68    protected int wraplen = 120; /* default wrap margin */
69    protected int CharEncoding = ASCII;
70  
71    protected int docTypeMode = DOCTYPE_AUTO; /* see doctype property */
72    protected String slidestyle = null; /* style sheet for slides */
73    protected String docTypeStr = null; /* user specified doctype */
74    protected String errfile = null; /* file name to write errors to */
75    protected boolean writeback = false; /* if true then output tidied markup */
76  
77    protected boolean OnlyErrors = false; /* if true normal output is suppressed */
78    protected boolean ShowWarnings = true; /* however errors are always shown */
79    protected boolean ShowInfo = true; /* show informative warnings. */
80    protected boolean Quiet = false; /* no 'Parsing X', guessed DTD or summary */
81    protected boolean IndentContent = true; /* indent content of appropriate tags */
82    protected boolean SmartIndent = true; /* does text/block level content effect indentation */
83    protected boolean HideEndTags = false; /* suppress optional end tags */
84    protected boolean XmlTags = false; /* treat input as XML */
85    protected boolean XmlOut = false; /* create output as XML */
86    protected boolean xHTML = false; /* output extensible HTML */
87    protected boolean XmlPi = false; /* add <?xml?> for XML docs */
88    protected boolean RawOut = false; /* avoid mapping values > 127 to entities */
89    protected boolean UpperCaseTags = false; /* output tags in upper not lower case */
90    protected boolean UpperCaseAttrs = false; /* output attributes in upper not lower case */
91    protected boolean MakeClean = false; /* remove presentational clutter */
92    protected boolean LogicalEmphasis = false; /* replace i by em and b by strong */
93    protected boolean DropFontTags = false; /* discard presentation tags */
94    protected boolean DropEmptyParas = true; /* discard empty p elements */
95    protected boolean FixComments = true; /* fix comments with adjacent hyphens */
96    protected boolean BreakBeforeBR = false; /* o/p newline before <br> or not? */
97    protected boolean BurstSlides = false; /* create slides on each h2 element */
98    protected boolean NumEntities = false; /* use numeric entities */
99    protected boolean QuoteMarks = false; /* output " marks as &quot; */
100   protected boolean QuoteNbsp = true; /* output non-breaking space as entity */
101   // Converting & to &amp; would cause & to becomes &amp;amp;... when formatted
102   // more that once, so it would not work.  Let user fix the document.
103   protected boolean QuoteAmpersand = false; /* output naked ampersand as &amp; */
104   protected boolean WrapAttVals = false; /* wrap within attribute values */
105   protected boolean WrapScriptlets = false; /* wrap within JavaScript string literals */
106   protected boolean WrapSection = true; /* wrap within <![ ... ]> section tags */
107   protected boolean WrapAsp = true; /* wrap within ASP pseudo elements */
108   protected boolean WrapJste = true; /* wrap within JSTE pseudo elements */
109   protected boolean WrapPhp = true; /* wrap within PHP pseudo elements */
110   protected boolean FixBackslash = true; /* fix URLs by replacing \ with / */
111   protected boolean IndentAttributes = false; /* newline+indent before each attribute */
112   protected boolean XmlPIs = false; /* if set to yes PIs must end with ?> */
113   protected boolean XmlSpace = false; /* if set to yes adds xml:space attr as needed */
114   protected boolean EncloseBodyText = false; /* if yes text at body is wrapped in <p>'s */
115   protected boolean EncloseBlockText = false; /* if yes text in blocks is wrapped in <p>'s */
116   protected boolean KeepFileTimes = true; /* if yes last modied time is preserved */
117   protected boolean Word2000 = false; /* draconian cleaning for Word2000 */
118   protected boolean TidyMark = true; /* add meta element indicating tidied doc */
119   protected boolean Emacs = false; /* if true format error output for GNU Emacs */
120   protected boolean LiteralAttribs = false; /* if true attributes may use newlines */
121   protected boolean CompactFormat=false; // true to disable FM_BREAK.
122   protected boolean FixAlt= true; /* true to use src filename for missing alt attribute text. */
123   /** 
124    * Reformat mode perform inlineDup, run clean up filters ... etc. to cleanup the document.
125    * When set to false, changes to the document are minimized (no inlineDup(), Clean filters ... etc).
126    */
127   protected boolean doReformat=true; 
128   
129   private TagTable fTagTable; /* TagTable associated with this Configuration */
130   private AttributeTable fAttributeTable;
131   
132   private transient Properties _properties = new Properties();
133 
134   ////////////////////////////////////////////////////////////////////////////////////
135 
136   public Configuration() {
137     fAttributeTable=AttributeTable.getDefaultAttributeTable();
138     EntityTable.getDefaultEntityTable();
139     fTagTable= new TagTable();
140     fTagTable.setXML(XmlTags);
141   }
142 
143   ////////////////////////////////////////////////////////////////////////////////////
144 
145   private static int parseInt(String s, String option) {
146     int i = 0;
147     try {
148       i = Integer.parseInt(s);
149     } catch (NumberFormatException e) {
150       Report.badArgument(option);
151       i = -1;
152     }
153     return i;
154   }
155 
156   private static boolean parseBool(String s, String option) {
157     boolean b = false;
158     if (s != null && s.length() > 0) {
159       char c = s.charAt(0);
160       if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y') || (c == '1'))
161         b = true;
162       else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n') || (c == '0'))
163         b = false;
164       else
165         Report.badArgument(option);
166     }
167     return b;
168   }
169 
170   private static boolean parseInvBool(String s, String option) {
171     boolean b = false;
172     if (s != null && s.length() > 0) {
173       char c = s.charAt(0);
174       if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y'))
175         b = true;
176       else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n'))
177         b = false;
178       else
179         Report.badArgument(option);
180     }
181     return !b;
182   }
183 
184   private static String parseName(String s, String option) {
185     StringTokenizer t = new StringTokenizer(s);
186     String rs = null;
187     if (t.countTokens() >= 1)
188       rs = t.nextToken();
189     else
190       Report.badArgument(option);
191     return rs;
192   }
193 
194   private static int parseCharEncoding(String s, String option) {
195     int result = ASCII;
196 
197     if (Lexer.wstrcasecmp(s, "ascii") == 0)
198       result = ASCII;
199     else if (Lexer.wstrcasecmp(s, "latin1") == 0)
200       result = LATIN1;
201     else if (Lexer.wstrcasecmp(s, "raw") == 0)
202       result = RAW;
203     else if (Lexer.wstrcasecmp(s, "utf8") == 0)
204       result = UTF8;
205     else if (Lexer.wstrcasecmp(s, "iso2022") == 0)
206       result = ISO2022;
207     else if (Lexer.wstrcasecmp(s, "mac") == 0)
208       result = MACROMAN;
209     else
210       Report.badArgument(option);
211 
212     return result;
213   }
214 
215   ////////////////////////////////////////////////////////////////////////////////////
216 
217   public TagTable getTagTable() {
218     return fTagTable;
219   }
220   
221   public void setTagTable(TagTable table) {
222     fTagTable=table;
223   }
224   
225   public AttributeTable getAttributeTable() {
226     return fAttributeTable;
227   }
228   
229   public void setAttributeTable(AttributeTable table) {
230     fAttributeTable=table;
231   }
232   
233   public void addProps(Properties p) {
234     Enumeration enum = p.propertyNames();
235     while (enum.hasMoreElements()) {
236       String key = (String) enum.nextElement();
237       String value = p.getProperty(key);
238       _properties.put(key, value);
239     }
240     parseProps();
241   }
242 
243   public void parseFile(String filename) {
244     try {
245       _properties.load(new FileInputStream(filename));
246     } catch (IOException e) {
247       System.err.println(filename + e.toString());
248       return;
249     }
250     parseProps();
251   }
252 
253   /* ensure that config is self consistent */
254   public void adjust() {
255     if (EncloseBlockText)
256       EncloseBodyText = true;
257 
258     /* avoid the need to set IndentContent when SmartIndent is set */
259 
260     if (SmartIndent)
261       IndentContent = true;
262 
263     /* disable wrapping */
264     if (wraplen == 0)
265       wraplen = 0x7FFFFFFF;
266 
267     /* Word 2000 needs o:p to be declared as inline */
268     if (Word2000) {
269       fTagTable.defineInlineTag("o:p");
270     }
271 
272     /* XHTML is written in lower case */
273     if (xHTML) {
274       XmlOut = true;
275       UpperCaseTags = false;
276       UpperCaseAttrs = false;
277     }
278 
279     /* if XML in, then XML out */
280     if (XmlTags) {
281       XmlOut = true;
282       XmlPIs = true;
283       fTagTable.setXML(true);
284     }
285 
286     /* XML requires end tags */
287     if (XmlOut) {
288       // Converting & to &amp; would cause & to becomes &amp;amp;... when formatted
289       // more that once, so it would not work.  Let user fix the document.
290       QuoteAmpersand = false;
291       HideEndTags = false;
292     }
293   }
294 
295   ////////////////////////////////////////////////////////////////////////////////////
296 
297   /*
298      doctype: omit | auto | strict | loose | <fpi>
299   
300      where the fpi is a string similar to
301   
302         "-//ACME//DTD HTML 3.14159//EN"
303   */
304   protected String parseDocType(String s, String option) {
305     s = s.trim();
306 
307     /* "-//ACME//DTD HTML 3.14159//EN" or similar */
308 
309     if (s.startsWith("\"")) {
310       docTypeMode = DOCTYPE_USER;
311       return s;
312     }
313 
314     /* read first word */
315     String word = "";
316     StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
317     if (t.hasMoreTokens())
318       word = t.nextToken();
319 
320     if (Lexer.wstrcasecmp(word, "omit") == 0)
321       docTypeMode = DOCTYPE_OMIT;
322     else if (Lexer.wstrcasecmp(word, "strict") == 0)
323       docTypeMode = DOCTYPE_STRICT;
324     else if (Lexer.wstrcasecmp(word, "loose") == 0 || Lexer.wstrcasecmp(word, "transitional") == 0)
325       docTypeMode = DOCTYPE_LOOSE;
326     else if (Lexer.wstrcasecmp(word, "auto") == 0)
327       docTypeMode = DOCTYPE_AUTO;
328     else {
329       docTypeMode = DOCTYPE_AUTO;
330       Report.badArgument(option);
331     }
332     return null;
333   }
334 
335   ////////////////////////////////////////////////////////////////////////////////////
336 
337   private void parseProps() {
338     String value;
339 
340     value = _properties.getProperty("indent-spaces");
341     if (value != null)
342       indent = parseInt(value, "indent-spaces");
343 
344     value = _properties.getProperty("wrap");
345     if (value != null)
346       wraplen = parseInt(value, "wrap");
347 
348     value = _properties.getProperty("wrap-attributes");
349     if (value != null)
350       WrapAttVals = parseBool(value, "wrap-attributes");
351 
352     value = _properties.getProperty("wrap-script-literals");
353     if (value != null)
354       WrapScriptlets = parseBool(value, "wrap-script-literals");
355 
356     value = _properties.getProperty("wrap-sections");
357     if (value != null)
358       WrapSection = parseBool(value, "wrap-sections");
359 
360     value = _properties.getProperty("wrap-asp");
361     if (value != null)
362       WrapAsp = parseBool(value, "wrap-asp");
363 
364     value = _properties.getProperty("wrap-jste");
365     if (value != null)
366       WrapJste = parseBool(value, "wrap-jste");
367 
368     value = _properties.getProperty("wrap-php");
369     if (value != null)
370       WrapPhp = parseBool(value, "wrap-php");
371 
372     value = _properties.getProperty("literal-attributes");
373     if (value != null)
374       LiteralAttribs = parseBool(value, "literal-attributes");
375 
376     value = _properties.getProperty("tab-size");
377     if (value != null)
378       tabsize = parseInt(value, "tab-size");
379 
380     value = _properties.getProperty("markup");
381     if (value != null)
382       OnlyErrors = parseInvBool(value, "markup");
383 
384     value = _properties.getProperty("quiet");
385     if (value != null)
386       Quiet = parseBool(value, "quiet");
387 
388     value = _properties.getProperty("tidy-mark");
389     if (value != null)
390       TidyMark = parseBool(value, "tidy-mark");
391 
392     value = _properties.getProperty("indent");
393     if (value != null)
394       IndentContent = parseIndent(value, "indent");
395 
396     value = _properties.getProperty("indent-attributes");
397     if (value != null)
398       IndentAttributes = parseBool(value, "ident-attributes");
399 
400     value = _properties.getProperty("hide-endtags");
401     if (value != null)
402       HideEndTags = parseBool(value, "hide-endtags");
403 
404     value = _properties.getProperty("input-xml");
405     if (value != null)
406       XmlTags = parseBool(value, "input-xml");
407 
408     value = _properties.getProperty("output-xml");
409     if (value != null)
410       XmlOut = parseBool(value, "output-xml");
411 
412     value = _properties.getProperty("output-xhtml");
413     if (value != null)
414       xHTML = parseBool(value, "output-xhtml");
415 
416     value = _properties.getProperty("add-xml-pi");
417     if (value != null)
418       XmlPi = parseBool(value, "add-xml-pi");
419 
420     value = _properties.getProperty("add-xml-decl");
421     if (value != null)
422       XmlPi = parseBool(value, "add-xml-decl");
423 
424     value = _properties.getProperty("assume-xml-procins");
425     if (value != null)
426       XmlPIs = parseBool(value, "assume-xml-procins");
427 
428     value = _properties.getProperty("raw");
429     if (value != null)
430       RawOut = parseBool(value, "raw");
431 
432     value = _properties.getProperty("uppercase-tags");
433     if (value != null)
434       UpperCaseTags = parseBool(value, "uppercase-tags");
435 
436     value = _properties.getProperty("uppercase-attributes");
437     if (value != null)
438       UpperCaseAttrs = parseBool(value, "uppercase-attributes");
439 
440     value = _properties.getProperty("clean");
441     if (value != null)
442       MakeClean = parseBool(value, "clean");
443 
444     value = _properties.getProperty("logical-emphasis");
445     if (value != null)
446       LogicalEmphasis = parseBool(value, "logical-emphasis");
447 
448     value = _properties.getProperty("word-2000");
449     if (value != null)
450       Word2000 = parseBool(value, "word-2000");
451 
452     value = _properties.getProperty("drop-empty-paras");
453     if (value != null)
454       DropEmptyParas = parseBool(value, "drop-empty-paras");
455 
456     value = _properties.getProperty("drop-font-tags");
457     if (value != null)
458       DropFontTags = parseBool(value, "drop-font-tags");
459 
460     value = _properties.getProperty("enclose-text");
461     if (value != null)
462       EncloseBodyText = parseBool(value, "enclose-text");
463 
464     value = _properties.getProperty("enclose-block-text");
465     if (value != null)
466       EncloseBlockText = parseBool(value, "enclose-block-text");
467 
468     value = _properties.getProperty("no-fix-alt");
469     if (value != null)
470       FixAlt = false;
471 
472     value = _properties.getProperty("add-xml-space");
473     if (value != null)
474       XmlSpace = parseBool(value, "add-xml-space");
475 
476     value = _properties.getProperty("fix-bad-comments");
477     if (value != null)
478       FixComments = parseBool(value, "fix-bad-comments");
479 
480     value = _properties.getProperty("split");
481     if (value != null)
482       BurstSlides = parseBool(value, "split");
483 
484     value = _properties.getProperty("break-before-br");
485     if (value != null)
486       BreakBeforeBR = parseBool(value, "break-before-br");
487 
488     value = _properties.getProperty("numeric-entities");
489     if (value != null)
490       NumEntities = parseBool(value, "numeric-entities");
491 
492     value = _properties.getProperty("quote-marks");
493     if (value != null)
494       QuoteMarks = parseBool(value, "quote-marks");
495 
496     value = _properties.getProperty("quote-nbsp");
497     if (value != null)
498       QuoteNbsp = parseBool(value, "quote-nbsp");
499 
500     value = _properties.getProperty("quote-ampersand");
501     if (value != null)
502       QuoteAmpersand = parseBool(value, "quote-ampersand");
503 
504     value = _properties.getProperty("write-back");
505     if (value != null)
506       writeback = parseBool(value, "write-back");
507 
508     value = _properties.getProperty("keep-time");
509     if (value != null)
510       KeepFileTimes = parseBool(value, "keep-time");
511 
512     value = _properties.getProperty("show-warnings");
513     if (value != null)
514       ShowWarnings = parseBool(value, "show-warnings");
515 
516     value = _properties.getProperty("error-file");
517     if (value != null)
518       errfile = parseName(value, "error-file");
519 
520     value = _properties.getProperty("slide-style");
521     if (value != null)
522       slidestyle = parseName(value, "slide-style");
523 
524     value = _properties.getProperty("new-inline-tags");
525     if (value != null)
526       parseInlineTagNames(value, "new-inline-tags");
527 
528     value = _properties.getProperty("new-blocklevel-tags");
529     if (value != null)
530       parseBlockTagNames(value, "new-blocklevel-tags");
531 
532     value = _properties.getProperty("new-empty-tags");
533     if (value != null)
534       parseEmptyTagNames(value, "new-empty-tags");
535 
536     value = _properties.getProperty("new-pre-tags");
537     if (value != null)
538       parsePreTagNames(value, "new-pre-tags");
539 
540     value = _properties.getProperty("char-encoding");
541     if (value != null)
542       CharEncoding = parseCharEncoding(value, "char-encoding");
543 
544     value = _properties.getProperty("doctype");
545     if (value != null)
546       docTypeStr = parseDocType(value, "doctype");
547 
548     value = _properties.getProperty("fix-backslash");
549     if (value != null)
550       FixBackslash = parseBool(value, "fix-backslash");
551 
552     value = _properties.getProperty("gnu-emacs");
553     if (value != null)
554       Emacs = parseBool(value, "gnu-emacs");
555   }
556 
557   /* slight hack to avoid changes to pprint.c */
558   private boolean parseIndent(String s, String option) {
559     boolean b = IndentContent;
560 
561     if (Lexer.wstrcasecmp(s, "yes") == 0) {
562       b = true;
563       SmartIndent = false;
564     } else if (Lexer.wstrcasecmp(s, "true") == 0) {
565       b = true;
566       SmartIndent = false;
567     } else if (Lexer.wstrcasecmp(s, "no") == 0) {
568       b = false;
569       SmartIndent = false;
570     } else if (Lexer.wstrcasecmp(s, "false") == 0) {
571       b = false;
572       SmartIndent = false;
573     } else if (Lexer.wstrcasecmp(s, "auto") == 0) {
574       b = true;
575       SmartIndent = true;
576     } else
577       Report.badArgument(option);
578     return b;
579   }
580 
581   private void parseInlineTagNames(String s, String option) {
582     StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
583     while (t.hasMoreTokens()) {
584       fTagTable.defineInlineTag(t.nextToken());
585     }
586   }
587 
588   private void parseBlockTagNames(String s, String option) {
589     StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
590     while (t.hasMoreTokens()) {
591       fTagTable.defineBlockTag(t.nextToken());
592     }
593   }
594 
595   private void parseEmptyTagNames(String s, String option) {
596     StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
597     while (t.hasMoreTokens()) {
598       fTagTable.defineEmptyTag(t.nextToken());
599     }
600   }
601 
602   private void parsePreTagNames(String s, String option) {
603     StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
604     while (t.hasMoreTokens()) {
605       fTagTable.definePreTag(t.nextToken());
606     }
607   }
608 
609   ////////////////////////////////////////////////////////////////////////////////////
610 
611 }