Source code: com/port80/html/tidy/Configuration.java
1 /*
2 * @(#)Configuration.java 1.11 2000/08/16
3 *
4 */
5
6 package com.port80.html.tidy;
7
8 /**
9 *
10 * Read configuration file and manage configuration properties.
11 *
12 * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
13 * See Tidy.java for the copyright notice.
14 * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
15 * HTML Tidy Release 4 Aug 2000</a>
16 *
17 * @author Dave Raggett <dsr@w3.org>
18 * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
19 * @version 1.0, 1999/05/22
20 * @version 1.0.1, 1999/05/29
21 * @version 1.1, 1999/06/18 Java Bean
22 * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
23 * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
24 * @version 1.4, 1999/09/04 DOM support
25 * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
26 * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
27 * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
28 * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
29 * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
30 * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
31 * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
32 */
33
34 /*
35 Configuration files associate a property name with a value.
36 The format is that of a Java .properties file.
37 */
38
39 import java.io.FileInputStream;
40 import java.io.IOException;
41 import java.util.Enumeration;
42 import java.util.Properties;
43 import java.util.StringTokenizer;
44
45 public class Configuration implements java.io.Serializable {
46
47 ////////////////////////////////////////////////////////////////////////////////////
48
49 /* character encodings */
50 public static final int RAW = 0;
51 public static final int ASCII = 1;
52 public static final int LATIN1 = 2;
53 public static final int UTF8 = 3;
54 public static final int ISO2022 = 4;
55 public static final int MACROMAN = 5;
56
57 /* mode controlling treatment of doctype */
58 public static final int DOCTYPE_OMIT = 0;
59 public static final int DOCTYPE_AUTO = 1;
60 public static final int DOCTYPE_STRICT = 2;
61 public static final int DOCTYPE_LOOSE = 3;
62 public static final int DOCTYPE_USER = 4;
63
64 ////////////////////////////////////////////////////////////////////////////////////
65
66 protected int indent = 4; /* default indentation */
67 protected int tabsize = 4;
68 protected int wraplen = 120; /* default wrap margin */
69 protected int CharEncoding = ASCII;
70
71 protected int docTypeMode = DOCTYPE_AUTO; /* see doctype property */
72 protected String slidestyle = null; /* style sheet for slides */
73 protected String docTypeStr = null; /* user specified doctype */
74 protected String errfile = null; /* file name to write errors to */
75 protected boolean writeback = false; /* if true then output tidied markup */
76
77 protected boolean OnlyErrors = false; /* if true normal output is suppressed */
78 protected boolean ShowWarnings = true; /* however errors are always shown */
79 protected boolean ShowInfo = true; /* show informative warnings. */
80 protected boolean Quiet = false; /* no 'Parsing X', guessed DTD or summary */
81 protected boolean IndentContent = true; /* indent content of appropriate tags */
82 protected boolean SmartIndent = true; /* does text/block level content effect indentation */
83 protected boolean HideEndTags = false; /* suppress optional end tags */
84 protected boolean XmlTags = false; /* treat input as XML */
85 protected boolean XmlOut = false; /* create output as XML */
86 protected boolean xHTML = false; /* output extensible HTML */
87 protected boolean XmlPi = false; /* add <?xml?> for XML docs */
88 protected boolean RawOut = false; /* avoid mapping values > 127 to entities */
89 protected boolean UpperCaseTags = false; /* output tags in upper not lower case */
90 protected boolean UpperCaseAttrs = false; /* output attributes in upper not lower case */
91 protected boolean MakeClean = false; /* remove presentational clutter */
92 protected boolean LogicalEmphasis = false; /* replace i by em and b by strong */
93 protected boolean DropFontTags = false; /* discard presentation tags */
94 protected boolean DropEmptyParas = true; /* discard empty p elements */
95 protected boolean FixComments = true; /* fix comments with adjacent hyphens */
96 protected boolean BreakBeforeBR = false; /* o/p newline before <br> or not? */
97 protected boolean BurstSlides = false; /* create slides on each h2 element */
98 protected boolean NumEntities = false; /* use numeric entities */
99 protected boolean QuoteMarks = false; /* output " marks as " */
100 protected boolean QuoteNbsp = true; /* output non-breaking space as entity */
101 // Converting & to & would cause & to becomes &amp;... when formatted
102 // more that once, so it would not work. Let user fix the document.
103 protected boolean QuoteAmpersand = false; /* output naked ampersand as & */
104 protected boolean WrapAttVals = false; /* wrap within attribute values */
105 protected boolean WrapScriptlets = false; /* wrap within JavaScript string literals */
106 protected boolean WrapSection = true; /* wrap within <![ ... ]> section tags */
107 protected boolean WrapAsp = true; /* wrap within ASP pseudo elements */
108 protected boolean WrapJste = true; /* wrap within JSTE pseudo elements */
109 protected boolean WrapPhp = true; /* wrap within PHP pseudo elements */
110 protected boolean FixBackslash = true; /* fix URLs by replacing \ with / */
111 protected boolean IndentAttributes = false; /* newline+indent before each attribute */
112 protected boolean XmlPIs = false; /* if set to yes PIs must end with ?> */
113 protected boolean XmlSpace = false; /* if set to yes adds xml:space attr as needed */
114 protected boolean EncloseBodyText = false; /* if yes text at body is wrapped in <p>'s */
115 protected boolean EncloseBlockText = false; /* if yes text in blocks is wrapped in <p>'s */
116 protected boolean KeepFileTimes = true; /* if yes last modied time is preserved */
117 protected boolean Word2000 = false; /* draconian cleaning for Word2000 */
118 protected boolean TidyMark = true; /* add meta element indicating tidied doc */
119 protected boolean Emacs = false; /* if true format error output for GNU Emacs */
120 protected boolean LiteralAttribs = false; /* if true attributes may use newlines */
121 protected boolean CompactFormat=false; // true to disable FM_BREAK.
122 protected boolean FixAlt= true; /* true to use src filename for missing alt attribute text. */
123 /**
124 * Reformat mode perform inlineDup, run clean up filters ... etc. to cleanup the document.
125 * When set to false, changes to the document are minimized (no inlineDup(), Clean filters ... etc).
126 */
127 protected boolean doReformat=true;
128
129 private TagTable fTagTable; /* TagTable associated with this Configuration */
130 private AttributeTable fAttributeTable;
131
132 private transient Properties _properties = new Properties();
133
134 ////////////////////////////////////////////////////////////////////////////////////
135
136 public Configuration() {
137 fAttributeTable=AttributeTable.getDefaultAttributeTable();
138 EntityTable.getDefaultEntityTable();
139 fTagTable= new TagTable();
140 fTagTable.setXML(XmlTags);
141 }
142
143 ////////////////////////////////////////////////////////////////////////////////////
144
145 private static int parseInt(String s, String option) {
146 int i = 0;
147 try {
148 i = Integer.parseInt(s);
149 } catch (NumberFormatException e) {
150 Report.badArgument(option);
151 i = -1;
152 }
153 return i;
154 }
155
156 private static boolean parseBool(String s, String option) {
157 boolean b = false;
158 if (s != null && s.length() > 0) {
159 char c = s.charAt(0);
160 if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y') || (c == '1'))
161 b = true;
162 else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n') || (c == '0'))
163 b = false;
164 else
165 Report.badArgument(option);
166 }
167 return b;
168 }
169
170 private static boolean parseInvBool(String s, String option) {
171 boolean b = false;
172 if (s != null && s.length() > 0) {
173 char c = s.charAt(0);
174 if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y'))
175 b = true;
176 else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n'))
177 b = false;
178 else
179 Report.badArgument(option);
180 }
181 return !b;
182 }
183
184 private static String parseName(String s, String option) {
185 StringTokenizer t = new StringTokenizer(s);
186 String rs = null;
187 if (t.countTokens() >= 1)
188 rs = t.nextToken();
189 else
190 Report.badArgument(option);
191 return rs;
192 }
193
194 private static int parseCharEncoding(String s, String option) {
195 int result = ASCII;
196
197 if (Lexer.wstrcasecmp(s, "ascii") == 0)
198 result = ASCII;
199 else if (Lexer.wstrcasecmp(s, "latin1") == 0)
200 result = LATIN1;
201 else if (Lexer.wstrcasecmp(s, "raw") == 0)
202 result = RAW;
203 else if (Lexer.wstrcasecmp(s, "utf8") == 0)
204 result = UTF8;
205 else if (Lexer.wstrcasecmp(s, "iso2022") == 0)
206 result = ISO2022;
207 else if (Lexer.wstrcasecmp(s, "mac") == 0)
208 result = MACROMAN;
209 else
210 Report.badArgument(option);
211
212 return result;
213 }
214
215 ////////////////////////////////////////////////////////////////////////////////////
216
217 public TagTable getTagTable() {
218 return fTagTable;
219 }
220
221 public void setTagTable(TagTable table) {
222 fTagTable=table;
223 }
224
225 public AttributeTable getAttributeTable() {
226 return fAttributeTable;
227 }
228
229 public void setAttributeTable(AttributeTable table) {
230 fAttributeTable=table;
231 }
232
233 public void addProps(Properties p) {
234 Enumeration enum = p.propertyNames();
235 while (enum.hasMoreElements()) {
236 String key = (String) enum.nextElement();
237 String value = p.getProperty(key);
238 _properties.put(key, value);
239 }
240 parseProps();
241 }
242
243 public void parseFile(String filename) {
244 try {
245 _properties.load(new FileInputStream(filename));
246 } catch (IOException e) {
247 System.err.println(filename + e.toString());
248 return;
249 }
250 parseProps();
251 }
252
253 /* ensure that config is self consistent */
254 public void adjust() {
255 if (EncloseBlockText)
256 EncloseBodyText = true;
257
258 /* avoid the need to set IndentContent when SmartIndent is set */
259
260 if (SmartIndent)
261 IndentContent = true;
262
263 /* disable wrapping */
264 if (wraplen == 0)
265 wraplen = 0x7FFFFFFF;
266
267 /* Word 2000 needs o:p to be declared as inline */
268 if (Word2000) {
269 fTagTable.defineInlineTag("o:p");
270 }
271
272 /* XHTML is written in lower case */
273 if (xHTML) {
274 XmlOut = true;
275 UpperCaseTags = false;
276 UpperCaseAttrs = false;
277 }
278
279 /* if XML in, then XML out */
280 if (XmlTags) {
281 XmlOut = true;
282 XmlPIs = true;
283 fTagTable.setXML(true);
284 }
285
286 /* XML requires end tags */
287 if (XmlOut) {
288 // Converting & to & would cause & to becomes &amp;... when formatted
289 // more that once, so it would not work. Let user fix the document.
290 QuoteAmpersand = false;
291 HideEndTags = false;
292 }
293 }
294
295 ////////////////////////////////////////////////////////////////////////////////////
296
297 /*
298 doctype: omit | auto | strict | loose | <fpi>
299
300 where the fpi is a string similar to
301
302 "-//ACME//DTD HTML 3.14159//EN"
303 */
304 protected String parseDocType(String s, String option) {
305 s = s.trim();
306
307 /* "-//ACME//DTD HTML 3.14159//EN" or similar */
308
309 if (s.startsWith("\"")) {
310 docTypeMode = DOCTYPE_USER;
311 return s;
312 }
313
314 /* read first word */
315 String word = "";
316 StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
317 if (t.hasMoreTokens())
318 word = t.nextToken();
319
320 if (Lexer.wstrcasecmp(word, "omit") == 0)
321 docTypeMode = DOCTYPE_OMIT;
322 else if (Lexer.wstrcasecmp(word, "strict") == 0)
323 docTypeMode = DOCTYPE_STRICT;
324 else if (Lexer.wstrcasecmp(word, "loose") == 0 || Lexer.wstrcasecmp(word, "transitional") == 0)
325 docTypeMode = DOCTYPE_LOOSE;
326 else if (Lexer.wstrcasecmp(word, "auto") == 0)
327 docTypeMode = DOCTYPE_AUTO;
328 else {
329 docTypeMode = DOCTYPE_AUTO;
330 Report.badArgument(option);
331 }
332 return null;
333 }
334
335 ////////////////////////////////////////////////////////////////////////////////////
336
337 private void parseProps() {
338 String value;
339
340 value = _properties.getProperty("indent-spaces");
341 if (value != null)
342 indent = parseInt(value, "indent-spaces");
343
344 value = _properties.getProperty("wrap");
345 if (value != null)
346 wraplen = parseInt(value, "wrap");
347
348 value = _properties.getProperty("wrap-attributes");
349 if (value != null)
350 WrapAttVals = parseBool(value, "wrap-attributes");
351
352 value = _properties.getProperty("wrap-script-literals");
353 if (value != null)
354 WrapScriptlets = parseBool(value, "wrap-script-literals");
355
356 value = _properties.getProperty("wrap-sections");
357 if (value != null)
358 WrapSection = parseBool(value, "wrap-sections");
359
360 value = _properties.getProperty("wrap-asp");
361 if (value != null)
362 WrapAsp = parseBool(value, "wrap-asp");
363
364 value = _properties.getProperty("wrap-jste");
365 if (value != null)
366 WrapJste = parseBool(value, "wrap-jste");
367
368 value = _properties.getProperty("wrap-php");
369 if (value != null)
370 WrapPhp = parseBool(value, "wrap-php");
371
372 value = _properties.getProperty("literal-attributes");
373 if (value != null)
374 LiteralAttribs = parseBool(value, "literal-attributes");
375
376 value = _properties.getProperty("tab-size");
377 if (value != null)
378 tabsize = parseInt(value, "tab-size");
379
380 value = _properties.getProperty("markup");
381 if (value != null)
382 OnlyErrors = parseInvBool(value, "markup");
383
384 value = _properties.getProperty("quiet");
385 if (value != null)
386 Quiet = parseBool(value, "quiet");
387
388 value = _properties.getProperty("tidy-mark");
389 if (value != null)
390 TidyMark = parseBool(value, "tidy-mark");
391
392 value = _properties.getProperty("indent");
393 if (value != null)
394 IndentContent = parseIndent(value, "indent");
395
396 value = _properties.getProperty("indent-attributes");
397 if (value != null)
398 IndentAttributes = parseBool(value, "ident-attributes");
399
400 value = _properties.getProperty("hide-endtags");
401 if (value != null)
402 HideEndTags = parseBool(value, "hide-endtags");
403
404 value = _properties.getProperty("input-xml");
405 if (value != null)
406 XmlTags = parseBool(value, "input-xml");
407
408 value = _properties.getProperty("output-xml");
409 if (value != null)
410 XmlOut = parseBool(value, "output-xml");
411
412 value = _properties.getProperty("output-xhtml");
413 if (value != null)
414 xHTML = parseBool(value, "output-xhtml");
415
416 value = _properties.getProperty("add-xml-pi");
417 if (value != null)
418 XmlPi = parseBool(value, "add-xml-pi");
419
420 value = _properties.getProperty("add-xml-decl");
421 if (value != null)
422 XmlPi = parseBool(value, "add-xml-decl");
423
424 value = _properties.getProperty("assume-xml-procins");
425 if (value != null)
426 XmlPIs = parseBool(value, "assume-xml-procins");
427
428 value = _properties.getProperty("raw");
429 if (value != null)
430 RawOut = parseBool(value, "raw");
431
432 value = _properties.getProperty("uppercase-tags");
433 if (value != null)
434 UpperCaseTags = parseBool(value, "uppercase-tags");
435
436 value = _properties.getProperty("uppercase-attributes");
437 if (value != null)
438 UpperCaseAttrs = parseBool(value, "uppercase-attributes");
439
440 value = _properties.getProperty("clean");
441 if (value != null)
442 MakeClean = parseBool(value, "clean");
443
444 value = _properties.getProperty("logical-emphasis");
445 if (value != null)
446 LogicalEmphasis = parseBool(value, "logical-emphasis");
447
448 value = _properties.getProperty("word-2000");
449 if (value != null)
450 Word2000 = parseBool(value, "word-2000");
451
452 value = _properties.getProperty("drop-empty-paras");
453 if (value != null)
454 DropEmptyParas = parseBool(value, "drop-empty-paras");
455
456 value = _properties.getProperty("drop-font-tags");
457 if (value != null)
458 DropFontTags = parseBool(value, "drop-font-tags");
459
460 value = _properties.getProperty("enclose-text");
461 if (value != null)
462 EncloseBodyText = parseBool(value, "enclose-text");
463
464 value = _properties.getProperty("enclose-block-text");
465 if (value != null)
466 EncloseBlockText = parseBool(value, "enclose-block-text");
467
468 value = _properties.getProperty("no-fix-alt");
469 if (value != null)
470 FixAlt = false;
471
472 value = _properties.getProperty("add-xml-space");
473 if (value != null)
474 XmlSpace = parseBool(value, "add-xml-space");
475
476 value = _properties.getProperty("fix-bad-comments");
477 if (value != null)
478 FixComments = parseBool(value, "fix-bad-comments");
479
480 value = _properties.getProperty("split");
481 if (value != null)
482 BurstSlides = parseBool(value, "split");
483
484 value = _properties.getProperty("break-before-br");
485 if (value != null)
486 BreakBeforeBR = parseBool(value, "break-before-br");
487
488 value = _properties.getProperty("numeric-entities");
489 if (value != null)
490 NumEntities = parseBool(value, "numeric-entities");
491
492 value = _properties.getProperty("quote-marks");
493 if (value != null)
494 QuoteMarks = parseBool(value, "quote-marks");
495
496 value = _properties.getProperty("quote-nbsp");
497 if (value != null)
498 QuoteNbsp = parseBool(value, "quote-nbsp");
499
500 value = _properties.getProperty("quote-ampersand");
501 if (value != null)
502 QuoteAmpersand = parseBool(value, "quote-ampersand");
503
504 value = _properties.getProperty("write-back");
505 if (value != null)
506 writeback = parseBool(value, "write-back");
507
508 value = _properties.getProperty("keep-time");
509 if (value != null)
510 KeepFileTimes = parseBool(value, "keep-time");
511
512 value = _properties.getProperty("show-warnings");
513 if (value != null)
514 ShowWarnings = parseBool(value, "show-warnings");
515
516 value = _properties.getProperty("error-file");
517 if (value != null)
518 errfile = parseName(value, "error-file");
519
520 value = _properties.getProperty("slide-style");
521 if (value != null)
522 slidestyle = parseName(value, "slide-style");
523
524 value = _properties.getProperty("new-inline-tags");
525 if (value != null)
526 parseInlineTagNames(value, "new-inline-tags");
527
528 value = _properties.getProperty("new-blocklevel-tags");
529 if (value != null)
530 parseBlockTagNames(value, "new-blocklevel-tags");
531
532 value = _properties.getProperty("new-empty-tags");
533 if (value != null)
534 parseEmptyTagNames(value, "new-empty-tags");
535
536 value = _properties.getProperty("new-pre-tags");
537 if (value != null)
538 parsePreTagNames(value, "new-pre-tags");
539
540 value = _properties.getProperty("char-encoding");
541 if (value != null)
542 CharEncoding = parseCharEncoding(value, "char-encoding");
543
544 value = _properties.getProperty("doctype");
545 if (value != null)
546 docTypeStr = parseDocType(value, "doctype");
547
548 value = _properties.getProperty("fix-backslash");
549 if (value != null)
550 FixBackslash = parseBool(value, "fix-backslash");
551
552 value = _properties.getProperty("gnu-emacs");
553 if (value != null)
554 Emacs = parseBool(value, "gnu-emacs");
555 }
556
557 /* slight hack to avoid changes to pprint.c */
558 private boolean parseIndent(String s, String option) {
559 boolean b = IndentContent;
560
561 if (Lexer.wstrcasecmp(s, "yes") == 0) {
562 b = true;
563 SmartIndent = false;
564 } else if (Lexer.wstrcasecmp(s, "true") == 0) {
565 b = true;
566 SmartIndent = false;
567 } else if (Lexer.wstrcasecmp(s, "no") == 0) {
568 b = false;
569 SmartIndent = false;
570 } else if (Lexer.wstrcasecmp(s, "false") == 0) {
571 b = false;
572 SmartIndent = false;
573 } else if (Lexer.wstrcasecmp(s, "auto") == 0) {
574 b = true;
575 SmartIndent = true;
576 } else
577 Report.badArgument(option);
578 return b;
579 }
580
581 private void parseInlineTagNames(String s, String option) {
582 StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
583 while (t.hasMoreTokens()) {
584 fTagTable.defineInlineTag(t.nextToken());
585 }
586 }
587
588 private void parseBlockTagNames(String s, String option) {
589 StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
590 while (t.hasMoreTokens()) {
591 fTagTable.defineBlockTag(t.nextToken());
592 }
593 }
594
595 private void parseEmptyTagNames(String s, String option) {
596 StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
597 while (t.hasMoreTokens()) {
598 fTagTable.defineEmptyTag(t.nextToken());
599 }
600 }
601
602 private void parsePreTagNames(String s, String option) {
603 StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
604 while (t.hasMoreTokens()) {
605 fTagTable.definePreTag(t.nextToken());
606 }
607 }
608
609 ////////////////////////////////////////////////////////////////////////////////////
610
611 }