1 /*
2 * YUI Compressor
3 * Author: Julien Lecomte <jlecomte@yahoo-inc.com>
4 * Copyright (c) 2007, Yahoo! Inc. All rights reserved.
5 * Code licensed under the BSD License:
6 * http://developer.yahoo.net/yui/license.txt
7 */
8
9 package com.yahoo.platform.yui.compressor;
10
11 import org.mozilla.javascript;
12
13 import java.io.IOException;
14 import java.io.Reader;
15 import java.io.Writer;
16 import java.util;
17
18 public class JavaScriptCompressor {
19
// Pools of one-, two- and three-character names, filled by the static
// initializer below. Reserved words and the entries of 'builtin' are
// removed from the two- and three-character pools.
// NOTE(review): presumably consumed by the scope munging code, which is
// not part of this file -- confirm.
static final ArrayList ones;
static final ArrayList twos;
static final ArrayList threes;

// Short global symbol names; kept out of the name pools and never
// auto-declared as undeclared globals.
static final Set builtin = new HashSet();
// Maps a token type (boxed as Integer) to the text printed for that token.
static final Map literals = new Hashtable();
26
27 static {
28
29 // This list contains all the 3 characters or less built-in global
30 // symbols available in a browser. Please add to this list if you
31 // see anything missing.
32 builtin.add("NaN");
33 builtin.add("top");
34
35 ones = new ArrayList();
36 for (char c = 'A'; c <= 'Z'; c++)
37 ones.add(Character.toString(c));
38 for (char c = 'a'; c <= 'z'; c++)
39 ones.add(Character.toString(c));
40
41 twos = new ArrayList();
42 for (int i = 0; i < ones.size(); i++) {
43 String one = (String) ones.get(i);
44 for (char c = 'A'; c <= 'Z'; c++)
45 twos.add(one + Character.toString(c));
46 for (char c = 'a'; c <= 'z'; c++)
47 twos.add(one + Character.toString(c));
48 for (char c = '0'; c <= '9'; c++)
49 twos.add(one + Character.toString(c));
50 }
51
52 // Remove two-letter JavaScript reserved words and built-in globals...
53 twos.remove("as");
54 twos.remove("is");
55 twos.remove("do");
56 twos.remove("if");
57 twos.remove("in");
58 twos.removeAll(builtin);
59
60 threes = new ArrayList();
61 for (int i = 0; i < twos.size(); i++) {
62 String two = (String) twos.get(i);
63 for (char c = 'A'; c <= 'Z'; c++)
64 threes.add(two + Character.toString(c));
65 for (char c = 'a'; c <= 'z'; c++)
66 threes.add(two + Character.toString(c));
67 for (char c = '0'; c <= '9'; c++)
68 threes.add(two + Character.toString(c));
69 }
70
71 // Remove three-letter JavaScript reserved words and built-in globals...
72 threes.remove("for");
73 threes.remove("int");
74 threes.remove("new");
75 threes.remove("try");
76 threes.remove("use");
77 threes.remove("var");
78 threes.removeAll(builtin);
79
80 // That's up to ((26+26)*(1+(26+26+10)))*(1+(26+26+10))-8
81 // (206,380 symbols per scope)
82
83 // The following list comes from org/mozilla/javascript/Decompiler.java...
84 literals.put(new Integer(Token.GET), "get ");
85 literals.put(new Integer(Token.SET), "set ");
86 literals.put(new Integer(Token.TRUE), "true");
87 literals.put(new Integer(Token.FALSE), "false");
88 literals.put(new Integer(Token.NULL), "null");
89 literals.put(new Integer(Token.THIS), "this");
90 literals.put(new Integer(Token.FUNCTION), "function ");
91 literals.put(new Integer(Token.COMMA), ",");
92 literals.put(new Integer(Token.LC), "{");
93 literals.put(new Integer(Token.RC), "}");
94 literals.put(new Integer(Token.LP), "(");
95 literals.put(new Integer(Token.RP), ")");
96 literals.put(new Integer(Token.LB), "[");
97 literals.put(new Integer(Token.RB), "]");
98 literals.put(new Integer(Token.DOT), ".");
99 literals.put(new Integer(Token.NEW), "new ");
100 literals.put(new Integer(Token.DELPROP), "delete ");
101 literals.put(new Integer(Token.IF), "if");
102 literals.put(new Integer(Token.ELSE), "else");
103 literals.put(new Integer(Token.FOR), "for");
104 literals.put(new Integer(Token.IN), " in ");
105 literals.put(new Integer(Token.WITH), "with");
106 literals.put(new Integer(Token.WHILE), "while");
107 literals.put(new Integer(Token.DO), "do");
108 literals.put(new Integer(Token.TRY), "try");
109 literals.put(new Integer(Token.CATCH), "catch");
110 literals.put(new Integer(Token.FINALLY), "finally");
111 literals.put(new Integer(Token.THROW), "throw ");
112 literals.put(new Integer(Token.SWITCH), "switch");
113 literals.put(new Integer(Token.BREAK), "break ");
114 literals.put(new Integer(Token.CONTINUE), "continue ");
115 literals.put(new Integer(Token.CASE), "case ");
116 literals.put(new Integer(Token.DEFAULT), "default");
117 literals.put(new Integer(Token.RETURN), "return ");
118 literals.put(new Integer(Token.VAR), "var ");
119 literals.put(new Integer(Token.SEMI), ";");
120 literals.put(new Integer(Token.ASSIGN), "=");
121 literals.put(new Integer(Token.ASSIGN_ADD), "+=");
122 literals.put(new Integer(Token.ASSIGN_SUB), "-=");
123 literals.put(new Integer(Token.ASSIGN_MUL), "*=");
124 literals.put(new Integer(Token.ASSIGN_DIV), "/=");
125 literals.put(new Integer(Token.ASSIGN_MOD), "%=");
126 literals.put(new Integer(Token.ASSIGN_BITOR), "|=");
127 literals.put(new Integer(Token.ASSIGN_BITXOR), "^=");
128 literals.put(new Integer(Token.ASSIGN_BITAND), "&=");
129 literals.put(new Integer(Token.ASSIGN_LSH), "<<=");
130 literals.put(new Integer(Token.ASSIGN_RSH), ">>=");
131 literals.put(new Integer(Token.ASSIGN_URSH), ">>>=");
132 literals.put(new Integer(Token.HOOK), "?");
133 literals.put(new Integer(Token.OBJECTLIT), ":");
134 literals.put(new Integer(Token.COLON), ":");
135 literals.put(new Integer(Token.OR), "||");
136 literals.put(new Integer(Token.AND), "&&");
137 literals.put(new Integer(Token.BITOR), "|");
138 literals.put(new Integer(Token.BITXOR), "^");
139 literals.put(new Integer(Token.BITAND), "&");
140 literals.put(new Integer(Token.SHEQ), "===");
141 literals.put(new Integer(Token.SHNE), "!==");
142 literals.put(new Integer(Token.EQ), "==");
143 literals.put(new Integer(Token.NE), "!=");
144 literals.put(new Integer(Token.LE), "<=");
145 literals.put(new Integer(Token.LT), "<");
146 literals.put(new Integer(Token.GE), ">=");
147 literals.put(new Integer(Token.GT), ">");
148 literals.put(new Integer(Token.INSTANCEOF), " instanceof ");
149 literals.put(new Integer(Token.LSH), "<<");
150 literals.put(new Integer(Token.RSH), ">>");
151 literals.put(new Integer(Token.URSH), ">>>");
152 literals.put(new Integer(Token.TYPEOF), "typeof ");
153 literals.put(new Integer(Token.VOID), "void ");
154 literals.put(new Integer(Token.CONST), "const ");
155 literals.put(new Integer(Token.NOT), "!");
156 literals.put(new Integer(Token.BITNOT), "~");
157 literals.put(new Integer(Token.POS), "+");
158 literals.put(new Integer(Token.NEG), "-");
159 literals.put(new Integer(Token.INC), "++");
160 literals.put(new Integer(Token.DEC), "--");
161 literals.put(new Integer(Token.ADD), "+");
162 literals.put(new Integer(Token.SUB), "-");
163 literals.put(new Integer(Token.MUL), "*");
164 literals.put(new Integer(Token.DIV), "/");
165 literals.put(new Integer(Token.MOD), "%");
166 literals.put(new Integer(Token.COLONCOLON), "::");
167 literals.put(new Integer(Token.DOTDOT), "..");
168 literals.put(new Integer(Token.DOTQUERY), ".(");
169 literals.put(new Integer(Token.XMLATTR), "@");
170 }
171
172 private static int countChar(String haystack, char needle) {
173 int idx = 0;
174 int count = 0;
175 int length = haystack.length();
176 while (idx < length) {
177 char c = haystack.charAt(idx++);
178 if (c == needle) {
179 count++;
180 }
181 }
182 return count;
183 }
184
185 private static int printSourceString(String source, int offset, StringBuffer sb) {
186 int length = source.charAt(offset);
187 ++offset;
188 if ((0x8000 & length) != 0) {
189 length = ((0x7FFF & length) << 16) | source.charAt(offset);
190 ++offset;
191 }
192 if (sb != null) {
193 String str = source.substring(offset, offset + length);
194 sb.append(str);
195 }
196 return offset + length;
197 }
198
199 private static int printSourceNumber(String source,
200 int offset, StringBuffer sb) {
201 double number = 0.0;
202 char type = source.charAt(offset);
203 ++offset;
204 if (type == 'S') {
205 if (sb != null) {
206 number = source.charAt(offset);
207 }
208 ++offset;
209 } else if (type == 'J' || type == 'D') {
210 if (sb != null) {
211 long lbits;
212 lbits = (long) source.charAt(offset) << 48;
213 lbits |= (long) source.charAt(offset + 1) << 32;
214 lbits |= (long) source.charAt(offset + 2) << 16;
215 lbits |= (long) source.charAt(offset + 3);
216 if (type == 'J') {
217 number = lbits;
218 } else {
219 number = Double.longBitsToDouble(lbits);
220 }
221 }
222 offset += 4;
223 } else {
224 // Bad source
225 throw new RuntimeException();
226 }
227 if (sb != null) {
228 sb.append(ScriptRuntime.numberToString(number, 10));
229 }
230 return offset;
231 }
232
233 private static ArrayList parse(Reader in, ErrorReporter reporter)
234 throws IOException, EvaluatorException {
235
236 CompilerEnvirons env = new CompilerEnvirons();
237 Parser parser = new Parser(env, reporter);
238 parser.parse(in, null, 1);
239 String source = parser.getEncodedSource();
240
241 int offset = 0;
242 int length = source.length();
243 ArrayList tokens = new ArrayList();
244 StringBuffer sb = new StringBuffer();
245
246 while (offset < length) {
247 int tt = source.charAt(offset++);
248 switch (tt) {
249
250 case Token.IECC:
251 case Token.NAME:
252 case Token.REGEXP:
253 case Token.STRING:
254 sb.setLength(0);
255 offset = printSourceString(source, offset, sb);
256 tokens.add(new JavaScriptToken(tt, sb.toString()));
257 break;
258
259 case Token.NUMBER:
260 sb.setLength(0);
261 offset = printSourceNumber(source, offset, sb);
262 tokens.add(new JavaScriptToken(tt, sb.toString()));
263 break;
264
265 default:
266 String literal = (String) literals.get(new Integer(tt));
267 if (literal != null) {
268 tokens.add(new JavaScriptToken(tt, literal));
269 }
270 break;
271 }
272 }
273
274 return tokens;
275 }
276
277 private static ArrayList processStringLiterals(ArrayList tokens, boolean merge) {
278
279 String tv;
280 int i, length;
281 ArrayList result = new ArrayList();
282 JavaScriptToken token, prevToken, nextToken;
283
284 // Concatenate string literals that are being appended wherever
285 // it is safe to do so. Note that we take care of the case:
286 // "a" + "b".toUpperCase()
287
288 for (i = 0, length = tokens.size(); i < length; i++) {
289 token = (JavaScriptToken) tokens.get(i);
290 switch (token.getType()) {
291
292 case Token.ADD:
293 if (merge) {
294 if (i > 0 && i < length) {
295 prevToken = (JavaScriptToken) result.get(result.size() - 1);
296 nextToken = (JavaScriptToken) tokens.get(i + 1);
297 if (prevToken.getType() == Token.STRING && nextToken.getType() == Token.STRING &&
298 (i == length - 1 || ((JavaScriptToken) tokens.get(i + 2)).getType() != Token.DOT)) {
299 result.set(result.size() - 1, new JavaScriptToken(Token.STRING,
300 prevToken.getValue() + nextToken.getValue()));
301 i++; // not a good practice, but oh well...
302 break;
303 }
304 }
305 }
306
307 /* FALLSTHROUGH */
308
309 default:
310 result.add(token);
311 break;
312 }
313 }
314
315 // Second pass...
316
317 for (i = 0, length = result.size(); i < length; i++) {
318 token = (JavaScriptToken) result.get(i);
319 if (token.getType() == Token.STRING) {
320 tv = token.getValue();
321
322 // Finally, add the quoting characters and escape the string. We use
323 // the quoting character that minimizes the amount of escaping to save
324 // a few additional bytes.
325
326 char quotechar;
327 int singleQuoteCount = countChar(tv, '\'');
328 int doubleQuoteCount = countChar(tv, '"');
329 if (doubleQuoteCount <= singleQuoteCount) {
330 quotechar = '"';
331 } else {
332 quotechar = '\'';
333 }
334
335 tv = quotechar + escapeString(tv, quotechar) + quotechar;
336
337 // String concatenation transforms the old script scheme:
338 // '<scr'+'ipt ...><'+'/script>'
339 // into the following:
340 // '<script ...></script>'
341 // which breaks if this code is embedded inside an HTML document.
342 // Since this is not the right way to do this, let's fix the code by
343 // transforming all "</script" into "<\/script"
344
345 if (tv.indexOf("</script") >= 0) {
346 tv = tv.replaceAll("<\\/script", "<\\\\/script");
347 }
348
349 result.set(i, new JavaScriptToken(Token.STRING, tv));
350 }
351 }
352
353 return result;
354 }
355
356 // Add necessary escaping that was removed in Rhino's tokenizer.
357 private static String escapeString(String s, char quotechar) {
358
359 assert quotechar == '"' || quotechar == '\'';
360
361 if (s == null) {
362 return null;
363 }
364
365 StringBuffer sb = new StringBuffer();
366 for (int i = 0, L = s.length(); i < L; i++) {
367 int c = s.charAt(i);
368 if (c == quotechar) {
369 sb.append("\\");
370 }
371 sb.append((char) c);
372 }
373
374 return sb.toString();
375 }
376
// Receives the warnings issued through warn(); set by the constructor.
private ErrorReporter logger;

// Options of the current compress() call.
private boolean munge;
private boolean warn;

// Values taken by 'mode': the first pass over the tokens declares
// identifiers and creates scopes, the second one counts references and
// looks for undeclared short globals.
private static final int BUILDING_SYMBOL_TREE = 1;
private static final int CHECKING_SYMBOL_TREE = 2;

private int mode;
// Index in 'tokens' of the next token handed out by consumeToken().
private int offset;
// Number of currently open curly braces during a pass.
private int braceNesting;
// 'srctokens' is the output of parse(); 'tokens' is the list derived from
// it by processStringLiterals() on each compress() call.
private ArrayList srctokens, tokens;
// Scopes being traversed, innermost on top.
private Stack scopes = new Stack();
private ScriptOrFnScope globalScope = new ScriptOrFnScope(-1, null);
// Maps a token offset (boxed as Integer) to the function scope created
// there, so later passes find the scopes built by the first one.
private Hashtable indexedScopes = new Hashtable();
392
/**
 * Parses the script read from {@code in} right away; the resulting
 * tokens are kept for later calls to compress().
 *
 * @param in       source of the JavaScript code
 * @param reporter receives parse errors and, later on, warnings
 * @throws IOException        if reading from {@code in} fails
 * @throws EvaluatorException if the parser rejects the script
 */
public JavaScriptCompressor(Reader in, ErrorReporter reporter)
        throws IOException, EvaluatorException {

    logger = reporter;
    srctokens = parse(in, reporter);
}
399
400 public void compress(Writer out, int linebreak, boolean munge, boolean warn,
401 boolean preserveAllSemiColons, boolean preserveStringLiterals)
402 throws IOException {
403
404 this.munge = munge;
405 this.warn = warn;
406
407 this.tokens = processStringLiterals(this.srctokens, !preserveStringLiterals);
408
409 buildSymbolTree();
410 mungeSymboltree();
411 StringBuffer sb = printSymbolTree(linebreak, preserveAllSemiColons);
412
413 out.write(sb.toString());
414 }
415
416 private ScriptOrFnScope getCurrentScope() {
417 return (ScriptOrFnScope) scopes.peek();
418 }
419
/*
 * Makes 'scope' the current scope by pushing it on the scope stack.
 */
private void enterScope(ScriptOrFnScope scope) {
    scopes.push(scope);
}
423
/*
 * Drops the current scope from the scope stack, making the scope below
 * it current again.
 */
private void leaveCurrentScope() {
    scopes.pop();
}
427
428 private JavaScriptToken consumeToken() {
429 return (JavaScriptToken) tokens.get(offset++);
430 }
431
432 private JavaScriptToken getToken(int delta) {
433 return (JavaScriptToken) tokens.get(offset + delta);
434 }
435
436 /*
437 * Returns the identifier for the specified symbol defined in
438 * the specified scope or in any scope above it. Returns null
439 * if this symbol does not have a corresponding identifier.
440 */
441 private JavaScriptIdentifier getIdentifier(String symbol, ScriptOrFnScope scope) {
442 JavaScriptIdentifier identifier;
443 while (scope != null) {
444 identifier = scope.getIdentifier(symbol);
445 if (identifier != null) {
446 return identifier;
447 }
448 scope = scope.getParentScope();
449 }
450 return null;
451 }
452
453 /*
454 * If either 'eval' or 'with' is used in a local scope, we must make
455 * sure that all containing local scopes don't get munged. Otherwise,
456 * the obfuscation would potentially introduce bugs.
457 */
458 private void protectScopeFromObfuscation(ScriptOrFnScope scope) {
459 assert scope != null;
460
461 if (scope == globalScope) {
462 // The global scope does not get obfuscated,
463 // so we don't need to worry about it...
464 return;
465 }
466
467 // Find the highest local scope containing the specified scope.
468 while (scope.getParentScope() != globalScope) {
469 scope = scope.getParentScope();
470 }
471
472 assert scope.getParentScope() == globalScope;
473 scope.preventMunging();
474 }
475
476 private String getDebugString(int max) {
477 assert max > 0;
478 StringBuffer result = new StringBuffer();
479 int start = Math.max(offset - max, 0);
480 int end = Math.min(offset + max, tokens.size());
481 for (int i = start; i < end; i++) {
482 JavaScriptToken token = (JavaScriptToken) tokens.get(i);
483 if (i == offset)
484 result.append(" ---> ");
485 result.append(token.getValue());
486 if (i == offset)
487 result.append(" <--- ");
488 }
489 return result.toString();
490 }
491
492 private void warn(String message, boolean showDebugString) {
493 if (warn) {
494 if (showDebugString) {
495 message = message + "\n" + getDebugString(10);
496 }
497 logger.warning(message, null, -1, null, -1);
498 }
499 }
500
501 private void parseFunctionDeclaration() {
502
503 String symbol;
504 JavaScriptToken token;
505 ScriptOrFnScope currentScope, fnScope;
506 JavaScriptIdentifier identifier;
507
508 currentScope = getCurrentScope();
509
510 token = consumeToken();
511 if (token.getType() == Token.NAME) {
512 if (mode == BUILDING_SYMBOL_TREE) {
513 // Get the name of the function and declare it in the current scope.
514 symbol = token.getValue();
515 if (currentScope.getIdentifier(symbol) != null) {
516 warn("[WARNING] The function " + symbol + " has already been declared in the same scope...", true);
517 }
518 currentScope.declareIdentifier(symbol);
519 }
520 token = consumeToken();
521 }
522
523 assert token.getType() == Token.LP;
524 if (mode == BUILDING_SYMBOL_TREE) {
525 fnScope = new ScriptOrFnScope(braceNesting, currentScope);
526 indexedScopes.put(new Integer(offset), fnScope);
527 } else {
528 fnScope = (ScriptOrFnScope) indexedScopes.get(new Integer(offset));
529 }
530
531 // Parse function arguments.
532 int argpos = 0;
533 while ((token = consumeToken()).getType() != Token.RP) {
534 assert token.getType() == Token.NAME ||
535 token.getType() == Token.COMMA;
536 if (token.getType() == Token.NAME && mode == BUILDING_SYMBOL_TREE) {
537 symbol = token.getValue();
538 identifier = fnScope.declareIdentifier(symbol);
539 if (symbol.equals("$super") && argpos == 0) {
540 // Exception for Prototype 1.6...
541 identifier.preventMunging();
542 }
543 argpos++;
544 }
545 }
546
547 token = consumeToken();
548 assert token.getType() == Token.LC;
549 braceNesting++;
550
551 token = getToken(0);
552 if (token.getType() == Token.STRING &&
553 getToken(1).getType() == Token.SEMI) {
554 // This is a hint. Hints are empty statements that look like
555 // "localvar1:nomunge, localvar2:nomunge"; They allow developers
556 // to prevent specific symbols from getting obfuscated (some heretic
557 // implementations, such as Prototype 1.6, require specific variable
558 // names, such as $super for example, in order to work appropriately.
559 // Note: right now, only "nomunge" is supported in the right hand side
560 // of a hint. However, in the future, the right hand side may contain
561 // other values.
562 consumeToken();
563 String hints = token.getValue();
564 // Remove the leading and trailing quotes...
565 hints = hints.substring(1, hints.length() - 1).trim();
566 StringTokenizer st1 = new StringTokenizer(hints, ",");
567 while (st1.hasMoreTokens()) {
568 String hint = st1.nextToken();
569 int idx = hint.indexOf(':');
570 if (idx <= 0 || idx >= hint.length() - 1) {
571 if (mode == BUILDING_SYMBOL_TREE) {
572 // No need to report the error twice, hence the test...
573 warn("[WARNING] Invalid hint syntax: " + hint, true);
574 }
575 break;
576 }
577 String variableName = hint.substring(0, idx).trim();
578 String variableType = hint.substring(idx + 1).trim();
579 if (mode == BUILDING_SYMBOL_TREE) {
580 fnScope.addHint(variableName, variableType);
581 } else if (mode == CHECKING_SYMBOL_TREE) {
582 identifier = fnScope.getIdentifier(variableName);
583 if (identifier != null) {
584 if (variableType.equals("nomunge")) {
585 identifier.preventMunging();
586 } else {
587 warn("[WARNING] Unsupported hint value: " + hint, true);
588 }
589 } else {
590 warn("[WARNING] Hint refers to an unknown identifier: " + hint, true);
591 }
592 }
593 }
594 }
595
596 parseScope(fnScope);
597 }
598
599 private void parseCatch() {
600
601 String symbol;
602 JavaScriptToken token;
603 ScriptOrFnScope currentScope;
604 JavaScriptIdentifier identifier;
605
606 token = getToken(-1);
607 assert token.getType() == Token.CATCH;
608 token = consumeToken();
609 assert token.getType() == Token.LP;
610 token = consumeToken();
611 assert token.getType() == Token.NAME;
612
613 symbol = token.getValue();
614 currentScope = getCurrentScope();
615
616 if (mode == BUILDING_SYMBOL_TREE) {
617 // We must declare the exception identifier in the containing function
618 // scope to avoid errors related to the obfuscation process. No need to
619 // display a warning if the symbol was already declared here...
620 currentScope.declareIdentifier(symbol);
621 } else {
622 identifier = getIdentifier(symbol, currentScope);
623 identifier.incrementRefcount();
624 }
625
626 token = consumeToken();
627 assert token.getType() == Token.RP;
628 }
629
630 private void parseExpression() {
631
632 // Parse the expression until we encounter a comma or a semi-colon
633 // in the same brace nesting, bracket nesting and paren nesting.
634 // Parse functions if any...
635
636 String symbol;
637 JavaScriptToken token;
638 ScriptOrFnScope currentScope;
639 JavaScriptIdentifier identifier;
640
641 int expressionBraceNesting = braceNesting;
642 int bracketNesting = 0;
643 int parensNesting = 0;
644
645 int length = tokens.size();
646
647 while (offset < length) {
648
649 token = consumeToken();
650 currentScope = getCurrentScope();
651
652 switch (token.getType()) {
653
654 case Token.SEMI:
655 case Token.COMMA:
656 if (braceNesting == expressionBraceNesting &&
657 bracketNesting == 0 &&
658 parensNesting == 0) {
659 return;
660 }
661 break;
662
663 case Token.FUNCTION:
664 parseFunctionDeclaration();
665 break;
666
667 case Token.LC:
668 braceNesting++;
669 break;
670
671 case Token.RC:
672 braceNesting--;
673 assert braceNesting >= expressionBraceNesting;
674 break;
675
676 case Token.LB:
677 bracketNesting++;
678 break;
679
680 case Token.RB:
681 bracketNesting--;
682 break;
683
684 case Token.LP:
685 parensNesting++;
686 break;
687
688 case Token.RP:
689 parensNesting--;
690 break;
691
692 case Token.IECC:
693 if (mode == BUILDING_SYMBOL_TREE) {
694 protectScopeFromObfuscation(currentScope);
695 warn("[WARNING] Using JScript conditional comments is not recommended..." + (munge ? "\n[INFO] Using JSCript conditional comments reduces the level of compression!" : ""), true);
696 }
697 break;
698
699 case Token.NAME:
700 symbol = token.getValue();
701
702 if (mode == BUILDING_SYMBOL_TREE) {
703
704 if (symbol.equals("eval")) {
705
706 protectScopeFromObfuscation(currentScope);
707 warn("[WARNING] Using 'eval' is not recommended..." + (munge ? "\n[INFO] Using 'eval' reduces the level of compression!" : ""), true);
708
709 }
710
711 } else if (mode == CHECKING_SYMBOL_TREE) {
712
713 if ((offset < 2 ||
714 (getToken(-2).getType() != Token.DOT &&
715 getToken(-2).getType() != Token.GET &&
716 getToken(-2).getType() != Token.SET)) &&
717 getToken(0).getType() != Token.OBJECTLIT) {
718
719 identifier = getIdentifier(symbol, currentScope);
720
721 if (identifier == null) {
722
723 if (symbol.length() <= 3 && !builtin.contains(symbol)) {
724 // Here, we found an undeclared and un-namespaced symbol that is
725 // 3 characters or less in length. Declare it in the global scope.
726 // We don't need to declare longer symbols since they won't cause
727 // any conflict with other munged symbols.
728 globalScope.declareIdentifier(symbol);
729 warn("[WARNING] Found an undeclared symbol: " + symbol, true);
730 }
731
732 } else {
733
734 identifier.incrementRefcount();
735 }
736 }
737 }
738 break;
739 }
740 }
741 }
742
743 private void parseScope(ScriptOrFnScope scope) {
744
745 String symbol;
746 JavaScriptToken token;
747 JavaScriptIdentifier identifier;
748
749 int length = tokens.size();
750
751 enterScope(scope);
752
753 while (offset < length) {
754
755 token = consumeToken();
756
757 switch (token.getType()) {
758
759 case Token.VAR:
760 case Token.CONST:
761
762 // The var keyword is followed by at least one symbol name.
763 // If several symbols follow, they are comma separated.
764 for (; ;) {
765 token = consumeToken();
766
767 assert token.getType() == Token.NAME;
768
769 if (mode == BUILDING_SYMBOL_TREE) {
770 symbol = token.getValue();
771 if (scope.getIdentifier(symbol) == null) {
772 scope.declareIdentifier(symbol);
773 } else {
774 warn("[WARNING] The variable " + symbol + " has already been declared in the same scope...", true);
775 }
776 }
777
778 token = getToken(0);
779
780 assert token.getType() == Token.SEMI ||
781 token.getType() == Token.ASSIGN ||
782 token.getType() == Token.COMMA ||
783 token.getType() == Token.IN;
784
785 if (token.getType() == Token.IN) {
786 break;
787 } else {
788 parseExpression();
789 token = getToken(-1);
790 if (token.getType() == Token.SEMI) {
791 break;
792 }
793 }
794 }
795 break;
796
797 case Token.FUNCTION:
798 parseFunctionDeclaration();
799 break;
800
801 case Token.LC:
802 braceNesting++;
803 break;
804
805 case Token.RC:
806 braceNesting--;
807 assert braceNesting >= scope.getBraceNesting();
808 if (braceNesting == scope.getBraceNesting()) {
809 leaveCurrentScope();
810 return;
811 }
812 break;
813
814 case Token.WITH:
815 if (mode == BUILDING_SYMBOL_TREE) {
816 // Inside a 'with' block, it is impossible to figure out
817 // statically whether a symbol is a local variable or an
818 // object member. As a consequence, the only thing we can
819 // do is turn the obfuscation off for the highest scope
820 // containing the 'with' block.
821 protectScopeFromObfuscation(scope);
822 warn("[WARNING] Using 'with' is not recommended" + (munge ? "(and it reduces the level of compression)" : ""), true);
823 }
824 break;
825
826 case Token.CATCH:
827 parseCatch();
828 break;
829
830 case Token.IECC:
831 if (mode == BUILDING_SYMBOL_TREE) {
832 protectScopeFromObfuscation(scope);
833 warn("[WARNING] Using JScript conditional comments is not recommended..." + (munge ? "\n[INFO] Using JSCript conditional comments reduces the level of compression!" : ""), true);
834 }
835 break;
836
837 case Token.NAME:
838 symbol = token.getValue();
839
840 if (mode == BUILDING_SYMBOL_TREE) {
841
842 if (symbol.equals("eval")) {
843
844 protectScopeFromObfuscation(scope);
845 warn("[WARNING] Using 'eval' is not recommended..." + (munge ? "\n[INFO] Using 'eval' reduces the level of compression!" : ""), true);
846
847 }
848
849 } else if (mode == CHECKING_SYMBOL_TREE) {
850
851 if ((offset < 2 || getToken(-2).getType() != Token.DOT) &&
852 getToken(0).getType() != Token.OBJECTLIT) {
853
854 identifier = getIdentifier(symbol, scope);
855
856 if (identifier == null) {
857
858 if (symbol.length() <= 3 && !builtin.contains(symbol)) {
859 // Here, we found an undeclared and un-namespaced symbol that is
860 // 3 characters or less in length. Declare it in the global scope.
861 // We don't need to declare longer symbols since they won't cause
862 // any conflict with other munged symbols.
863 globalScope.declareIdentifier(symbol);
864 warn("[WARNING] Found an undeclared symbol: " + symbol, true);
865 }
866
867 } else {
868
869 identifier.incrementRefcount();
870 }
871 }
872 }
873 break;
874 }
875 }
876 }
877
878 private void buildSymbolTree() {
879 offset = 0;
880 braceNesting = 0;
881 scopes.clear();
882 indexedScopes.clear();
883 indexedScopes.put(new Integer(0), globalScope);
884 mode = BUILDING_SYMBOL_TREE;
885 parseScope(globalScope);
886 }
887
888 private void mungeSymboltree() {
889
890 if (!munge) {
891 return;
892 }
893
894 // One problem with obfuscation resides in the use of undeclared
895 // and un-namespaced global symbols that are 3 characters or less
896 // in length. Here is an example:
897 //
898 // var declaredGlobalVar;
899 //
900 // function declaredGlobalFn() {
901 // var localvar;
902 // localvar = abc; // abc is an undeclared global symbol
903 // }
904 //
905 // In the example above, there is a slim chance that localvar may be
906 // munged to 'abc', conflicting with the undeclared global symbol
907 // abc, creating a potential bug. The following code detects such
908 // global symbols. This must be done AFTER the entire file has been
909 // parsed, and BEFORE munging the symbol tree. Note that declaring
910 // extra symbols in the global scope won't hurt.
911 //
912 // Note: Since we go through all the tokens to do this, we also use
913 // the opportunity to count how many times each identifier is used.
914
915 offset = 0;
916 braceNesting = 0;
917 scopes.clear();
918 mode = CHECKING_SYMBOL_TREE;
919 parseScope(globalScope);
920 globalScope.munge();
921 }
922
923 private StringBuffer printSymbolTree(int linebreakpos, boolean preserveAllSemiColons)
924 throws IOException {
925
926 offset = 0;
927 braceNesting = 0;
928 scopes.clear();
929
930 String symbol;
931 JavaScriptToken token;
932 ScriptOrFnScope currentScope;
933 JavaScriptIdentifier identifier;
934
935 int length = tokens.size();
936 StringBuffer result = new StringBuffer();
937
938 int linestartpos = 0;
939
940 enterScope(globalScope);
941
942 while (offset < length) {
943
944 token = consumeToken();
945 symbol = token.getValue();
946 currentScope = getCurrentScope();
947
948 switch (token.getType()) {
949
950 case Token.NAME:
951
952 if (offset >= 2 && getToken(-2).getType() == Token.DOT ||
953 getToken(0).getType() == Token.OBJECTLIT) {
954
955 result.append(symbol);
956
957 } else {
958
959 identifier = getIdentifier(symbol, currentScope);
960 if (identifier != null) {
961 if (identifier.getMungedValue() != null) {
962 result.append(identifier.getMungedValue());
963 } else {
964 result.append(symbol);
965 }
966 if (currentScope != globalScope && identifier.getRefcount() == 0) {
967 warn("[WARNING] The symbol " + symbol + " is declared but is apparently never used.\nThis code can probably be written in a more efficient way.", true);
968 }
969 } else {
970 result.append(symbol);
971 }
972 }
973 break;
974
975 case Token.REGEXP:
976 case Token.NUMBER:
977 case Token.STRING:
978 result.append(symbol);
979 break;
980
981 case Token.ADD:
982 case Token.SUB:
983 result.append((String) literals.get(new Integer(token.getType())));
984 if (offset < length) {
985 token = getToken(0);
986 if (token.getType() == Token.INC ||
987 token.getType() == Token.DEC ||
988 token.getType() == Token.ADD ||
989 token.getType() == Token.DEC) {
990 // Handle the case x +/- ++/-- y
991 // We must keep a white space here. Otherwise, x +++ y would be
992 // interpreted as x ++ + y by the compiler, which is a bug (due
993 // to the implicit assignment being done on the wrong variable)
994 result.append(" ");
995 } else if (token.getType() == Token.POS && getToken(-1).getType() == Token.ADD ||
996 token.getType() == Token.NEG && getToken(-1).getType() == Token.SUB) {
997 // Handle the case x + + y and x - - y
998 result.append(" ");
999 }
1000 }
1001 break;
1002
1003 case Token.FUNCTION:
1004 result.append("function");
1005 token = consumeToken();
1006 if (token.getType() == Token.NAME) {
1007 result.append(" ");
1008 symbol = token.getValue();
1009 identifier = getIdentifier(symbol, currentScope);
1010 assert identifier != null;
1011 if (identifier.getMungedValue() != null) {
1012 result.append(identifier.getMungedValue());
1013 } else {
1014 result.append(symbol);
1015 }
1016 if (currentScope != globalScope && identifier.getRefcount() == 0) {
1017 warn("[WARNING] The symbol " + symbol + " is declared but is apparently never used.\nThis code can probably be written in a more efficient way.", true);
1018 }
1019 token = consumeToken();
1020 }
1021 assert token.getType() == Token.LP;
1022 result.append("(");
1023 currentScope = (ScriptOrFnScope) indexedScopes.get(new Integer(offset));
1024 enterScope(currentScope);
1025 while ((token = consumeToken()).getType() != Token.RP) {
1026 assert token.getType() == Token.NAME || token.getType() == Token.COMMA;
1027 if (token.getType() == Token.NAME) {
1028 symbol = token.getValue();
1029 identifier = getIdentifier(symbol, currentScope);
1030 assert identifier != null;
1031 if (identifier.getMungedValue() != null) {
1032 result.append(identifier.getMungedValue());
1033 } else {
1034 result.append(symbol);
1035 }
1036 } else if (token.getType() == Token.COMMA) {
1037 result.append(",");
1038 }
1039 }
1040 result.append(")");
1041 token = consumeToken();
1042 assert token.getType() == Token.LC;
1043 result.append("{");
1044 braceNesting++;
1045 token = getToken(0);
1046 if (token.getType() == Token.STRING) {
1047 // This is a hint. Skip it!
1048 consumeToken();
1049 token = getToken(0);
1050 assert token.getType() == Token.SEMI;
1051 consumeToken();
1052 }
1053 break;
1054
1055 case Token.RETURN:
1056 result.append("return");
1057 // No space needed after 'return' when followed
1058 // by '(', '[', '{', a string or a regexp.
1059 if (offset < length) {
1060 token = getToken(0);
1061 if (token.getType() != Token.LP &&
1062 token.getType() != Token.LB &&
1063 token.getType() != Token.LC &&
1064 token.getType() != Token.STRING &&
1065 token.getType() != Token.REGEXP) {
1066 result.append(" ");
1067 }
1068 }
1069 break;
1070
1071 case Token.CASE:
1072 result.append("case");
1073 // White-space needed after 'case' when not followed by a string.
1074 if (offset < length && getToken(0).getType() != Token.STRING) {
1075 result.append(" ");
1076 }
1077 break;
1078
1079 case Token.THROW:
1080 // White-space needed after 'throw' when not followed by a string.
1081 result.append("throw");
1082 if (offset < length && getToken(0).getType() != Token.STRING) {
1083 result.append(" ");
1084 }
1085 break;
1086
1087 case Token.BREAK:
1088 result.append("break");
1089 if (offset < length && getToken(0).getType() != Token.SEMI) {
1090 // If 'break' is not followed by a semi-colon, it must be
1091 // followed by a label, hence the need for a white space.
1092 result.append(" ");
1093 }
1094 break;
1095
1096 case Token.CONTINUE:
1097 result.append("continue");
1098 if (offset < length && getToken(0).getType() != Token.SEMI) {
1099 // If 'continue' is not followed by a semi-colon, it must be
1100 // followed by a label, hence the need for a white space.
1101 result.append(" ");
1102 }
1103 break;
1104
1105 case Token.LC:
1106 result.append("{");
1107 braceNesting++;
1108 break;
1109
1110 case Token.RC:
1111 result.append("}");
1112 braceNesting--;
1113 assert braceNesting >= currentScope.getBraceNesting();
1114 if (braceNesting == currentScope.getBraceNesting()) {
1115 leaveCurrentScope();
1116 }
1117 break;
1118
1119 case Token.SEMI:
1120 // No need to output a semi-colon if the next character is a right-curly...
1121 if (preserveAllSemiColons || offset < length && getToken(0).getType() != Token.RC)
1122 result.append(";");
1123 if (linebreakpos >= 0 && result.length() - linestartpos > linebreakpos) {
1124 // Some source control tools don't like it when files containing lines longer
1125 // than, say 8000 characters, are checked in. The linebreak option is used in
1126 // that case to split long lines after a specific column.
1127 result.append("\n");
1128 linestartpos = result.length();
1129 }
1130 break;
1131
1132 case Token.IECC:
1133 result.append("/*");
1134 result.append(symbol);
1135 result.append("*/");
1136 break;
1137
1138 default:
1139 String literal = (String) literals.get(new Integer(token.getType()));
1140 if (literal != null) {
1141 result.append(literal);
1142 } else {
1143 warn("[WARNING] This symbol cannot be printed: " + symbol, true);
1144 }
1145 break;
1146 }
1147 }
1148
1149 return result;
1150 }
1151 }