Source code: java_cup/production.java
1
2 package java_cup;
3
4 import java.util.Hashtable;
5 import java.util.Enumeration;
6
7 /** This class represents a production in the grammar. It contains
8 * a LHS non terminal, and an array of RHS symbols. As various
9 * transformations are done on the RHS of the production, it may shrink.
10 * As a result a separate length is always maintained to indicate how much
11 * of the RHS array is still valid.<p>
12 *
13 * I addition to construction and manipulation operations, productions provide
14 * methods for factoring out actions (see remove_embedded_actions()), for
15 * computing the nullability of the production (i.e., can it derive the empty
16 * string, see check_nullable()), and operations for computing its first
17 * set (i.e., the set of terminals that could appear at the beginning of some
18 * string derived from the production, see check_first_set()).
19 *
20 * @see java_cup.production_part
21 * @see java_cup.symbol_part
22 * @see java_cup.action_part
23 * @version last updated: 7/3/96
24 * @author Frank Flannery
25 */
26
27 public class production {
28
29 /*-----------------------------------------------------------*/
30 /*--- Constructor(s) ----------------------------------------*/
31 /*-----------------------------------------------------------*/
32
33 /** Full constructor. This constructor accepts a LHS non terminal,
34 * an array of RHS parts (including terminals, non terminals, and
35 * actions), and a string for a final reduce action. It does several
36 * manipulations in the process of creating a production object.
37 * After some validity checking it translates labels that appear in
38 * actions into code for accessing objects on the runtime parse stack.
39 * It them merges adjacent actions if they appear and moves any trailing
40 * action into the final reduce actions string. Next it removes any
41 * embedded actions by factoring them out with new action productions.
42 * Finally it assigns a unique index to the production.<p>
43 *
44 * Factoring out of actions is accomplished by creating new "hidden"
45 * non terminals. For example if the production was originally: <pre>
46 * A ::= B {action} C D
47 * </pre>
48 * then it is factored into two productions:<pre>
49 * A ::= B X C D
50 * X ::= {action}
51 * </pre>
52 * (where X is a unique new non terminal). This has the effect of placing
53 * all actions at the end where they can be handled as part of a reduce by
54 * the parser.
55 */
56 public production(
57 non_terminal lhs_sym,
58 production_part rhs_parts[],
59 int rhs_l,
60 String action_str)
61 throws internal_error
62 {
63 int i;
64 action_part tail_action;
65 String declare_str;
66 int rightlen = rhs_l;
67
68 /* remember the length */
69 if (rhs_l >= 0)
70 _rhs_length = rhs_l;
71 else if (rhs_parts != null)
72 _rhs_length = rhs_parts.length;
73 else
74 _rhs_length = 0;
75
76 /* make sure we have a valid left-hand-side */
77 if (lhs_sym == null)
78 throw new internal_error(
79 "Attempt to construct a production with a null LHS");
80
81 /* I'm not translating labels anymore, I'm adding code to declare
82 labels as valid variables. This way, the users code string is
83 untouched
84 6/96 frankf */
85
86 /* check if the last part of the right hand side is an action. If
87 it is, it won't be on the stack, so we don't want to count it
88 in the rightlen. Then when we search down the stack for a
89 Symbol, we don't try to search past action */
90
91 if (rhs_l > 0) {
92 if (rhs_parts[rhs_l - 1].is_action()) {
93 rightlen = rhs_l - 1;
94 } else {
95 rightlen = rhs_l;
96 }
97 }
98
99 /* get the generated declaration code for the necessary labels. */
100 declare_str = declare_labels(
101 rhs_parts, rightlen, action_str);
102
103 if (action_str == null)
104 action_str = declare_str;
105 else
106 action_str = declare_str + action_str;
107
108 /* count use of lhs */
109 lhs_sym.note_use();
110
111 /* create the part for left-hand-side */
112 _lhs = new symbol_part(lhs_sym);
113
114 /* merge adjacent actions (if any) */
115 _rhs_length = merge_adjacent_actions(rhs_parts, _rhs_length);
116
117 /* strip off any trailing action */
118 tail_action = strip_trailing_action(rhs_parts, _rhs_length);
119 if (tail_action != null) _rhs_length--;
120
121 /* Why does this run through the right hand side happen
122 over and over? here a quick combination of two
123 prior runs plus one I wanted of my own
124 frankf 6/25/96 */
125 /* allocate and copy over the right-hand-side */
126 /* count use of each rhs symbol */
127 _rhs = new production_part[_rhs_length];
128 for (i=0; i<_rhs_length; i++) {
129 _rhs[i] = rhs_parts[i];
130 if (!_rhs[i].is_action()) {
131 ((symbol_part)_rhs[i]).the_symbol().note_use();
132 if (((symbol_part)_rhs[i]).the_symbol() instanceof terminal) {
133 _rhs_prec =
134 ((terminal)((symbol_part)_rhs[i]).the_symbol()).precedence_num();
135 _rhs_assoc =
136 ((terminal)((symbol_part)_rhs[i]).the_symbol()).precedence_side();
137 }
138 }
139 }
140
141 /*now action string is really declaration string, so put it in front!
142 6/14/96 frankf */
143 if (action_str == null) action_str = "";
144 if (tail_action != null && tail_action.code_string() != null)
145 action_str = action_str + "\t\t" + tail_action.code_string();
146
147 /* stash the action */
148 _action = new action_part(action_str);
149
150 /* rewrite production to remove any embedded actions */
151 remove_embedded_actions();
152
153 /* assign an index */
154 _index = next_index++;
155
156 /* put us in the global collection of productions */
157 _all.put(new Integer(_index),this);
158
159 /* put us in the production list of the lhs non terminal */
160 lhs_sym.add_production(this);
161 }
162
163 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
164
165 /** Constructor with no action string. */
166 public production(
167 non_terminal lhs_sym,
168 production_part rhs_parts[],
169 int rhs_l)
170 throws internal_error
171 {
172 this(lhs_sym,rhs_parts,rhs_l,null);
173 }
174
175 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
176
177 /* Constructor with precedence and associativity of production
178 contextually define */
179 public production(
180 non_terminal lhs_sym,
181 production_part rhs_parts[],
182 int rhs_l,
183 String action_str,
184 int prec_num,
185 int prec_side)
186 throws internal_error
187 {
188 this(lhs_sym,rhs_parts,rhs_l,action_str);
189
190 /* set the precedence */
191 set_precedence_num(prec_num);
192 set_precedence_side(prec_side);
193 }
194
195 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
196
197 /* Constructor w/ no action string and contextual precedence
198 defined */
199 public production(
200 non_terminal lhs_sym,
201 production_part rhs_parts[],
202 int rhs_l,
203 int prec_num,
204 int prec_side)
205 throws internal_error
206 {
207 this(lhs_sym,rhs_parts,rhs_l,null);
208 /* set the precedence */
209 set_precedence_num(prec_num);
210 set_precedence_side(prec_side);
211 }
212
213 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
214
215 /*-----------------------------------------------------------*/
216 /*--- (Access to) Static (Class) Variables ------------------*/
217 /*-----------------------------------------------------------*/
218
219
220 /** Table of all productions. Elements are stored using their index as
221 * the key.
222 */
223 protected static Hashtable _all = new Hashtable();
224
225 /** Access to all productions. */
226 public static Enumeration all() {return _all.elements();}
227
228 /** Lookup a production by index. */
229 public static production find(int indx) {
230 return (production) _all.get(new Integer(indx));
231 }
232
233 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
234
235 /** Total number of productions. */
236 public static int number() {return _all.size();}
237
238 /** Static counter for assigning unique index numbers. */
239 protected static int next_index;
240
241 /*-----------------------------------------------------------*/
242 /*--- (Access to) Instance Variables ------------------------*/
243 /*-----------------------------------------------------------*/
244
245 /** The left hand side non-terminal. */
246 protected symbol_part _lhs;
247
248 /** The left hand side non-terminal. */
249 public symbol_part lhs() {return _lhs;}
250
251
252 /** The precedence of the rule */
253 protected int _rhs_prec = -1;
254 protected int _rhs_assoc = -1;
255
256 /** Access to the precedence of the rule */
257 public int precedence_num() { return _rhs_prec; }
258 public int precedence_side() { return _rhs_assoc; }
259
260 /** Setting the precedence of a rule */
261 public void set_precedence_num(int prec_num) {
262 _rhs_prec = prec_num;
263 }
264 public void set_precedence_side(int prec_side) {
265 _rhs_assoc = prec_side;
266 }
267
268 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
269
270 /** A collection of parts for the right hand side. */
271 protected production_part _rhs[];
272
273 /** Access to the collection of parts for the right hand side. */
274 public production_part rhs(int indx) throws internal_error
275 {
276 if (indx >= 0 && indx < _rhs_length)
277 return _rhs[indx];
278 else
279 throw new internal_error(
280 "Index out of range for right hand side of production");
281 }
282
283 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
284
285 /** How much of the right hand side array we are presently using. */
286 protected int _rhs_length;
287
288 /** How much of the right hand side array we are presently using. */
289 public int rhs_length() {return _rhs_length;}
290
291 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
292
293 /** An action_part containing code for the action to be performed when we
294 * reduce with this production.
295 */
296 protected action_part _action;
297
298 /** An action_part containing code for the action to be performed when we
299 * reduce with this production.
300 */
301 public action_part action() {return _action;}
302
303 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
304
305 /** Index number of the production. */
306 protected int _index;
307
308 /** Index number of the production. */
309 public int index() {return _index;}
310
311 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
312
313 /** Count of number of reductions using this production. */
314 protected int _num_reductions = 0;
315
316 /** Count of number of reductions using this production. */
317 public int num_reductions() {return _num_reductions;}
318
319 /** Increment the count of reductions with this non-terminal */
320 public void note_reduction_use() {_num_reductions++;}
321
322 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
323
324 /** Is the nullability of the production known or unknown? */
325 protected boolean _nullable_known = false;
326
327 /** Is the nullability of the production known or unknown? */
328 public boolean nullable_known() {return _nullable_known;}
329
330 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
331
332 /** Nullability of the production (can it derive the empty string). */
333 protected boolean _nullable = false;
334
335 /** Nullability of the production (can it derive the empty string). */
336 public boolean nullable() {return _nullable;}
337
338 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
339
340 /** First set of the production. This is the set of terminals that
341 * could appear at the front of some string derived from this production.
342 */
343 protected terminal_set _first_set = new terminal_set();
344
345 /** First set of the production. This is the set of terminals that
346 * could appear at the front of some string derived from this production.
347 */
348 public terminal_set first_set() {return _first_set;}
349
350 /*-----------------------------------------------------------*/
351 /*--- Static Methods ----------------------------------------*/
352 /*-----------------------------------------------------------*/
353
354 /** Determine if a given character can be a label id starter.
355 * @param c the character in question.
356 */
357 protected static boolean is_id_start(char c)
358 {
359 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_');
360
361 //later need to handle non-8-bit chars here
362 }
363
364 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
365
366 /** Determine if a character can be in a label id.
367 * @param c the character in question.
368 */
369 protected static boolean is_id_char(char c)
370 {
371 return is_id_start(c) || (c >= '0' && c <= '9');
372 }
373
374 /*-----------------------------------------------------------*/
375 /*--- General Methods ---------------------------------------*/
376 /*-----------------------------------------------------------*/
377
378
379 /** Return label declaration code
380 * @param labelname the label name
381 * @param stack_type the stack type of label?
382 * @author frankf
383 */
384 protected String make_declaration(
385 String labelname,
386 String stack_type,
387 int offset)
388 {
389 String ret;
390
391 /* Put in the left/right value labels */
392 if (emit.lr_values())
393 ret = "\t\tint " + labelname + "left = ((java_cup.runtime.Symbol)" +
394 emit.pre("stack") + ".elementAt(" + emit.pre("top") +
395 "-" + offset + ")).left;\n" +
396 "\t\tint " + labelname + "right = ((java_cup.runtime.Symbol)" +
397 emit.pre("stack") + ".elementAt(" + emit.pre("top") +
398 "-" + offset + ")).right;\n";
399 else ret = "";
400
401 /* otherwise, just declare label. */
402 return ret + "\t\t" + stack_type + " " + labelname + " = (" + stack_type +
403 ")((" + "java_cup.runtime.Symbol) " + emit.pre("stack") + ".elementAt(" + emit.pre("top")
404 + "-" + offset + ")).value;\n";
405
406 }
407 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
408
409 /** Declare label names as valid variables within the action string
410 * @param rhs array of RHS parts.
411 * @param rhs_len how much of rhs to consider valid.
412 * @param final_action the final action string of the production.
413 * @param lhs_type the object type associated with the LHS symbol.
414 */
415 protected String declare_labels(
416 production_part rhs[],
417 int rhs_len,
418 String final_action)
419 {
420 String declaration = "";
421
422 symbol_part part;
423 action_part act_part;
424 int pos;
425
426 /* walk down the parts and extract the labels */
427 for (pos = 0; pos < rhs_len; pos++)
428 {
429 if (!rhs[pos].is_action())
430 {
431 part = (symbol_part)rhs[pos];
432
433 /* if it has a label, make declaration! */
434 if (part.label() != null)
435 {
436 declaration = declaration +
437 make_declaration(part.label(), part.the_symbol().stack_type(),
438 rhs_len-pos-1);
439 }
440 }
441 }
442 return declaration;
443 }
444
445
446
447 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
448
449 /** Helper routine to merge adjacent actions in a set of RHS parts
450 * @param rhs_parts array of RHS parts.
451 * @param len amount of that array that is valid.
452 * @return remaining valid length.
453 */
454 protected int merge_adjacent_actions(production_part rhs_parts[], int len)
455 {
456 int from_loc, to_loc, merge_cnt;
457
458 /* bail out early if we have no work to do */
459 if (rhs_parts == null || len == 0) return 0;
460
461 merge_cnt = 0;
462 to_loc = -1;
463 for (from_loc=0; from_loc<len; from_loc++)
464 {
465 /* do we go in the current position or one further */
466 if (to_loc < 0 || !rhs_parts[to_loc].is_action()
467 || !rhs_parts[from_loc].is_action())
468 {
469 /* next one */
470 to_loc++;
471
472 /* clear the way for it */
473 if (to_loc != from_loc) rhs_parts[to_loc] = null;
474 }
475
476 /* if this is not trivial? */
477 if (to_loc != from_loc)
478 {
479 /* do we merge or copy? */
480 if (rhs_parts[to_loc] != null && rhs_parts[to_loc].is_action() &&
481 rhs_parts[from_loc].is_action())
482 {
483 /* merge */
484 rhs_parts[to_loc] = new action_part(
485 ((action_part)rhs_parts[to_loc]).code_string() +
486 ((action_part)rhs_parts[from_loc]).code_string());
487 merge_cnt++;
488 }
489 else
490 {
491 /* copy */
492 rhs_parts[to_loc] = rhs_parts[from_loc];
493 }
494 }
495 }
496
497 /* return the used length */
498 return len - merge_cnt;
499 }
500
501 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
502
503 /** Helper routine to strip a trailing action off rhs and return it
504 * @param rhs_parts array of RHS parts.
505 * @param len how many of those are valid.
506 * @return the removed action part.
507 */
508 protected action_part strip_trailing_action(
509 production_part rhs_parts[],
510 int len)
511 {
512 action_part result;
513
514 /* bail out early if we have nothing to do */
515 if (rhs_parts == null || len == 0) return null;
516
517 /* see if we have a trailing action */
518 if (rhs_parts[len-1].is_action())
519 {
520 /* snip it out and return it */
521 result = (action_part)rhs_parts[len-1];
522 rhs_parts[len-1] = null;
523 return result;
524 }
525 else
526 return null;
527 }
528
529 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
530
531 /** Remove all embedded actions from a production by factoring them
532 * out into individual action production using new non terminals.
533 * if the original production was: <pre>
534 * A ::= B {action1} C {action2} D
535 * </pre>
536 * then it will be factored into: <pre>
537 * A ::= B NT$1 C NT$2 D
538 * NT$1 ::= {action1}
539 * NT$2 ::= {action2}
540 * </pre>
541 * where NT$1 and NT$2 are new system created non terminals.
542 */
543
544 /* the declarations added to the parent production are also passed along,
545 as they should be perfectly valid in this code string, since it
546 was originally a code string in the parent, not on its own.
547 frank 6/20/96 */
548 protected void remove_embedded_actions(
549
550 ) throws internal_error
551 {
552 non_terminal new_nt;
553 production new_prod;
554 String declare_str;
555
556 /* walk over the production and process each action */
557 for (int act_loc = 0; act_loc < rhs_length(); act_loc++)
558 if (rhs(act_loc).is_action())
559 {
560
561
562 declare_str = declare_labels(
563 _rhs, act_loc, "");
564 /* create a new non terminal for the action production */
565 new_nt = non_terminal.create_new();
566 new_nt.is_embedded_action = true; /* 24-Mar-1998, CSA */
567
568 /* create a new production with just the action */
569 new_prod = new action_production(this, new_nt, null, 0,
570 declare_str + ((action_part)rhs(act_loc)).code_string());
571
572 /* replace the action with the generated non terminal */
573 _rhs[act_loc] = new symbol_part(new_nt);
574 }
575 }
576
577 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
578
579 /** Check to see if the production (now) appears to be nullable.
580 * A production is nullable if its RHS could derive the empty string.
581 * This results when the RHS is empty or contains only non terminals
582 * which themselves are nullable.
583 */
584 public boolean check_nullable() throws internal_error
585 {
586 production_part part;
587 symbol sym;
588 int pos;
589
590 /* if we already know bail out early */
591 if (nullable_known()) return nullable();
592
593 /* if we have a zero size RHS we are directly nullable */
594 if (rhs_length() == 0)
595 {
596 /* stash and return the result */
597 return set_nullable(true);
598 }
599
600 /* otherwise we need to test all of our parts */
601 for (pos=0; pos<rhs_length(); pos++)
602 {
603 part = rhs(pos);
604
605 /* only look at non-actions */
606 if (!part.is_action())
607 {
608 sym = ((symbol_part)part).the_symbol();
609
610 /* if its a terminal we are definitely not nullable */
611 if (!sym.is_non_term())
612 return set_nullable(false);
613 /* its a non-term, is it marked nullable */
614 else if (!((non_terminal)sym).nullable())
615 /* this one not (yet) nullable, so we aren't */
616 return false;
617 }
618 }
619
620 /* if we make it here all parts are nullable */
621 return set_nullable(true);
622 }
623
624 /** set (and return) nullability */
625 boolean set_nullable(boolean v)
626 {
627 _nullable_known = true;
628 _nullable = v;
629 return v;
630 }
631
632 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
633
634 /** Update (and return) the first set based on current NT firsts.
635 * This assumes that nullability has already been computed for all non
636 * terminals and productions.
637 */
638 public terminal_set check_first_set() throws internal_error
639 {
640 int part;
641 symbol sym;
642
643 /* walk down the right hand side till we get past all nullables */
644 for (part=0; part<rhs_length(); part++)
645 {
646 /* only look at non-actions */
647 if (!rhs(part).is_action())
648 {
649 sym = ((symbol_part)rhs(part)).the_symbol();
650
651 /* is it a non-terminal?*/
652 if (sym.is_non_term())
653 {
654 /* add in current firsts from that NT */
655 _first_set.add(((non_terminal)sym).first_set());
656
657 /* if its not nullable, we are done */
658 if (!((non_terminal)sym).nullable())
659 break;
660 }
661 else
662 {
663 /* its a terminal -- add that to the set */
664 _first_set.add((terminal)sym);
665
666 /* we are done */
667 break;
668 }
669 }
670 }
671
672 /* return our updated first set */
673 return first_set();
674 }
675
676 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
677
678 /** Equality comparison. */
679 public boolean equals(production other)
680 {
681 if (other == null) return false;
682 return other._index == _index;
683 }
684
685 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
686
687 /** Generic equality comparison. */
688 public boolean equals(Object other)
689 {
690 if (!(other instanceof production))
691 return false;
692 else
693 return equals((production)other);
694 }
695
696 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
697
698 /** Produce a hash code. */
699 public int hashCode()
700 {
701 /* just use a simple function of the index */
702 return _index*13;
703 }
704
705 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
706
707 /** Convert to a string. */
708 public String toString()
709 {
710 String result;
711
712 /* catch any internal errors */
713 try {
714 result = "production [" + index() + "]: ";
715 result += ((lhs() != null) ? lhs().toString() : "$$NULL-LHS$$");
716 result += " :: = ";
717 for (int i = 0; i<rhs_length(); i++)
718 result += rhs(i) + " ";
719 result += ";";
720 if (action() != null && action().code_string() != null)
721 result += " {" + action().code_string() + "}";
722
723 if (nullable_known())
724 if (nullable())
725 result += "[NULLABLE]";
726 else
727 result += "[NOT NULLABLE]";
728 } catch (internal_error e) {
729 /* crash on internal error since we can't throw it from here (because
730 superclass does not throw anything. */
731 e.crash();
732 result = null;
733 }
734
735 return result;
736 }
737
738 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
739
740 /** Convert to a simpler string. */
741 public String to_simple_string() throws internal_error
742 {
743 String result;
744
745 result = ((lhs() != null) ? lhs().the_symbol().name() : "NULL_LHS");
746 result += " ::= ";
747 for (int i = 0; i < rhs_length(); i++)
748 if (!rhs(i).is_action())
749 result += ((symbol_part)rhs(i)).the_symbol().name() + " ";
750
751 return result;
752 }
753
754 /*-----------------------------------------------------------*/
755
756 }