Docjar: A Java Source and Docuemnt Enginecom.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: java/text/RuleBasedCollator.java


1   /* RuleBasedCollator.java -- Concrete Collator Class
2      Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005  Free Software Foundation, Inc.
3   
4   This file is part of GNU Classpath.
5   
6   GNU Classpath is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10   
11  GNU Classpath is distributed in the hope that it will be useful, but
12  WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  General Public License for more details.
15  
16  You should have received a copy of the GNU General Public License
17  along with GNU Classpath; see the file COPYING.  If not, write to the
18  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  02110-1301 USA.
20  
21  Linking this library statically or dynamically with other modules is
22  making a combined work based on this library.  Thus, the terms and
23  conditions of the GNU General Public License cover the whole
24  combination.
25  
26  As a special exception, the copyright holders of this library give you
27  permission to link this library with independent modules to produce an
28  executable, regardless of the license terms of these independent
29  modules, and to copy and distribute the resulting executable under
30  terms of your choice, provided that you also meet, for each linked
31  independent module, the terms and conditions of the license of that
32  module.  An independent module is a module which is not derived from
33  or based on this library.  If you modify this library, you may extend
34  this exception to your version of the library, but you are not
35  obligated to do so.  If you do not wish to do so, delete this
36  exception statement from your version. */
37  
38  
39  package java.text;
40  
41  import java.util.ArrayList;
42  import java.util.HashMap;
43  
44  /* Written using "Java Class Libraries", 2nd edition, plus online
45   * API docs for JDK 1.2 from http://www.javasoft.com.
46   * Status: Believed complete and correct
47   */
48  
49  /**
50   * This class is a concrete subclass of <code>Collator</code> suitable
51   * for string collation in a wide variety of languages.  An instance of
52   * this class is normally returned by the <code>getInstance</code> method
53   * of <code>Collator</code> with rules predefined for the requested
54   * locale.  However, an instance of this class can be created manually
55   * with any desired rules.
56   * <p>
57   * Rules take the form of a <code>String</code> with the following syntax
58   * <ul>
59   * <li> Modifier: '@'</li>
60   * <li> Relation: '&lt;' | ';' | ',' | '=' : &lt;text&gt;</li>
61   * <li> Reset: '&amp;' : &lt;text&gt;</li>
62   * </ul>
63   * The modifier character indicates that accents sort backward as is the
64   * case with French.  The modifier applies to all rules <b>after</b>
65   * the modifier but before the next primary sequence. If placed at the end
66   * of the sequence if applies to all unknown accented character.
67   * The relational operators specify how the text 
68   * argument relates to the previous term.  The relation characters have
69   * the following meanings:
70   * <ul>
71   * <li>'&lt;' - The text argument is greater than the prior term at the primary
72   * difference level.</li>
73   * <li>';' - The text argument is greater than the prior term at the secondary
74   * difference level.</li>
75   * <li>',' - The text argument is greater than the prior term at the tertiary
76   * difference level.</li>
77   * <li>'=' - The text argument is equal to the prior term</li>
78   * </ul>
79   * <p>
80   * As for the text argument itself, this is any sequence of Unicode
81   * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F,
82   * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E. If these characters are 
83   * desired, they must be enclosed in single quotes.  If any whitespace is 
84   * encountered, it is ignored.  (For example, "a b" is equal to "ab").  
85   * <p>
86   * The reset operation inserts the following rule at the point where the
87   * text argument to it exists in the previously declared rule string.  This
88   * makes it easy to add new rules to an existing string by simply including
89   * them in a reset sequence at the end.  Note that the text argument, or
90   * at least the first character of it, must be present somewhere in the
91   * previously declared rules in order to be inserted properly.  If this
92   * is not satisfied, a <code>ParseException</code> will be thrown. 
93   * <p>
94   * This system of configuring <code>RuleBasedCollator</code> is needlessly
95   * complex and the people at Taligent who developed it (along with the folks
96   * at Sun who accepted it into the Java standard library) deserve a slow
97   * and agonizing death.
98   * <p>
99   * Here are a couple of example of rule strings:
100  * <p>
101  * "&lt; a &lt; b &lt; c" - This string says that a is greater than b which is 
102  * greater than c, with all differences being primary differences.
103  * <p>
104  * "&lt; a,A &lt; b,B &lt; c,C" - This string says that 'A' is greater than 'a' with
105  * a tertiary strength comparison.  Both 'b' and 'B' are greater than 'a' and
106  * 'A' during a primary strength comparison.  But 'B' is greater than 'b'
107  * under a tertiary strength comparison.
108  * <p>
109  * "&lt; a &lt; c &amp; a &lt; b " - This sequence is identical in function to the 
110  * "&lt; a &lt; b &lt; c" rule string above.  The '&amp;' reset symbol indicates that
111  * the rule "&lt; b" is to be inserted after the text argument "a" in the
112  * previous rule string segment.
113  * <p>
114  * "&lt; a &lt; b &amp; y &lt; z" - This is an error.  The character 'y' does not appear
115  * anywhere in the previous rule string segment so the rule following the
116  * reset rule cannot be inserted.
117  * <p>
118  * "&lt; a &amp; A @ &lt; e &amp; E &lt; f&amp; F" - This sequence is equivalent to the following
119  * "&lt; a &amp; A &lt; E &amp; e &lt; f &amp; F".
120  * <p>
121  * For a description of the various comparison strength types, see the
122  * documentation for the <code>Collator</code> class.
123  * <p>
124  * As an additional complication to this already overly complex rule scheme,
125  * if any characters precede the first rule, these characters are considered
126  * ignorable.  They will be treated as if they did not exist during 
127  * comparisons.  For example, "- &lt; a &lt; b ..." would make '-' an ignorable
128  * character such that the strings "high-tech" and "hightech" would
129  * be considered identical.
130  * <p>
131  * A <code>ParseException</code> will be thrown for any of the following
132  * conditions:
133  * <ul>
134  * <li>Unquoted punctuation characters in a text argument.</li>
135  * <li>A relational or reset operator not followed by a text argument</li>
136  * <li>A reset operator where the text argument is not present in
137  * the previous rule string section.</li>
138  * </ul>
139  *
140  * @author Aaron M. Renn (arenn@urbanophile.com)
141  * @author Tom Tromey (tromey@cygnus.com)
142  * @author Guilhem Lavaux (guilhem@kaffe.org)
143  */
144 public class RuleBasedCollator extends Collator
145 {
146   /**
147    * This class describes what rank has a character (or a sequence of characters) 
148    * in the lexicographic order. Each element in a rule has a collation element.
149    */
150   static final class CollationElement
151   {
152     String key;
153     int primary;
154     short secondary;
155     short tertiary;
156     short equality;
157     boolean ignore;
158     String expansion;
159 
160     CollationElement(String key, int primary, short secondary, short tertiary,
161          short equality, String expansion, boolean ignore)
162     {
163       this.key = key;
164       this.primary = primary;
165       this.secondary = secondary;
166       this.tertiary = tertiary;
167       this.equality = equality;
168       this.ignore = ignore;
169       this.expansion = expansion;
170     }
171 
172     int getValue()
173     {
174       return (primary << 16) + (secondary << 8) + tertiary;
175     }
176   }
177 
178   /**
179    * Basic collation instruction (internal format) to build the series of
180    * collation elements. It contains an instruction which specifies the new
181    * state of the generator. The sequence of instruction should not contain
182    * RESET (it is used by
183    * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)})
184    * as a temporary state while merging two sets of instructions.
185    */
186   static final class CollationSorter
187   {
188     static final int GREATERP = 0;
189     static final int GREATERS = 1;
190     static final int GREATERT = 2;
191     static final int EQUAL = 3;
192     static final int RESET = 4;
193     static final int INVERSE_SECONDARY = 5;
194     
195     int comparisonType;
196     String textElement;
197     int hashText;
198     int offset;
199     boolean ignore;
200 
201     String expansionOrdering;
202   }
203 
204   /**
205    * This the the original rule string.
206    */
207   private String rules;
208 
209   /**
210    * This is the table of collation element values
211    */
212   private Object[] ce_table;
213 
214   /**
215    * Quick-prefix finder.
216    */
217   HashMap prefix_tree;
218 
219   /**
220    * This is the value of the last sequence entered into
221    * <code>ce_table</code>. It is used to compute the
222    * ordering value of unspecified character.
223    */
224   private int last_primary_value;
225 
226   /**
227    * This is the value of the last secondary sequence of the
228    * primary 0, entered into
229    * <code>ce_table</code>. It is used to compute the
230    * ordering value of an unspecified accented character.
231    */
232   private int last_tertiary_value;
233 
234   /**
235    * This variable is true if accents need to be sorted
236    * in the other direction.
237    */
238   private boolean inverseAccentComparison;
239 
240   /**
241    * This collation element is special to unknown sequence.
242    * The JDK uses it to mark and sort the characters which has
243    * no collation rules.
244    */
245   static final CollationElement SPECIAL_UNKNOWN_SEQ = 
246     new CollationElement("", (short) 32767, (short) 0, (short) 0,
247        (short) 0, null, false);
248   
249   /**
250    * This method initializes a new instance of <code>RuleBasedCollator</code>
251    * with the specified collation rules.  Note that an application normally
252    * obtains an instance of <code>RuleBasedCollator</code> by calling the
253    * <code>getInstance</code> method of <code>Collator</code>.  That method
254    * automatically loads the proper set of rules for the desired locale.
255    *
256    * @param rules The collation rule string.
257    *
258    * @exception ParseException If the rule string contains syntax errors.
259    */
260   public RuleBasedCollator(String rules) throws ParseException
261   {
262     if (rules.equals(""))
263       throw new ParseException("empty rule set", 0);
264     
265     this.rules = rules;
266 
267     buildCollationVector(parseString(rules));
268     buildPrefixAccess();
269   }
270 
271   /**
272    * This method returns the number of common characters at the beginning
273    * of the string of the two parameters.
274    *
275    * @param prefix A string considered as a prefix to test against
276    * the other string.
277    * @param s A string to test the prefix against.
278    * @return The number of common characters.
279    */
280   static int findPrefixLength(String prefix, String s)
281   {
282     int index;
283     int len = prefix.length();
284 
285     for (index = 0; index < len && index < s.length(); ++index)
286       {
287   if (prefix.charAt(index) != s.charAt(index))
288     return index;
289       }
290 
291 
292     return index;
293   }
294 
295   /**
296    * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods
297    * checks whether it is possible to find an anchor point for the rules to be merged and
298    * then insert them at that precise point.
299    *
300    * @param offset Offset in the string containing rules of the beginning of the rules
301    * being merged in.
302    * @param starter Text of the rules being merged.
303    * @param main Repository of all already parsed rules.
304    * @param patch Rules to be merged into the repository.
305    * @throws ParseException if it is impossible to find an anchor point for the new rules.
306    */
307   private void mergeRules(int offset, String starter, ArrayList main, ArrayList patch)
308     throws ParseException 
309   {
310     int insertion_point = -1;
311     int max_length = 0;
312     
313     /* We must check that no rules conflict with another already present. If it
314      * is the case delete the old rule. 
315      */
316     
317     /* For the moment good old O(N^2) algorithm.
318      */
319     for (int i = 0; i < patch.size(); i++)
320       {
321   int j = 0;
322   
323   while (j < main.size())
324     {
325       CollationSorter rule1 = (CollationSorter) patch.get(i);
326       CollationSorter rule2 = (CollationSorter) main.get(j);
327       
328       if (rule1.textElement.equals(rule2.textElement))
329         main.remove(j);
330       else
331         j++;
332     }
333       }
334 
335     // Find the insertion point... O(N)
336     for (int i = 0; i < main.size(); i++)
337       {
338   CollationSorter sorter = (CollationSorter) main.get(i);
339   int length = findPrefixLength(starter, sorter.textElement);
340     
341   if (length > max_length)
342     {
343       max_length = length;
344       insertion_point = i+1;
345     }
346       }
347 
348     if (insertion_point < 0)
349       throw new ParseException("no insertion point found for " + starter, offset);
350 
351     if (max_length < starter.length())
352       {
353   /*
354    * We need to expand the first entry. It must be sorted
355    * like if it was the reference key itself (like the spec
356    * said. So the first entry is special: the element is
357    * replaced by the specified text element for the sorting.
358    * This text replace the old one for comparisons. However
359    * to preserve the behaviour we replace the first key (corresponding
360    * to the found prefix) by a new code rightly ordered in the
361    * sequence. The rest of the subsequence must be appended
362    * to the end of the sequence.
363    */
364   CollationSorter sorter = (CollationSorter) patch.get(0);
365   CollationSorter expansionPrefix =
366     (CollationSorter) main.get(insertion_point-1);
367   
368   sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
369     
370   main.add(insertion_point, sorter);
371   
372   /*
373    * This is a new set of rules. Append to the list.
374    */
375   patch.remove(0);
376   insertion_point++;
377       }
378 
379     // Now insert all elements of patch at the insertion point.
380     for (int i = 0; i < patch.size(); i++)
381       main.add(i+insertion_point, patch.get(i));
382   }
383 
384   /**
385    * This method parses a string and build a set of sorting instructions. The parsing
386    * may only be partial on the case the rules are to be merged sometime later.
387    * 
388    * @param stop_on_reset If this parameter is true then the parser stops when it
389    * encounters a reset instruction. In the other case, it tries to parse the subrules
390    * and merged it in the same repository.
391    * @param v Output vector for the set of instructions.
392    * @param base_offset Offset in the string to begin parsing.
393    * @param rules Rules to be parsed.
394    * @return -1 if the parser reached the end of the string, an integer representing the
395    * offset in the string at which it stopped parsing. 
396    * @throws ParseException if something turned wrong during the parsing. To get details
397    * decode the message.
398    */
399   private int subParseString(boolean stop_on_reset, ArrayList v,
400            int base_offset, String rules)
401     throws ParseException
402   {
403     boolean ignoreChars = (base_offset == 0);
404     int operator = -1;
405     StringBuffer sb = new StringBuffer();
406     boolean doubleQuote = false;
407     boolean eatingChars = false;
408     boolean nextIsModifier = false;
409     boolean isModifier = false;
410     int i;
411     
412 main_parse_loop:
413     for (i = 0; i < rules.length(); i++)
414       {
415   char c = rules.charAt(i);
416   int type = -1;
417   
418   if (!eatingChars &&
419       ((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
420         continue;
421 
422   isModifier = nextIsModifier;
423   nextIsModifier = false;
424 
425   if (eatingChars && c != '\'')
426     {
427       doubleQuote = false;
428       sb.append(c);
429       continue;
430     }
431   if (doubleQuote && eatingChars)
432     {
433       sb.append(c);
434       doubleQuote = false;
435       continue;
436     }
437 
438   switch (c)
439     {
440     case '!':
441       throw new ParseException
442         ("Modifier '!' is not yet supported by Classpath", i + base_offset);
443     case '<':
444       type = CollationSorter.GREATERP;
445       break;
446     case ';':
447       type = CollationSorter.GREATERS;
448       break;
449     case ',':
450       type = CollationSorter.GREATERT;
451       break;
452     case '=':
453       type = CollationSorter.EQUAL;
454       break;
455     case '\'':
456       eatingChars = !eatingChars;
457       doubleQuote = true;
458       break;
459     case '@':
460       if (ignoreChars)
461         throw new ParseException
462     ("comparison list has not yet been started. You may only use"
463      + "(<,;=&)", i + base_offset);
464       // Inverse the order of secondaries from now on.
465       nextIsModifier = true;
466       type = CollationSorter.INVERSE_SECONDARY;
467       break;
468     case '&':
469       type = CollationSorter.RESET;
470       if (stop_on_reset)
471         break main_parse_loop;
472       break;
473     default:
474       if (operator < 0)
475         throw new ParseException
476     ("operator missing at " + (i + base_offset), i + base_offset);
477       if (! eatingChars
478     && ((c >= 0x21 && c <= 0x2F) 
479         || (c >= 0x3A && c <= 0x40)
480         || (c >= 0x5B && c <= 0x60)
481         || (c >= 0x7B && c <= 0x7E)))
482         throw new ParseException
483     ("unquoted punctuation character '" + c + "'", i + base_offset);
484 
485       //type = ignoreChars ? CollationSorter.IGNORE : -1;
486       sb.append(c);
487       break;
488     }
489 
490   if (type  < 0)
491     continue;
492 
493   if (operator < 0)
494     {
495       operator = type;
496       continue;
497     }
498 
499   if (sb.length() == 0 && !isModifier)
500     throw new ParseException
501       ("text element empty at " + (i+base_offset), i+base_offset);
502 
503   if (operator == CollationSorter.RESET)
504     {
505       /* Reposition in the sorting list at the position
506        * indicated by the text element.
507        */
508       String subrules = rules.substring(i);
509       ArrayList sorted_rules = new ArrayList();
510       int idx;
511 
512       // Parse the subrules but do not iterate through all
513       // sublist. This is the priviledge of the first call.
514       idx = subParseString(true, sorted_rules, base_offset+i, subrules);
515     
516       // Merge new parsed rules into the list.
517       mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
518       sb.setLength(0);
519       
520       // Reset state to none.
521       operator = -1;
522       type = -1;
523       // We have found a new subrule at 'idx' but it has not been parsed.
524       if (idx >= 0)
525         {
526     i += idx-1;
527     continue main_parse_loop;
528         }
529       else
530     // No more rules.
531     break main_parse_loop;
532     }
533 
534   CollationSorter sorter = new CollationSorter();
535   
536   if (operator == CollationSorter.GREATERP)
537     ignoreChars = false;
538 
539   sorter.comparisonType = operator;
540   sorter.textElement = sb.toString();
541   sorter.hashText = sorter.textElement.hashCode();
542   sorter.offset = base_offset+rules.length();
543   sorter.ignore = ignoreChars;
544   sb.setLength(0);
545 
546   v.add(sorter);
547   operator = type;
548       }
549 
550     if (operator >= 0)
551       {
552   CollationSorter sorter = new CollationSorter();
553   int pos = rules.length() + base_offset;
554 
555   if ((sb.length() != 0 && nextIsModifier)
556       || (sb.length() == 0 && !nextIsModifier && !eatingChars))
557     throw new ParseException("text element empty at " + pos, pos);
558 
559   if (operator == CollationSorter.GREATERP)
560     ignoreChars = false;
561 
562   sorter.comparisonType = operator;
563   sorter.textElement = sb.toString();
564    sorter.hashText = sorter.textElement.hashCode();
565   sorter.offset = base_offset+pos;
566   sorter.ignore = ignoreChars;
567   v.add(sorter);
568       }
569 
570     if (i == rules.length())
571       return -1;
572     else
573       return i;
574   }
575 
576   /**
577    * This method creates a copy of this object.
578    *
579    * @return A copy of this object.
580    */
581   public Object clone()
582   {
583     return super.clone();
584   }
585 
586   /**
587    * This method completely parses a string 'rules' containing sorting rules.
588    *
589    * @param rules String containing the rules to be parsed. 
590    * @return A set of sorting instructions stored in a Vector.
591    * @throws ParseException if something turned wrong during the parsing. To get details
592    * decode the message.
593    */
594   private ArrayList parseString(String rules) 
595     throws ParseException
596   {
597     ArrayList v = new ArrayList();
598 
599     // result of the first subParseString is not absolute (may be -1 or a
600     // positive integer). But we do not care.
601     subParseString(false, v, 0, rules);
602     
603     return v;
604   }
605 
606   /**
607    * This method uses the sorting instructions built by {@link #parseString}
608    * to build collation elements which can be directly used to sort strings.
609    *
610    * @param parsedElements Parsed instructions stored in a ArrayList.
611    * @throws ParseException if the order of the instructions are not valid.
612    */
613   private void buildCollationVector(ArrayList parsedElements)
614     throws ParseException
615   {
616     int primary_seq = 0;
617     int last_tertiary_seq = 0;
618     short secondary_seq = 0;
619     short tertiary_seq = 0;
620     short equality_seq = 0;
621     boolean inverseComparisons = false;
622     final boolean DECREASING = false;
623     final boolean INCREASING = true;
624     boolean secondaryType = INCREASING;
625     ArrayList v = new ArrayList();
626 
627     // elts is completely sorted.
628 element_loop:
629     for (int i = 0; i < parsedElements.size(); i++)
630       {
631   CollationSorter elt = (CollationSorter) parsedElements.get(i);
632   boolean ignoreChar = false;
633 
634   switch (elt.comparisonType)
635     {
636     case CollationSorter.GREATERP:
637       primary_seq++;
638       if (inverseComparisons)
639         {
640     secondary_seq = Short.MAX_VALUE;
641     secondaryType = DECREASING;
642         }
643       else
644         {
645     secondary_seq = 0;
646     secondaryType = INCREASING;
647         }
648       tertiary_seq = 0;
649       equality_seq = 0;
650       inverseComparisons = false;
651       break;
652     case CollationSorter.GREATERS:
653       if (secondaryType == DECREASING)
654         secondary_seq--;
655       else
656         secondary_seq++;
657       tertiary_seq = 0;
658       equality_seq = 0;
659       break;
660     case CollationSorter.INVERSE_SECONDARY:
661       inverseComparisons = true;
662       continue element_loop;
663     case CollationSorter.GREATERT:
664       tertiary_seq++;
665       if (primary_seq == 0)
666         last_tertiary_seq = tertiary_seq;
667       equality_seq = 0;
668       break;
669     case CollationSorter.EQUAL:
670       equality_seq++;
671       break;
672     case CollationSorter.RESET:
673       throw new ParseException
674         ("Invalid reached state 'RESET'. Internal error", elt.offset);
675     default:
676       throw new ParseException
677         ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
678     }
679 
680   v.add(new CollationElement(elt.textElement, primary_seq,
681            secondary_seq, tertiary_seq,
682            equality_seq, elt.expansionOrdering, elt.ignore));
683       }
684 
685     this.inverseAccentComparison = inverseComparisons; 
686 
687     ce_table = v.toArray();
688 
689     last_primary_value = primary_seq+1;
690     last_tertiary_value = last_tertiary_seq+1;
691   }
692 
693   /**
694    * Build a tree where all keys are the texts of collation elements and data is
695    * the collation element itself. The tree is used when extracting all prefix
696    * for a given text.
697    */
698   private void buildPrefixAccess()
699   {
700     prefix_tree = new HashMap();
701 
702     for (int i = 0; i < ce_table.length; i++)
703       {
704   CollationElement e = (CollationElement) ce_table[i];
705 
706   prefix_tree.put(e.key, e);
707       }
708   }
709 
710   /**
711    * This method returns an integer which indicates whether the first
712    * specified <code>String</code> is less than, greater than, or equal to
713    * the second.  The value depends not only on the collation rules in
714    * effect, but also the strength and decomposition settings of this object.
715    *
716    * @param source The first <code>String</code> to compare.
717    * @param target A second <code>String</code> to compare to the first.
718    *
719    * @return A negative integer if source &lt; target, a positive integer
720    * if source &gt; target, or 0 if source == target.
721    */
722   public int compare(String source, String target)
723   {
724     CollationElementIterator cs, ct;
725     CollationElement ord1block = null;
726     CollationElement ord2block = null;
727     boolean advance_block_1 = true;
728     boolean advance_block_2 = true;
729 
730     cs = getCollationElementIterator(source);
731     ct = getCollationElementIterator(target);
732 
733     for(;;)
734       {
735   int ord1;
736   int ord2;
737 
738   /*
739    * We have to check whether the characters are ignorable.
740    * If it is the case then forget them. 
741    */
742   if (advance_block_1)
743     {
744       ord1block = cs.nextBlock();
745       if (ord1block != null && ord1block.ignore)
746         continue;
747     }
748   
749   if (advance_block_2)
750     {
751       ord2block = ct.nextBlock();
752       if (ord2block != null && ord2block.ignore)
753         {
754           advance_block_1 = false;
755           continue;
756         }
757    }
758   else
759     advance_block_2 = true;
760 
761   if (!advance_block_1)
762     advance_block_1 = true;
763 
764   if (ord1block != null)
765     ord1 = ord1block.getValue();
766   else
767     {
768       if (ord2block == null)
769         return 0;
770       return -1;
771     }
772 
773   if (ord2block == null)
774     return 1;
775   
776   ord2 = ord2block.getValue();
777   
778   // We know chars are totally equal, so skip
779         if (ord1 == ord2)
780     {
781       if (getStrength() == IDENTICAL)
782         if (!ord1block.key.equals(ord2block.key))
783     return ord1block.key.compareTo(ord2block.key);
784       continue;
785     }
786 
787         // Check for primary strength differences
788         int prim1 = CollationElementIterator.primaryOrder(ord1); 
789         int prim2 = CollationElementIterator.primaryOrder(ord2); 
790   
791   if (prim1 == 0 && getStrength() < TERTIARY)
792     {
793             advance_block_2 = false;
794       continue;
795     }
796   else if (prim2 == 0 && getStrength() < TERTIARY)
797     {
798       advance_block_1 = false;
799       continue;
800     }
801 
802         if (prim1 < prim2)
803           return -1;
804         else if (prim1 > prim2)
805           return 1;
806         else if (getStrength() == PRIMARY)
807           continue;
808 
809         // Check for secondary strength differences
810         int sec1 = CollationElementIterator.secondaryOrder(ord1);
811         int sec2 = CollationElementIterator.secondaryOrder(ord2);
812 
813   if (sec1 < sec2)
814           return -1;
815         else if (sec1 > sec2)
816           return 1;
817         else if (getStrength() == SECONDARY)
818           continue;
819 
820         // Check for tertiary differences
821         int tert1 = CollationElementIterator.tertiaryOrder(ord1);
822         int tert2 = CollationElementIterator.tertiaryOrder(ord2);
823 
824         if (tert1 < tert2)
825           return -1;
826         else if (tert1 > tert2)
827           return 1;
828   else if (getStrength() == TERTIARY)
829     continue;
830 
831   // Apparently JDK does this (at least for my test case).
832   return ord1block.key.compareTo(ord2block.key);    
833       }
834   }
835 
836   /**
837    * This method tests this object for equality against the specified 
838    * object.  This will be true if and only if the specified object is
839    * another reference to this object.
840    *
841    * @param obj The <code>Object</code> to compare against this object.
842    *
843    * @return <code>true</code> if the specified object is equal to this object,
844    * <code>false</code> otherwise.
845    */
846   public boolean equals(Object obj)
847   {
848     if (obj == this)
849       return true;
850     else
851       return false;
852   }
853 
854   /**
855    * This method builds a default collation element without invoking
856    * the database created from the rules passed to the constructor.
857    *
858    * @param c Character which needs a collation element.
859    * @return A valid brand new CollationElement instance.
860    */
861   CollationElement getDefaultElement(char c)
862   {
863     int v;
864 
865     // Preliminary support for generic accent sorting inversion (I don't know if all
866     // characters in the range should be sorted backward). This is the place
867     // to fix this if needed.
868     if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
869       v = 0x0361 - ((int) c - 0x02B9);
870     else
871       v = (short) c;
872     return new CollationElement("" + c, last_primary_value + v,
873         (short) 0, (short) 0, (short) 0, null, false);
874   }
875 
876   /**
877    * This method builds a default collation element for an accented character
878    * without invoking the database created from the rules passed to the constructor.
879    *
880    * @param c Character which needs a collation element.
881    * @return A valid brand new CollationElement instance.
882    */
883   CollationElement getDefaultAccentedElement(char c)
884   {
885     int v;
886 
887     // Preliminary support for generic accent sorting inversion (I don't know if all
888     // characters in the range should be sorted backward). This is the place
889     // to fix this if needed.
890     if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
891       v = 0x0361 - ((int) c - 0x02B9);
892     else
893       v = (short) c;
894     return new CollationElement("" + c, (short) 0,
895         (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
896   }
897 
898   /**
899    * This method returns an instance for <code>CollationElementIterator</code>
900    * for the specified <code>String</code> under the collation rules for this
901    * object.
902    *
903    * @param source The <code>String</code> to return the
904    * <code>CollationElementIterator</code> instance for.
905    *
906    * @return A <code>CollationElementIterator</code> for the specified
907    * <code>String</code>.
908    */
909   public CollationElementIterator getCollationElementIterator(String source)
910   {
911     return new CollationElementIterator(this, source);
912   }
913 
914   /**
915    * This method returns an instance of <code>CollationElementIterator</code>
916    * for the <code>String</code> represented by the specified
917    * <code>CharacterIterator</code>.
918    *
919    * @param source The <code>CharacterIterator</code> with the desired <code>String</code>.
920    *
921    * @return A <code>CollationElementIterator</code> for the specified <code>String</code>.
922    */
923   public CollationElementIterator getCollationElementIterator(CharacterIterator source)
924   {
925     StringBuffer expand = new StringBuffer("");
926     
927     // Right now we assume that we will read from the beginning of the string.
928     for (char c = source.first();
929    c != CharacterIterator.DONE;
930    c = source.next())
931       decomposeCharacter(c, expand);
932 
933     return getCollationElementIterator(expand.toString());
934   }
935 
936   /**
937    * This method returns an instance of <code>CollationKey</code> for the
938    * specified <code>String</code>.  The object returned will have a
939    * more efficient mechanism for its comparison function that could
940    * provide speed benefits if multiple comparisons are performed, such
941    * as during a sort.
942    *
943    * @param source The <code>String</code> to create a <code>CollationKey</code> for.
944    *
945    * @return A <code>CollationKey</code> for the specified <code>String</code>.
946    */
947   public CollationKey getCollationKey(String source)
948   {
949     CollationElementIterator cei = getCollationElementIterator(source);
950     ArrayList vect = new ArrayList();
951 
952     int ord = cei.next();
953     cei.reset(); //set to start of string
954 
955     while (ord != CollationElementIterator.NULLORDER)
956       {
957   // If the primary order is null, it means this is an ignorable
958   // character.
959   if (CollationElementIterator.primaryOrder(ord) == 0)
960     {
961             ord = cei.next();
962       continue;
963     }
964         switch (getStrength())
965           {
966             case PRIMARY:
967         ord = CollationElementIterator.primaryOrder(ord);
968         break;
969         
970             case SECONDARY:
971         ord = CollationElementIterator.primaryOrder(ord) << 8;
972         ord |= CollationElementIterator.secondaryOrder(ord);
973 
974             default:
975                break;
976           }
977 
978         vect.add(new Integer(ord)); 
979   ord = cei.next(); //increment to next key
980       }
981 
982     Object[] objarr = vect.toArray();
983     byte[] key = new byte[objarr.length * 4];
984 
985     for (int i = 0; i < objarr.length; i++)
986       {
987         int j = ((Integer) objarr[i]).intValue();
988         key [i * 4] = (byte) ((j & 0xFF000000) >> 24);
989         key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16);
990         key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8);
991         key [i * 4 + 3] = (byte) (j & 0x000000FF);
992       }
993 
994     return new CollationKey(this, source, key);
995   }
996 
997   /**
998    * This method returns a <code>String</code> containing the collation rules
999    * for this object.
1000   *
1001   * @return The collation rules for this object.
1002   */
1003  public String getRules()
1004  {
1005    return rules;
1006  }
1007
1008  /**
1009   * This method returns a hash value for this object.
1010   *
1011   * @return A hash value for this object.
1012   */
1013  public int hashCode()
1014  {
1015    return System.identityHashCode(this);
1016  }
1017}