Save This Page
Home » openjdk-7 » java » text » [javadoc | source]
    1   /*
    2    * Copyright 1997-2006 Sun Microsystems, Inc.  All Rights Reserved.
    3    * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4    *
    5    * This code is free software; you can redistribute it and/or modify it
    6    * under the terms of the GNU General Public License version 2 only, as
    7    * published by the Free Software Foundation.  Sun designates this
    8    * particular file as subject to the "Classpath" exception as provided
    9    * by Sun in the LICENSE file that accompanied this code.
   10    *
   11    * This code is distributed in the hope that it will be useful, but WITHOUT
   12    * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   13    * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   14    * version 2 for more details (a copy is included in the LICENSE file that
   15    * accompanied this code).
   16    *
   17    * You should have received a copy of the GNU General Public License version
   18    * 2 along with this work; if not, write to the Free Software Foundation,
   19    * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   20    *
   21    * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
   22    * CA 95054 USA or visit www.sun.com if you need additional information or
   23    * have any questions.
   24    */
   25   
   26   /*
   27    * (C) Copyright Taligent, Inc. 1996-1998 -  All Rights Reserved
   28    * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
   29    *
   30    *   The original version of this source code and documentation is copyrighted
   31    * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
   32    * materials are provided under terms of a License Agreement between Taligent
   33    * and Sun. This technology is protected by multiple US and International
   34    * patents. This notice and attribution to Taligent may not be removed.
   35    *   Taligent is a registered trademark of Taligent, Inc.
   36    *
   37    */
   38   
   39   package java.text;
   40   
   41   import java.text.spi.CollatorProvider;
   42   import java.util.Locale;
   43   import java.util.MissingResourceException;
   44   import java.util.ResourceBundle;
   45   import java.util.spi.LocaleServiceProvider;
   46   import sun.misc.SoftCache;
   47   import sun.util.resources.LocaleData;
   48   import sun.util.LocaleServiceProviderPool;
   49   
   50   
   51   /**
   52    * The <code>Collator</code> class performs locale-sensitive
   53    * <code>String</code> comparison. You use this class to build
   54    * searching and sorting routines for natural language text.
   55    *
   56    * <p>
   57    * <code>Collator</code> is an abstract base class. Subclasses
   58    * implement specific collation strategies. One subclass,
   59    * <code>RuleBasedCollator</code>, is currently provided with
   60    * the Java Platform and is applicable to a wide set of languages. Other
   61    * subclasses may be created to handle more specialized needs.
   62    *
   63    * <p>
   64    * Like other locale-sensitive classes, you can use the static
   65    * factory method, <code>getInstance</code>, to obtain the appropriate
   66    * <code>Collator</code> object for a given locale. You will only need
   67    * to look at the subclasses of <code>Collator</code> if you need
   68    * to understand the details of a particular collation strategy or
   69    * if you need to modify that strategy.
   70    *
   71    * <p>
   72    * The following example shows how to compare two strings using
   73    * the <code>Collator</code> for the default locale.
   74    * <blockquote>
   75    * <pre>
   76    * // Compare two strings in the default locale
   77    * Collator myCollator = Collator.getInstance();
   78    * if( myCollator.compare("abc", "ABC") &lt; 0 )
   79    *     System.out.println("abc is less than ABC");
   80    * else
   81    *     System.out.println("abc is greater than or equal to ABC");
   82    * </pre>
   83    * </blockquote>
   84    *
   85    * <p>
   86    * You can set a <code>Collator</code>'s <em>strength</em> property
   87    * to determine the level of difference considered significant in
   88    * comparisons. Four strengths are provided: <code>PRIMARY</code>,
   89    * <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>.
   90    * The exact assignment of strengths to language features is
   91    * locale dependent.  For example, in Czech, "e" and "f" are considered
   92    * primary differences, while "e" and "&#283;" are secondary differences,
   93    * "e" and "E" are tertiary differences and "e" and "e" are identical.
   94    * The following shows how both case and accents could be ignored for
   95    * US English.
   96    * <blockquote>
   97    * <pre>
   98    * //Get the Collator for US English and set its strength to PRIMARY
   99    * Collator usCollator = Collator.getInstance(Locale.US);
  100    * usCollator.setStrength(Collator.PRIMARY);
  101    * if( usCollator.compare("abc", "ABC") == 0 ) {
  102    *     System.out.println("Strings are equivalent");
  103    * }
  104    * </pre>
  105    * </blockquote>
  106    * <p>
  107    * For comparing <code>String</code>s exactly once, the <code>compare</code>
  108    * method provides the best performance. When sorting a list of
  109    * <code>String</code>s however, it is generally necessary to compare each
  110    * <code>String</code> multiple times. In this case, <code>CollationKey</code>s
  111    * provide better performance. The <code>CollationKey</code> class converts
  112    * a <code>String</code> to a series of bits that can be compared bitwise
  113    * against other <code>CollationKey</code>s. A <code>CollationKey</code> is
  114    * created by a <code>Collator</code> object for a given <code>String</code>.
  115    * <br>
  116    * <strong>Note:</strong> <code>CollationKey</code>s from different
  117    * <code>Collator</code>s cannot be compared. See the class description
  118    * for {@link CollationKey}
  119    * for an example using <code>CollationKey</code>s.
  120    *
  121    * @see         RuleBasedCollator
  122    * @see         CollationKey
  123    * @see         CollationElementIterator
  124    * @see         Locale
  125    * @author      Helena Shih, Laura Werner, Richard Gillam
  126    */
  127   
  128   public abstract class Collator
  129       implements java.util.Comparator<Object>, Cloneable
  130   {
  131       /**
  132        * Collator strength value.  When set, only PRIMARY differences are
  133        * considered significant during comparison. The assignment of strengths
  134        * to language features is locale dependant. A common example is for
  135        * different base letters ("a" vs "b") to be considered a PRIMARY difference.
  136        * @see java.text.Collator#setStrength
  137        * @see java.text.Collator#getStrength
  138        */
  139       public final static int PRIMARY = 0;
  140       /**
  141        * Collator strength value.  When set, only SECONDARY and above differences are
  142        * considered significant during comparison. The assignment of strengths
  143        * to language features is locale dependant. A common example is for
  144        * different accented forms of the same base letter ("a" vs "\u00E4") to be
  145        * considered a SECONDARY difference.
  146        * @see java.text.Collator#setStrength
  147        * @see java.text.Collator#getStrength
  148        */
  149       public final static int SECONDARY = 1;
  150       /**
  151        * Collator strength value.  When set, only TERTIARY and above differences are
  152        * considered significant during comparison. The assignment of strengths
  153        * to language features is locale dependant. A common example is for
  154        * case differences ("a" vs "A") to be considered a TERTIARY difference.
  155        * @see java.text.Collator#setStrength
  156        * @see java.text.Collator#getStrength
  157        */
  158       public final static int TERTIARY = 2;
  159   
  160       /**
  161        * Collator strength value.  When set, all differences are
  162        * considered significant during comparison. The assignment of strengths
  163        * to language features is locale dependant. A common example is for control
  164        * characters ("&#092;u0001" vs "&#092;u0002") to be considered equal at the
  165        * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL
  166        * level.  Additionally, differences between pre-composed accents such as
  167        * "&#092;u00C0" (A-grave) and combining accents such as "A&#092;u0300"
  168        * (A, combining-grave) will be considered significant at the IDENTICAL
  169        * level if decomposition is set to NO_DECOMPOSITION.
  170        */
  171       public final static int IDENTICAL = 3;
  172   
  173       /**
  174        * Decomposition mode value. With NO_DECOMPOSITION
  175        * set, accented characters will not be decomposed for collation. This
  176        * is the default setting and provides the fastest collation but
  177        * will only produce correct results for languages that do not use accents.
  178        * @see java.text.Collator#getDecomposition
  179        * @see java.text.Collator#setDecomposition
  180        */
  181       public final static int NO_DECOMPOSITION = 0;
  182   
  183       /**
  184        * Decomposition mode value. With CANONICAL_DECOMPOSITION
  185        * set, characters that are canonical variants according to Unicode
  186        * standard will be decomposed for collation. This should be used to get
  187        * correct collation of accented characters.
  188        * <p>
  189        * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
  190        * described in
  191        * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
  192        * Technical Report #15</a>.
  193        * @see java.text.Collator#getDecomposition
  194        * @see java.text.Collator#setDecomposition
  195        */
  196       public final static int CANONICAL_DECOMPOSITION = 1;
  197   
  198       /**
  199        * Decomposition mode value. With FULL_DECOMPOSITION
  200        * set, both Unicode canonical variants and Unicode compatibility variants
  201        * will be decomposed for collation.  This causes not only accented
  202        * characters to be collated, but also characters that have special formats
  203        * to be collated with their norminal form. For example, the half-width and
  204        * full-width ASCII and Katakana characters are then collated together.
  205        * FULL_DECOMPOSITION is the most complete and therefore the slowest
  206        * decomposition mode.
  207        * <p>
  208        * FULL_DECOMPOSITION corresponds to Normalization Form KD as
  209        * described in
  210        * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
  211        * Technical Report #15</a>.
  212        * @see java.text.Collator#getDecomposition
  213        * @see java.text.Collator#setDecomposition
  214        */
  215       public final static int FULL_DECOMPOSITION = 2;
  216   
  217       /**
  218        * Gets the Collator for the current default locale.
  219        * The default locale is determined by java.util.Locale.getDefault.
  220        * @return the Collator for the default locale.(for example, en_US)
  221        * @see java.util.Locale#getDefault
  222        */
  223       public static synchronized Collator getInstance() {
  224           return getInstance(Locale.getDefault());
  225       }
  226   
  227       /**
  228        * Gets the Collator for the desired locale.
  229        * @param desiredLocale the desired locale.
  230        * @return the Collator for the desired locale.
  231        * @see java.util.Locale
  232        * @see java.util.ResourceBundle
  233        */
  234       public static synchronized
  235       Collator getInstance(Locale desiredLocale)
  236       {
  237           Collator result = (Collator) cache.get(desiredLocale);
  238           if (result != null) {
  239                    return (Collator)result.clone();  // make the world safe
  240           }
  241   
  242           // Check whether a provider can provide an implementation that's closer
  243           // to the requested locale than what the Java runtime itself can provide.
  244           LocaleServiceProviderPool pool =
  245               LocaleServiceProviderPool.getPool(CollatorProvider.class);
  246           if (pool.hasProviders()) {
  247               Collator providersInstance = pool.getLocalizedObject(
  248                                               CollatorGetter.INSTANCE,
  249                                               desiredLocale,
  250                                               desiredLocale);
  251               if (providersInstance != null) {
  252                   return providersInstance;
  253               }
  254           }
  255   
  256           // Load the resource of the desired locale from resource
  257           // manager.
  258           String colString = "";
  259           try {
  260               ResourceBundle resource = LocaleData.getCollationData(desiredLocale);
  261   
  262               colString = resource.getString("Rule");
  263           } catch (MissingResourceException e) {
  264               // Use default values
  265           }
  266           try
  267           {
  268               result = new RuleBasedCollator( CollationRules.DEFAULTRULES +
  269                                               colString,
  270                                               CANONICAL_DECOMPOSITION );
  271           }
  272           catch(ParseException foo)
  273           {
  274               // predefined tables should contain correct grammar
  275               try {
  276                   result = new RuleBasedCollator( CollationRules.DEFAULTRULES );
  277               } catch (ParseException bar) {
  278                   // do nothing
  279               }
  280           }
  281           // Now that RuleBasedCollator adds expansions for pre-composed characters
  282           // into their decomposed equivalents, the default collators don't need
  283           // to have decomposition turned on.  Laura, 5/5/98, bug 4114077
  284           result.setDecomposition(NO_DECOMPOSITION);
  285   
  286           cache.put(desiredLocale,result);
  287           return (Collator)result.clone();
  288       }
  289   
  290       /**
  291        * Compares the source string to the target string according to the
  292        * collation rules for this Collator.  Returns an integer less than,
  293        * equal to or greater than zero depending on whether the source String is
  294        * less than, equal to or greater than the target string.  See the Collator
  295        * class description for an example of use.
  296        * <p>
  297        * For a one time comparison, this method has the best performance. If a
  298        * given String will be involved in multiple comparisons, CollationKey.compareTo
  299        * has the best performance. See the Collator class description for an example
  300        * using CollationKeys.
  301        * @param source the source string.
  302        * @param target the target string.
  303        * @return Returns an integer value. Value is less than zero if source is less than
  304        * target, value is zero if source and target are equal, value is greater than zero
  305        * if source is greater than target.
  306        * @see java.text.CollationKey
  307        * @see java.text.Collator#getCollationKey
  308        */
  309       public abstract int compare(String source, String target);
  310   
  311       /**
  312        * Compares its two arguments for order.  Returns a negative integer,
  313        * zero, or a positive integer as the first argument is less than, equal
  314        * to, or greater than the second.
  315        * <p>
  316        * This implementation merely returns
  317        *  <code> compare((String)o1, (String)o2) </code>.
  318        *
  319        * @return a negative integer, zero, or a positive integer as the
  320        *         first argument is less than, equal to, or greater than the
  321        *         second.
  322        * @exception ClassCastException the arguments cannot be cast to Strings.
  323        * @see java.util.Comparator
  324        * @since   1.2
  325        */
  326       public int compare(Object o1, Object o2) {
  327       return compare((String)o1, (String)o2);
  328       }
  329   
  330       /**
  331        * Transforms the String into a series of bits that can be compared bitwise
  332        * to other CollationKeys. CollationKeys provide better performance than
  333        * Collator.compare when Strings are involved in multiple comparisons.
  334        * See the Collator class description for an example using CollationKeys.
  335        * @param source the string to be transformed into a collation key.
  336        * @return the CollationKey for the given String based on this Collator's collation
  337        * rules. If the source String is null, a null CollationKey is returned.
  338        * @see java.text.CollationKey
  339        * @see java.text.Collator#compare
  340        */
  341       public abstract CollationKey getCollationKey(String source);
  342   
  343       /**
  344        * Convenience method for comparing the equality of two strings based on
  345        * this Collator's collation rules.
  346        * @param source the source string to be compared with.
  347        * @param target the target string to be compared with.
  348        * @return true if the strings are equal according to the collation
  349        * rules.  false, otherwise.
  350        * @see java.text.Collator#compare
  351        */
  352       public boolean equals(String source, String target)
  353       {
  354           return (compare(source, target) == Collator.EQUAL);
  355       }
  356   
  357       /**
  358        * Returns this Collator's strength property.  The strength property determines
  359        * the minimum level of difference considered significant during comparison.
  360        * See the Collator class description for an example of use.
  361        * @return this Collator's current strength property.
  362        * @see java.text.Collator#setStrength
  363        * @see java.text.Collator#PRIMARY
  364        * @see java.text.Collator#SECONDARY
  365        * @see java.text.Collator#TERTIARY
  366        * @see java.text.Collator#IDENTICAL
  367        */
  368       public synchronized int getStrength()
  369       {
  370           return strength;
  371       }
  372   
  373       /**
  374        * Sets this Collator's strength property.  The strength property determines
  375        * the minimum level of difference considered significant during comparison.
  376        * See the Collator class description for an example of use.
  377        * @param newStrength  the new strength value.
  378        * @see java.text.Collator#getStrength
  379        * @see java.text.Collator#PRIMARY
  380        * @see java.text.Collator#SECONDARY
  381        * @see java.text.Collator#TERTIARY
  382        * @see java.text.Collator#IDENTICAL
  383        * @exception  IllegalArgumentException If the new strength value is not one of
  384        * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
  385        */
  386       public synchronized void setStrength(int newStrength) {
  387           if ((newStrength != PRIMARY) &&
  388               (newStrength != SECONDARY) &&
  389               (newStrength != TERTIARY) &&
  390               (newStrength != IDENTICAL))
  391               throw new IllegalArgumentException("Incorrect comparison level.");
  392           strength = newStrength;
  393       }
  394   
  395       /**
  396        * Get the decomposition mode of this Collator. Decomposition mode
  397        * determines how Unicode composed characters are handled. Adjusting
  398        * decomposition mode allows the user to select between faster and more
  399        * complete collation behavior.
  400        * <p>The three values for decomposition mode are:
  401        * <UL>
  402        * <LI>NO_DECOMPOSITION,
  403        * <LI>CANONICAL_DECOMPOSITION
  404        * <LI>FULL_DECOMPOSITION.
  405        * </UL>
  406        * See the documentation for these three constants for a description
  407        * of their meaning.
  408        * @return the decomposition mode
  409        * @see java.text.Collator#setDecomposition
  410        * @see java.text.Collator#NO_DECOMPOSITION
  411        * @see java.text.Collator#CANONICAL_DECOMPOSITION
  412        * @see java.text.Collator#FULL_DECOMPOSITION
  413        */
  414       public synchronized int getDecomposition()
  415       {
  416           return decmp;
  417       }
  418       /**
  419        * Set the decomposition mode of this Collator. See getDecomposition
  420        * for a description of decomposition mode.
  421        * @param decompositionMode  the new decomposition mode.
  422        * @see java.text.Collator#getDecomposition
  423        * @see java.text.Collator#NO_DECOMPOSITION
  424        * @see java.text.Collator#CANONICAL_DECOMPOSITION
  425        * @see java.text.Collator#FULL_DECOMPOSITION
  426        * @exception IllegalArgumentException If the given value is not a valid decomposition
  427        * mode.
  428        */
  429       public synchronized void setDecomposition(int decompositionMode) {
  430           if ((decompositionMode != NO_DECOMPOSITION) &&
  431               (decompositionMode != CANONICAL_DECOMPOSITION) &&
  432               (decompositionMode != FULL_DECOMPOSITION))
  433               throw new IllegalArgumentException("Wrong decomposition mode.");
  434           decmp = decompositionMode;
  435       }
  436   
  437       /**
  438        * Returns an array of all locales for which the
  439        * <code>getInstance</code> methods of this class can return
  440        * localized instances.
  441        * The returned array represents the union of locales supported
  442        * by the Java runtime and by installed
  443        * {@link java.text.spi.CollatorProvider CollatorProvider} implementations.
  444        * It must contain at least a Locale instance equal to
  445        * {@link java.util.Locale#US Locale.US}.
  446        *
  447        * @return An array of locales for which localized
  448        *         <code>Collator</code> instances are available.
  449        */
  450       public static synchronized Locale[] getAvailableLocales() {
  451           LocaleServiceProviderPool pool =
  452               LocaleServiceProviderPool.getPool(CollatorProvider.class);
  453           return pool.getAvailableLocales();
  454       }
  455   
  456       /**
  457        * Overrides Cloneable
  458        */
  459       public Object clone()
  460       {
  461           try {
  462               return (Collator)super.clone();
  463           } catch (CloneNotSupportedException e) {
  464               throw new InternalError();
  465           }
  466       }
  467   
  468       /**
  469        * Compares the equality of two Collators.
  470        * @param that the Collator to be compared with this.
  471        * @return true if this Collator is the same as that Collator;
  472        * false otherwise.
  473        */
  474       public boolean equals(Object that)
  475       {
  476           if (this == that) return true;
  477           if (that == null) return false;
  478           if (getClass() != that.getClass()) return false;
  479           Collator other = (Collator) that;
  480           return ((strength == other.strength) &&
  481                   (decmp == other.decmp));
  482       }
  483   
  484       /**
  485        * Generates the hash code for this Collator.
  486        */
  487       abstract public int hashCode();
  488   
  489       /**
  490        * Default constructor.  This constructor is
  491        * protected so subclasses can get access to it. Users typically create
  492        * a Collator sub-class by calling the factory method getInstance.
  493        * @see java.text.Collator#getInstance
  494        */
  495       protected Collator()
  496       {
  497           strength = TERTIARY;
  498           decmp = CANONICAL_DECOMPOSITION;
  499       }
  500   
  501       private int strength = 0;
  502       private int decmp = 0;
  503       private static SoftCache cache = new SoftCache();
  504   
  505       //
  506       // FIXME: These three constants should be removed.
  507       //
  508       /**
  509        * LESS is returned if source string is compared to be less than target
  510        * string in the compare() method.
  511        * @see java.text.Collator#compare
  512        */
  513       final static int LESS = -1;
  514       /**
  515        * EQUAL is returned if source string is compared to be equal to target
  516        * string in the compare() method.
  517        * @see java.text.Collator#compare
  518        */
  519       final static int EQUAL = 0;
  520       /**
  521        * GREATER is returned if source string is compared to be greater than
  522        * target string in the compare() method.
  523        * @see java.text.Collator#compare
  524        */
  525       final static int GREATER = 1;
  526   
  527       /**
  528        * Obtains a Collator instance from a CollatorProvider
  529        * implementation.
  530        */
  531       private static class CollatorGetter
  532           implements LocaleServiceProviderPool.LocalizedObjectGetter<CollatorProvider, Collator> {
  533           private static final CollatorGetter INSTANCE = new CollatorGetter();
  534   
  535           public Collator getObject(CollatorProvider collatorProvider,
  536                                   Locale locale,
  537                                   String key,
  538                                   Object... params) {
  539               assert params.length == 1;
  540               Collator result = collatorProvider.getInstance(locale);
  541               if (result != null) {
  542                   // put this Collator instance in the cache for two locales, one
  543                   // is for the desired locale, and the other is for the actual
  544                   // locale where the provider is found, which may be a fall back locale.
  545                   cache.put((Locale)params[0], result);
  546                   cache.put(locale, result);
  547                   return (Collator)result.clone();
  548               }
  549   
  550               return null;
  551           }
  552       }
  553    }

Save This Page
Home » openjdk-7 » java » text » [javadoc | source]