Save This Page
Home » openjdk-7 » java » text » [javadoc | source]
    1   /*
    2    * Copyright 1996-2001 Sun Microsystems, Inc.  All Rights Reserved.
    3    * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4    *
    5    * This code is free software; you can redistribute it and/or modify it
    6    * under the terms of the GNU General Public License version 2 only, as
    7    * published by the Free Software Foundation.  Sun designates this
    8    * particular file as subject to the "Classpath" exception as provided
    9    * by Sun in the LICENSE file that accompanied this code.
   10    *
   11    * This code is distributed in the hope that it will be useful, but WITHOUT
   12    * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   13    * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   14    * version 2 for more details (a copy is included in the LICENSE file that
   15    * accompanied this code).
   16    *
   17    * You should have received a copy of the GNU General Public License version
   18    * 2 along with this work; if not, write to the Free Software Foundation,
   19    * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   20    *
   21    * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
   22    * CA 95054 USA or visit www.sun.com if you need additional information or
   23    * have any questions.
   24    */
   25   
   26   /*
   27    * (C) Copyright Taligent, Inc. 1996,1997 - All Rights Reserved
   28    * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
   29    *
   30    *   The original version of this source code and documentation is copyrighted
   31    * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
   32    * materials are provided under terms of a License Agreement between Taligent
   33    * and Sun. This technology is protected by multiple US and International
   34    * patents. This notice and attribution to Taligent may not be removed.
   35    *   Taligent is a registered trademark of Taligent, Inc.
   36    *
   37    */
   38   
   39   package java.text;
   40   /**
   41    * CollationRules contains the default en_US collation rules as a base
   42    * for building other collation tables.
   43    * <p>Note that decompositions are done before these rules are used,
   44    * so they do not have to contain accented characters, such as A-grave.
   45    * @see                RuleBasedCollator
   46    * @see                LocaleElements
   47    * @author             Helena Shih, Mark Davis
   48    */
   49   final class CollationRules {
   50       final static String DEFAULTRULES = new String(
   51           "" // no FRENCH accent order by default, add in French Delta
   52           // IGNORABLES (up to first < character)
   53           // COMPLETELY IGNORE format characters
   54           + "='\u200B'=\u200C=\u200D=\u200E=\u200F"
   55           // Control Characters
   56           + "=\u0000 =\u0001 =\u0002 =\u0003 =\u0004" //null, .. eot
   57           + "=\u0005 =\u0006 =\u0007 =\u0008 ='\u0009'" //enq, ...
   58           + "='\u000b' =\u000e" //vt,, so
   59           + "=\u000f ='\u0010' =\u0011 =\u0012 =\u0013" //si, dle, dc1, dc2, dc3
   60           + "=\u0014 =\u0015 =\u0016 =\u0017 =\u0018" //dc4, nak, syn, etb, can
   61           + "=\u0019 =\u001a =\u001b =\u001c =\u001d" //em, sub, esc, fs, gs
   62           + "=\u001e =\u001f =\u007f"                   //rs, us, del
   63           //....then the C1 Latin 1 reserved control codes
   64           + "=\u0080 =\u0081 =\u0082 =\u0083 =\u0084 =\u0085"
   65           + "=\u0086 =\u0087 =\u0088 =\u0089 =\u008a =\u008b"
   66           + "=\u008c =\u008d =\u008e =\u008f =\u0090 =\u0091"
   67           + "=\u0092 =\u0093 =\u0094 =\u0095 =\u0096 =\u0097"
   68           + "=\u0098 =\u0099 =\u009a =\u009b =\u009c =\u009d"
   69           + "=\u009e =\u009f"
   70           // IGNORE except for secondary, tertiary difference
   71           // Spaces
   72           + ";'\u0020';'\u00A0'"                  // spaces
   73           + ";'\u2000';'\u2001';'\u2002';'\u2003';'\u2004'"  // spaces
   74           + ";'\u2005';'\u2006';'\u2007';'\u2008';'\u2009'"  // spaces
   75           + ";'\u200A';'\u3000';'\uFEFF'"                // spaces
   76           + ";'\r' ;'\t' ;'\n';'\f';'\u000b'"  // whitespace
   77   
   78           // Non-spacing accents
   79   
   80           + ";\u0301"          // non-spacing acute accent
   81           + ";\u0300"          // non-spacing grave accent
   82           + ";\u0306"          // non-spacing breve accent
   83           + ";\u0302"          // non-spacing circumflex accent
   84           + ";\u030c"          // non-spacing caron/hacek accent
   85           + ";\u030a"          // non-spacing ring above accent
   86           + ";\u030d"          // non-spacing vertical line above
   87           + ";\u0308"          // non-spacing diaeresis accent
   88           + ";\u030b"          // non-spacing double acute accent
   89           + ";\u0303"          // non-spacing tilde accent
   90           + ";\u0307"          // non-spacing dot above/overdot accent
   91           + ";\u0304"          // non-spacing macron accent
   92           + ";\u0337"          // non-spacing short slash overlay (overstruck diacritic)
   93           + ";\u0327"          // non-spacing cedilla accent
   94           + ";\u0328"          // non-spacing ogonek accent
   95           + ";\u0323"          // non-spacing dot-below/underdot accent
   96           + ";\u0332"          // non-spacing underscore/underline accent
   97           // with the rest of the general diacritical marks in binary order
   98           + ";\u0305"          // non-spacing overscore/overline
   99           + ";\u0309"          // non-spacing hook above
  100           + ";\u030e"          // non-spacing double vertical line above
  101           + ";\u030f"          // non-spacing double grave
  102           + ";\u0310"          // non-spacing chandrabindu
  103           + ";\u0311"          // non-spacing inverted breve
  104           + ";\u0312"          // non-spacing turned comma above/cedilla above
  105           + ";\u0313"          // non-spacing comma above
  106           + ";\u0314"          // non-spacing reversed comma above
  107           + ";\u0315"          // non-spacing comma above right
  108           + ";\u0316"          // non-spacing grave below
  109           + ";\u0317"          // non-spacing acute below
  110           + ";\u0318"          // non-spacing left tack below
  111           + ";\u0319"          // non-spacing tack below
  112           + ";\u031a"          // non-spacing left angle above
  113           + ";\u031b"          // non-spacing horn
  114           + ";\u031c"          // non-spacing left half ring below
  115           + ";\u031d"          // non-spacing up tack below
  116           + ";\u031e"          // non-spacing down tack below
  117           + ";\u031f"          // non-spacing plus sign below
  118           + ";\u0320"          // non-spacing minus sign below
  119           + ";\u0321"          // non-spacing palatalized hook below
  120           + ";\u0322"          // non-spacing retroflex hook below
  121           + ";\u0324"          // non-spacing double dot below
  122           + ";\u0325"          // non-spacing ring below
  123           + ";\u0326"          // non-spacing comma below
  124           + ";\u0329"          // non-spacing vertical line below
  125           + ";\u032a"          // non-spacing bridge below
  126           + ";\u032b"          // non-spacing inverted double arch below
  127           + ";\u032c"          // non-spacing hacek below
  128           + ";\u032d"          // non-spacing circumflex below
  129           + ";\u032e"          // non-spacing breve below
  130           + ";\u032f"          // non-spacing inverted breve below
  131           + ";\u0330"          // non-spacing tilde below
  132           + ";\u0331"          // non-spacing macron below
  133           + ";\u0333"          // non-spacing double underscore
  134           + ";\u0334"          // non-spacing tilde overlay
  135           + ";\u0335"          // non-spacing short bar overlay
  136           + ";\u0336"          // non-spacing long bar overlay
  137           + ";\u0338"          // non-spacing long slash overlay
  138           + ";\u0339"          // non-spacing right half ring below
  139           + ";\u033a"          // non-spacing inverted bridge below
  140           + ";\u033b"          // non-spacing square below
  141           + ";\u033c"          // non-spacing seagull below
  142           + ";\u033d"          // non-spacing x above
  143           + ";\u033e"          // non-spacing vertical tilde
  144           + ";\u033f"          // non-spacing double overscore
  145           //+ ";\u0340"          // non-spacing grave tone mark == \u0300
  146           //+ ";\u0341"          // non-spacing acute tone mark == \u0301
  147           + ";\u0342;"
  148           //+ "\u0343;" // == \u0313
  149           + "\u0344;\u0345;\u0360;\u0361"    // newer
  150           + ";\u0483;\u0484;\u0485;\u0486"    // Cyrillic accents
  151   
  152           + ";\u20D0;\u20D1;\u20D2"           // symbol accents
  153           + ";\u20D3;\u20D4;\u20D5"           // symbol accents
  154           + ";\u20D6;\u20D7;\u20D8"           // symbol accents
  155           + ";\u20D9;\u20DA;\u20DB"           // symbol accents
  156           + ";\u20DC;\u20DD;\u20DE"           // symbol accents
  157           + ";\u20DF;\u20E0;\u20E1"           // symbol accents
  158   
  159           + ",'\u002D';\u00AD"                // dashes
  160           + ";\u2010;\u2011;\u2012"           // dashes
  161           + ";\u2013;\u2014;\u2015"           // dashes
  162           + ";\u2212"                         // dashes
  163   
  164           // other punctuation
  165   
  166           + "<'\u005f'"        // underline/underscore (spacing)
  167           + "<\u00af"          // overline or macron (spacing)
  168           + "<'\u002c'"        // comma (spacing)
  169           + "<'\u003b'"        // semicolon
  170           + "<'\u003a'"        // colon
  171           + "<'\u0021'"        // exclamation point
  172           + "<\u00a1"          // inverted exclamation point
  173           + "<'\u003f'"        // question mark
  174           + "<\u00bf"          // inverted question mark
  175           + "<'\u002f'"        // slash
  176           + "<'\u002e'"        // period/full stop
  177           + "<\u00b4"          // acute accent (spacing)
  178           + "<'\u0060'"        // grave accent (spacing)
  179           + "<'\u005e'"        // circumflex accent (spacing)
  180           + "<\u00a8"          // diaresis/umlaut accent (spacing)
  181           + "<'\u007e'"        // tilde accent (spacing)
  182           + "<\u00b7"          // middle dot (spacing)
  183           + "<\u00b8"          // cedilla accent (spacing)
  184           + "<'\u0027'"        // apostrophe
  185           + "<'\"'"            // quotation marks
  186           + "<\u00ab"          // left angle quotes
  187           + "<\u00bb"          // right angle quotes
  188           + "<'\u0028'"        // left parenthesis
  189           + "<'\u0029'"        // right parenthesis
  190           + "<'\u005b'"        // left bracket
  191           + "<'\u005d'"        // right bracket
  192           + "<'\u007b'"        // left brace
  193           + "<'\u007d'"        // right brace
  194           + "<\u00a7"          // section symbol
  195           + "<\u00b6"          // paragraph symbol
  196           + "<\u00a9"          // copyright symbol
  197           + "<\u00ae"          // registered trademark symbol
  198           + "<'\u0040'"          // at sign
  199           + "<\u00a4"          // international currency symbol
  200           + "<\u0e3f"          // baht sign
  201           + "<\u00a2"          // cent sign
  202           + "<\u20a1"          // colon sign
  203           + "<\u20a2"          // cruzeiro sign
  204           + "<'\u0024'"        // dollar sign
  205           + "<\u20ab"          // dong sign
  206           + "<\u20ac"          // euro sign
  207           + "<\u20a3"          // franc sign
  208           + "<\u20a4"          // lira sign
  209           + "<\u20a5"          // mill sign
  210           + "<\u20a6"          // naira sign
  211           + "<\u20a7"          // peseta sign
  212           + "<\u00a3"          // pound-sterling sign
  213           + "<\u20a8"          // rupee sign
  214           + "<\u20aa"          // new shekel sign
  215           + "<\u20a9"          // won sign
  216           + "<\u00a5"          // yen sign
  217           + "<'\u002a'"        // asterisk
  218           + "<'\\'"            // backslash
  219           + "<'\u0026'"        // ampersand
  220           + "<'\u0023'"        // number sign
  221           + "<'\u0025'"        // percent sign
  222           + "<'\u002b'"        // plus sign
  223           + "<\u00b1"          // plus-or-minus sign
  224           + "<\u00f7"          // divide sign
  225           + "<\u00d7"          // multiply sign
  226           + "<'\u003c'"        // less-than sign
  227           + "<'\u003d'"        // equal sign
  228           + "<'\u003e'"        // greater-than sign
  229           + "<\u00ac"          // end of line symbol/logical NOT symbol
  230           + "<'\u007c'"          // vertical line/logical OR symbol
  231           + "<\u00a6"          // broken vertical line
  232           + "<\u00b0"          // degree symbol
  233           + "<\u00b5"          // micro symbol
  234   
  235           // NUMERICS
  236   
  237           + "<0<1<2<3<4<5<6<7<8<9"
  238           + "<\u00bc<\u00bd<\u00be"   // 1/4,1/2,3/4 fractions
  239   
  240           // NON-IGNORABLES
  241           + "<a,A"
  242           + "<b,B"
  243           + "<c,C"
  244           + "<d,D"
  245           + "<\u00F0,\u00D0"                  // eth
  246           + "<e,E"
  247           + "<f,F"
  248           + "<g,G"
  249           + "<h,H"
  250           + "<i,I"
  251           + "<j,J"
  252           + "<k,K"
  253           + "<l,L"
  254           + "<m,M"
  255           + "<n,N"
  256           + "<o,O"
  257           + "<p,P"
  258           + "<q,Q"
  259           + "<r,R"
  260           + "<s, S & SS,\u00DF"             // s-zet
  261           + "<t,T"
  262           + "& TH, \u00DE &TH, \u00FE "     // thorn
  263           + "<u,U"
  264           + "<v,V"
  265           + "<w,W"
  266           + "<x,X"
  267           + "<y,Y"
  268           + "<z,Z"
  269           + "&AE,\u00C6"                    // ae & AE ligature
  270           + "&AE,\u00E6"
  271           + "&OE,\u0152"                    // oe & OE ligature
  272           + "&OE,\u0153"
  273       );
  274   }

Save This Page
Home » openjdk-7 » java » text » [javadoc | source]