Home » JBoss-5.1.0 » org » jboss » util » xml » [javadoc | source]

    1   /*
    2    * The Apache Software License, Version 1.1
    3    *
    4    *
    5    * Copyright (c) 1999 The Apache Software Foundation.  All rights 
    6    * reserved.
    7    *
    8    * Redistribution and use in source and binary forms, with or without
    9    * modification, are permitted provided that the following conditions
   10    * are met:
   11    *
   12    * 1. Redistributions of source code must retain the above copyright
   13    *    notice, this list of conditions and the following disclaimer. 
   14    *
   15    * 2. Redistributions in binary form must reproduce the above copyright
   16    *    notice, this list of conditions and the following disclaimer in
   17    *    the documentation and/or other materials provided with the
   18    *    distribution.
   19    *
   20    * 3. The end-user documentation included with the redistribution,
   21    *    if any, must include the following acknowledgment:  
   22    *       "This product includes software developed by the
   23    *        Apache Software Foundation (http://www.apache.org/)."
   24    *    Alternately, this acknowledgment may appear in the software itself,
   25    *    if and wherever such third-party acknowledgments normally appear.
   26    *
   27    * 4. The names "Xerces" and "Apache Software Foundation" must
   28    *    not be used to endorse or promote products derived from this
   29    *    software without prior written permission. For written 
   30    *    permission, please contact apache@apache.org.
   31    *
   32    * 5. Products derived from this software may not be called "Apache",
   33    *    nor may "Apache" appear in their name, without prior written
   34    *    permission of the Apache Software Foundation.
   35    *
   36    * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   37    * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   38    * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   39    * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   40    * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   41    * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   42    * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   43    * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   44    * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   45    * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   46    * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   47    * SUCH DAMAGE.
   48    * ====================================================================
   49    *
   50    * This software consists of voluntary contributions made by many
   51    * individuals on behalf of the Apache Software Foundation and was
   52    * originally based on software copyright (c) 1999, International
   53    * Business Machines, Inc., http://www.apache.org.  For more
   54    * information on the Apache Software Foundation, please see
   55    * <http://www.apache.org/>.
   56    */
   57   
   58   package org.jboss.util.xml;
   59   
import java.util.Hashtable;
import java.util.Locale;
   61   
   62   /**
   63    * MIME2Java is a convenience class which handles conversions between MIME charset names
   64    * and Java encoding names.
   65    * <p>The supported XML encodings are the intersection of XML-supported code sets and those 
   66    * supported in JDK 1.1.
   67    * <p>MIME charset names are used on <var>xmlEncoding</var> parameters to methods such
   68    * as <code>TXDocument#setEncoding</code> and <code>DTD#setEncoding</code>.
   69    * <p>Java encoding names are used on <var>encoding</var> parameters to
   70    * methods such as <code>TXDocument#printWithFormat</code> and <code>DTD#printExternal</code>. 
   71    * <P>
   72    * <TABLE BORDER="0" WIDTH="100%">
   73    *  <TR>
   74    *      <TD WIDTH="33%">
   75    *          <P ALIGN="CENTER"><B>Common Name</B>
   76    *      </TD>
   77    *      <TD WIDTH="15%">
   78    *          <P ALIGN="CENTER"><B>Use this name in XML files</B>
   79    *      </TD>
   80    *      <TD WIDTH="12%">
   81    *          <P ALIGN="CENTER"><B>Name Type</B>
   82    *      </TD>
   83    *      <TD WIDTH="31%">
   84    *          <P ALIGN="CENTER"><B>Xerces converts to this Java Encoder Name</B>
   85    *      </TD>
   86    *  </TR>
   87    *  <TR>
   88    *      <TD WIDTH="33%">8 bit Unicode</TD>
   89    *      <TD WIDTH="15%">
   90    *          <P ALIGN="CENTER">UTF-8
   91    *      </TD>
   92    *      <TD WIDTH="12%">
   93    *          <P ALIGN="CENTER">IANA
   94    *      </TD>
   95    *      <TD WIDTH="31%">
   96    *          <P ALIGN="CENTER">UTF8
   97    *      </TD>
   98    *  </TR>
   99    *  <TR>
  100    *      <TD WIDTH="33%">ISO Latin 1</TD>
  101    *      <TD WIDTH="15%">
  102    *          <P ALIGN="CENTER">ISO-8859-1
  103    *      </TD>
  104    *      <TD WIDTH="12%">
  105    *          <P ALIGN="CENTER">MIME
  106    *      </TD>
  107    *      <TD WIDTH="31%">
  108    *          <P ALIGN="CENTER">ISO-8859-1
  109    *      </TD>
  110    *  </TR>
  111    *  <TR>
  112    *      <TD WIDTH="33%">ISO Latin 2</TD>
  113    *      <TD WIDTH="15%">
  114    *          <P ALIGN="CENTER">ISO-8859-2
  115    *      </TD>
  116    *      <TD WIDTH="12%">
  117    *          <P ALIGN="CENTER">MIME
  118    *      </TD>
  119    *      <TD WIDTH="31%">
  120    *          <P ALIGN="CENTER">ISO-8859-2
  121    *      </TD>
  122    *  </TR>
  123    *  <TR>
  124    *      <TD WIDTH="33%">ISO Latin 3</TD>
  125    *      <TD WIDTH="15%">
  126    *          <P ALIGN="CENTER">ISO-8859-3
  127    *      </TD>
  128    *      <TD WIDTH="12%">
  129    *          <P ALIGN="CENTER">MIME
  130    *      </TD>
  131    *      <TD WIDTH="31%">
  132    *          <P ALIGN="CENTER">ISO-8859-3
  133    *      </TD>
  134    *  </TR>
  135    *  <TR>
  136    *      <TD WIDTH="33%">ISO Latin 4</TD>
  137    *      <TD WIDTH="15%">
  138    *          <P ALIGN="CENTER">ISO-8859-4
  139    *      </TD>
  140    *      <TD WIDTH="12%">
  141    *          <P ALIGN="CENTER">MIME
  142    *      </TD>
  143    *      <TD WIDTH="31%">
  144    *          <P ALIGN="CENTER">ISO-8859-4
  145    *      </TD>
  146    *  </TR>
  147    *  <TR>
  148    *      <TD WIDTH="33%">ISO Latin Cyrillic</TD>
  149    *      <TD WIDTH="15%">
  150    *          <P ALIGN="CENTER">ISO-8859-5
  151    *      </TD>
  152    *      <TD WIDTH="12%">
  153    *          <P ALIGN="CENTER">MIME
  154    *      </TD>
  155    *      <TD WIDTH="31%">
  156    *          <P ALIGN="CENTER">ISO-8859-5
  157    *      </TD>
  158    *  </TR>
  159    *  <TR>
  160    *      <TD WIDTH="33%">ISO Latin Arabic</TD>
  161    *      <TD WIDTH="15%">
  162    *          <P ALIGN="CENTER">ISO-8859-6
  163    *      </TD>
  164    *      <TD WIDTH="12%">
  165    *          <P ALIGN="CENTER">MIME
  166    *      </TD>
  167    *      <TD WIDTH="31%">
  168    *          <P ALIGN="CENTER">ISO-8859-6
  169    *      </TD>
  170    *  </TR>
  171    *  <TR>
  172    *      <TD WIDTH="33%">ISO Latin Greek</TD>
  173    *      <TD WIDTH="15%">
  174    *          <P ALIGN="CENTER">ISO-8859-7
  175    *      </TD>
  176    *      <TD WIDTH="12%">
  177    *          <P ALIGN="CENTER">MIME
  178    *      </TD>
  179    *      <TD WIDTH="31%">
  180    *          <P ALIGN="CENTER">ISO-8859-7
  181    *      </TD>
  182    *  </TR>
  183    *  <TR>
  184    *      <TD WIDTH="33%">ISO Latin Hebrew</TD>
  185    *      <TD WIDTH="15%">
  186    *          <P ALIGN="CENTER">ISO-8859-8
  187    *      </TD>
  188    *      <TD WIDTH="12%">
  189    *          <P ALIGN="CENTER">MIME
  190    *      </TD>
  191    *      <TD WIDTH="31%">
  192    *          <P ALIGN="CENTER">ISO-8859-8
  193    *      </TD>
  194    *  </TR>
  195    *  <TR>
  196    *      <TD WIDTH="33%">ISO Latin 5</TD>
  197    *      <TD WIDTH="15%">
  198    *          <P ALIGN="CENTER">ISO-8859-9
  199    *      </TD>
  200    *      <TD WIDTH="12%">
  201    *          <P ALIGN="CENTER">MIME
  202    *      </TD>
  203    *      <TD WIDTH="31%">
  204    *          <P ALIGN="CENTER">ISO-8859-9
  205    *      </TD>
  206    *  </TR>
  207    *  <TR>
  208    *      <TD WIDTH="33%">EBCDIC: US</TD>
  209    *      <TD WIDTH="15%">
  210    *          <P ALIGN="CENTER">ebcdic-cp-us
  211    *      </TD>
  212    *      <TD WIDTH="12%">
  213    *          <P ALIGN="CENTER">IANA
  214    *      </TD>
  215    *      <TD WIDTH="31%">
  216    *          <P ALIGN="CENTER">cp037
  217    *      </TD>
  218    *  </TR>
  219    *  <TR>
  220    *      <TD WIDTH="33%">EBCDIC: Canada</TD>
  221    *      <TD WIDTH="15%">
  222    *          <P ALIGN="CENTER">ebcdic-cp-ca
  223    *      </TD>
  224    *      <TD WIDTH="12%">
  225    *          <P ALIGN="CENTER">IANA
  226    *      </TD>
  227    *      <TD WIDTH="31%">
  228    *          <P ALIGN="CENTER">cp037
  229    *      </TD>
  230    *  </TR>
  231    *  <TR>
  232    *      <TD WIDTH="33%">EBCDIC: Netherlands</TD>
  233    *      <TD WIDTH="15%">
  234    *          <P ALIGN="CENTER">ebcdic-cp-nl
  235    *      </TD>
  236    *      <TD WIDTH="12%">
  237    *          <P ALIGN="CENTER">IANA
  238    *      </TD>
  239    *      <TD WIDTH="31%">
  240    *          <P ALIGN="CENTER">cp037
  241    *      </TD>
  242    *  </TR>
  243    *  <TR>
  244    *      <TD WIDTH="33%">EBCDIC: Denmark</TD>
  245    *      <TD WIDTH="15%">
  246    *          <P ALIGN="CENTER">ebcdic-cp-dk
  247    *      </TD>
  248    *      <TD WIDTH="12%">
  249    *          <P ALIGN="CENTER">IANA
  250    *      </TD>
  251    *      <TD WIDTH="31%">
  252    *          <P ALIGN="CENTER">cp277
  253    *      </TD>
  254    *  </TR>
  255    *  <TR>
  256    *      <TD WIDTH="33%">EBCDIC: Norway</TD>
  257    *      <TD WIDTH="15%">
  258    *          <P ALIGN="CENTER">ebcdic-cp-no
  259    *      </TD>
  260    *      <TD WIDTH="12%">
  261    *          <P ALIGN="CENTER">IANA
  262    *      </TD>
  263    *      <TD WIDTH="31%">
  264    *          <P ALIGN="CENTER">cp277
  265    *      </TD>
  266    *  </TR>
  267    *  <TR>
  268    *      <TD WIDTH="33%">EBCDIC: Finland</TD>
  269    *      <TD WIDTH="15%">
  270    *          <P ALIGN="CENTER">ebcdic-cp-fi
  271    *      </TD>
  272    *      <TD WIDTH="12%">
  273    *          <P ALIGN="CENTER">IANA
  274    *      </TD>
  275    *      <TD WIDTH="31%">
  276    *          <P ALIGN="CENTER">cp278
  277    *      </TD>
  278    *  </TR>
  279    *  <TR>
  280    *      <TD WIDTH="33%">EBCDIC: Sweden</TD>
  281    *      <TD WIDTH="15%">
  282    *          <P ALIGN="CENTER">ebcdic-cp-se
  283    *      </TD>
  284    *      <TD WIDTH="12%">
  285    *          <P ALIGN="CENTER">IANA
  286    *      </TD>
  287    *      <TD WIDTH="31%">
  288    *          <P ALIGN="CENTER">cp278
  289    *      </TD>
  290    *  </TR>
  291    *  <TR>
  292    *      <TD WIDTH="33%">EBCDIC: Italy</TD>
  293    *      <TD WIDTH="15%">
  294    *          <P ALIGN="CENTER">ebcdic-cp-it
  295    *      </TD>
  296    *      <TD WIDTH="12%">
  297    *          <P ALIGN="CENTER">IANA
  298    *      </TD>
  299    *      <TD WIDTH="31%">
  300    *          <P ALIGN="CENTER">cp280
  301    *      </TD>
  302    *  </TR>
  303    *  <TR>
  304    *      <TD WIDTH="33%">EBCDIC: Spain, Latin America</TD>
  305    *      <TD WIDTH="15%">
  306    *          <P ALIGN="CENTER">ebcdic-cp-es
  307    *      </TD>
  308    *      <TD WIDTH="12%">
  309    *          <P ALIGN="CENTER">IANA
  310    *      </TD>
  311    *      <TD WIDTH="31%">
  312    *          <P ALIGN="CENTER">cp284
  313    *      </TD>
  314    *  </TR>
  315    *  <TR>
  316    *      <TD WIDTH="33%">EBCDIC: Great Britain</TD>
  317    *      <TD WIDTH="15%">
  318    *          <P ALIGN="CENTER">ebcdic-cp-gb
  319    *      </TD>
  320    *      <TD WIDTH="12%">
  321    *          <P ALIGN="CENTER">IANA
  322    *      </TD>
  323    *      <TD WIDTH="31%">
  324    *          <P ALIGN="CENTER">cp285
  325    *      </TD>
  326    *  </TR>
  327    *  <TR>
  328    *      <TD WIDTH="33%">EBCDIC: France</TD>
  329    *      <TD WIDTH="15%">
  330    *          <P ALIGN="CENTER">ebcdic-cp-fr
  331    *      </TD>
  332    *      <TD WIDTH="12%">
  333    *          <P ALIGN="CENTER">IANA
  334    *      </TD>
  335    *      <TD WIDTH="31%">
  336    *          <P ALIGN="CENTER">cp297
  337    *      </TD>
  338    *  </TR>
  339    *  <TR>
  340    *      <TD WIDTH="33%">EBCDIC: Arabic</TD>
  341    *      <TD WIDTH="15%">
  342    *          <P ALIGN="CENTER">ebcdic-cp-ar1
  343    *      </TD>
  344    *      <TD WIDTH="12%">
  345    *          <P ALIGN="CENTER">IANA
  346    *      </TD>
  347    *      <TD WIDTH="31%">
  348    *          <P ALIGN="CENTER">cp420
  349    *      </TD>
  350    *  </TR>
  351    *  <TR>
  352    *      <TD WIDTH="33%">EBCDIC: Hebrew</TD>
  353    *      <TD WIDTH="15%">
  354    *          <P ALIGN="CENTER">ebcdic-cp-he
  355    *      </TD>
  356    *      <TD WIDTH="12%">
  357    *          <P ALIGN="CENTER">IANA
  358    *      </TD>
  359    *      <TD WIDTH="31%">
  360    *          <P ALIGN="CENTER">cp424
  361    *      </TD>
  362    *  </TR>
  363    *  <TR>
  364    *      <TD WIDTH="33%">EBCDIC: Switzerland</TD>
  365    *      <TD WIDTH="15%">
  366    *          <P ALIGN="CENTER">ebcdic-cp-ch
  367    *      </TD>
  368    *      <TD WIDTH="12%">
  369    *          <P ALIGN="CENTER">IANA
  370    *      </TD>
  371    *      <TD WIDTH="31%">
  372    *          <P ALIGN="CENTER">cp500
  373    *      </TD>
  374    *  </TR>
  375    *  <TR>
  376    *      <TD WIDTH="33%">EBCDIC: Roece</TD>
  377    *      <TD WIDTH="15%">
  378    *          <P ALIGN="CENTER">ebcdic-cp-roece
  379    *      </TD>
  380    *      <TD WIDTH="12%">
  381    *          <P ALIGN="CENTER">IANA
  382    *      </TD>
  383    *      <TD WIDTH="31%">
  384    *          <P ALIGN="CENTER">cp870
  385    *      </TD>
  386    *  </TR>
  387    *  <TR>
  388    *      <TD WIDTH="33%">EBCDIC: Yugoslavia</TD>
  389    *      <TD WIDTH="15%">
  390    *          <P ALIGN="CENTER">ebcdic-cp-yu
  391    *      </TD>
  392    *      <TD WIDTH="12%">
  393    *          <P ALIGN="CENTER">IANA
  394    *      </TD>
  395    *      <TD WIDTH="31%">
  396    *          <P ALIGN="CENTER">cp870
  397    *      </TD>
  398    *  </TR>
  399    *  <TR>
  400    *      <TD WIDTH="33%">EBCDIC: Iceland</TD>
  401    *      <TD WIDTH="15%">
  402    *          <P ALIGN="CENTER">ebcdic-cp-is
  403    *      </TD>
  404    *      <TD WIDTH="12%">
  405    *          <P ALIGN="CENTER">IANA
  406    *      </TD>
  407    *      <TD WIDTH="31%">
  408    *          <P ALIGN="CENTER">cp871
  409    *      </TD>
  410    *  </TR>
  411    *  <TR>
  412    *      <TD WIDTH="33%">EBCDIC: Urdu</TD>
  413    *      <TD WIDTH="15%">
  414    *          <P ALIGN="CENTER">ebcdic-cp-ar2
  415    *      </TD>
  416    *      <TD WIDTH="12%">
  417    *          <P ALIGN="CENTER">IANA
  418    *      </TD>
  419    *      <TD WIDTH="31%">
  420    *          <P ALIGN="CENTER">cp918
  421    *      </TD>
  422    *  </TR>
  423    *  <TR>
  424    *      <TD WIDTH="33%">Chinese for PRC, mixed 1/2 byte</TD>
  425    *      <TD WIDTH="15%">
  426    *          <P ALIGN="CENTER">gb2312
  427    *      </TD>
  428    *      <TD WIDTH="12%">
  429    *          <P ALIGN="CENTER">MIME
  430    *      </TD>
  431    *      <TD WIDTH="31%">
  432    *          <P ALIGN="CENTER">GB2312
  433    *      </TD>
  434    *  </TR>
  435    *  <TR>
  436    *      <TD WIDTH="33%">Extended Unix Code, packed for Japanese</TD>
  437    *      <TD WIDTH="15%">
  438    *          <P ALIGN="CENTER">euc-jp
  439    *      </TD>
  440    *      <TD WIDTH="12%">
  441    *          <P ALIGN="CENTER">MIME
  442    *      </TD>
  443    *      <TD WIDTH="31%">
  444    *          <P ALIGN="CENTER">eucjis
  445    *      </TD>
  446    *  </TR>
  447    *  <TR>
  448    *      <TD WIDTH="33%">Japanese: iso-2022-jp</TD>
  449    *      <TD WIDTH="15%">
  450    *          <P ALIGN="CENTER">iso-2022-jp
  451    *      </TD>
  452    *      <TD WIDTH="12%">
  453    *          <P ALIGN="CENTER">MIME
  454    *      </TD>
  455    *      <TD WIDTH="31%">
  456    *          <P ALIGN="CENTER">JIS
  457    *      </TD>
  458    *  </TR>
  459    *  <TR>
  460    *      <TD WIDTH="33%">Japanese: Shift JIS</TD>
  461    *      <TD WIDTH="15%">
  462    *          <P ALIGN="CENTER">Shift_JIS
  463    *      </TD>
  464    *      <TD WIDTH="12%">
  465    *          <P ALIGN="CENTER">MIME
  466    *      </TD>
  467    *      <TD WIDTH="31%">
  468    *          <P ALIGN="CENTER">SJIS
  469    *      </TD>
  470    *  </TR>
  471    *  <TR>
  472    *      <TD WIDTH="33%">Japanese Windows: An extension of Shift JIS</TD>
  473    *      <TD WIDTH="15%">
  474    *          <P ALIGN="CENTER">Windows-31J
  475    *      </TD>
  476    *      <TD WIDTH="12%">
  477    *          <P ALIGN="CENTER">MIME
  478    *      </TD>
  479    *      <TD WIDTH="31%">
  480    *          <P ALIGN="CENTER">MS932 (since JDK 1.2)
  481    *      </TD>
  482    *  </TR>
  483    *  <TR>
  484    *      <TD WIDTH="33%">Chinese: Big5</TD>
  485    *      <TD WIDTH="15%">
  486    *          <P ALIGN="CENTER">Big5
  487    *      </TD>
  488    *      <TD WIDTH="12%">
  489    *          <P ALIGN="CENTER">MIME
  490    *      </TD>
  491    *      <TD WIDTH="31%">
  492    *          <P ALIGN="CENTER">Big5
  493    *      </TD>
  494    *  </TR>
  495    *  <TR>
  496    *      <TD WIDTH="33%">Extended Unix Code, packed for Korean</TD>
  497    *      <TD WIDTH="15%">
  498    *          <P ALIGN="CENTER">euc-kr
  499    *      </TD>
  500    *      <TD WIDTH="12%">
  501    *          <P ALIGN="CENTER">MIME
  502    *      </TD>
  503    *      <TD WIDTH="31%">
  504    *          <P ALIGN="CENTER">KSC5601
  505    *      </TD>
  506    *  </TR>
  507    *  <TR>
  508    *      <TD WIDTH="33%">Cyrillic</TD>
  509    *      <TD WIDTH="15%">
  510    *          <P ALIGN="CENTER">koi8-r
  511    *      </TD>
  512    *      <TD WIDTH="12%">
  513    *          <P ALIGN="CENTER">MIME
  514    *      </TD>
  515    *      <TD WIDTH="31%">
  516    *          <P ALIGN="CENTER">koi8-r
  517    *      </TD>
  518    *  </TR>
  519    * </TABLE>
  520    * 
  521    * @version $Id: MIME2Java.java,v 1.1 2002/02/16 10:35:22 user57 Exp $
  522    * @version ORIGINAL - MIME2Java.java,v 1.2 2000/12/14 19:21:46 lehors 
  523    * @author TAMURA Kent &lt;kent@trl.ibm.co.jp&gt;
  524    */
  525   public class MIME2Java {
  526       
  527       static private Hashtable s_enchash;
  528       static private Hashtable s_revhash;
  529       
  530       static {
  531           s_enchash = new Hashtable();
  532           //    <preferred MIME name (uppercase)>, <Java encoding name>
  533           s_enchash.put("UTF-8", "UTF8");
  534           s_enchash.put("US-ASCII",        "ASCII");
  535           s_enchash.put("ISO-8859-1",      "8859_1");
  536           s_enchash.put("ISO-8859-2",      "8859_2");
  537           s_enchash.put("ISO-8859-3",      "8859_3");
  538           s_enchash.put("ISO-8859-4",      "8859_4");
  539           s_enchash.put("ISO-8859-5",      "8859_5");
  540           s_enchash.put("ISO-8859-6",      "8859_6");
  541           s_enchash.put("ISO-8859-7",      "8859_7");
  542           s_enchash.put("ISO-8859-8",      "8859_8");
  543           s_enchash.put("ISO-8859-9",      "8859_9");
  544           s_enchash.put("ISO-2022-JP",     "JIS");
  545           s_enchash.put("SHIFT_JIS",       "SJIS");
  546           /**
  547            * MS932 is suitable for Windows-31J,
  548            * but JDK 1.1.x does not support MS932.
  549            */
  550           String version = System.getProperty("java.version");
  551           if (version.equals("1.1") || version.startsWith("1.1.")) {
  552               s_enchash.put("WINDOWS-31J",      "SJIS");
  553           } else {
  554               s_enchash.put("WINDOWS-31J",      "MS932");
  555           }
  556           s_enchash.put("EUC-JP",          "EUCJIS");
  557           s_enchash.put("GB2312",          "GB2312");
  558           s_enchash.put("BIG5",            "Big5");
  559           s_enchash.put("EUC-KR",          "KSC5601");
  560           s_enchash.put("ISO-2022-KR",     "ISO2022KR");
  561           s_enchash.put("KOI8-R",          "KOI8_R");
  562   
  563           s_enchash.put("EBCDIC-CP-US",    "CP037");
  564           s_enchash.put("EBCDIC-CP-CA",    "CP037");
  565           s_enchash.put("EBCDIC-CP-NL",    "CP037");
  566           s_enchash.put("EBCDIC-CP-DK",    "CP277");
  567           s_enchash.put("EBCDIC-CP-NO",    "CP277");
  568           s_enchash.put("EBCDIC-CP-FI",    "CP278");
  569           s_enchash.put("EBCDIC-CP-SE",    "CP278");
  570           s_enchash.put("EBCDIC-CP-IT",    "CP280");
  571           s_enchash.put("EBCDIC-CP-ES",    "CP284");
  572           s_enchash.put("EBCDIC-CP-GB",    "CP285");
  573           s_enchash.put("EBCDIC-CP-FR",    "CP297");
  574           s_enchash.put("EBCDIC-CP-AR1",   "CP420");
  575           s_enchash.put("EBCDIC-CP-HE",    "CP424");
  576           s_enchash.put("EBCDIC-CP-CH",    "CP500");
  577           s_enchash.put("EBCDIC-CP-ROECE", "CP870");
  578           s_enchash.put("EBCDIC-CP-YU",    "CP870");
  579           s_enchash.put("EBCDIC-CP-IS",    "CP871");
  580           s_enchash.put("EBCDIC-CP-AR2",   "CP918");
  581   
  582                                                   // j:CNS11643 -> EUC-TW?
  583                                                   // ISO-2022-CN? ISO-2022-CN-EXT?
  584                                                   
  585           s_revhash = new Hashtable();
  586           //    <Java encoding name (uppercase)>, <preferred MIME name>
  587           s_revhash.put("UTF8", "UTF-8");
  588           s_revhash.put("ASCII", "US-ASCII");
  589           s_revhash.put("8859_1", "ISO-8859-1");
  590           s_revhash.put("8859_2", "ISO-8859-2");
  591           s_revhash.put("8859_3", "ISO-8859-3");
  592           s_revhash.put("8859_4", "ISO-8859-4");
  593           s_revhash.put("8859_5", "ISO-8859-5");
  594           s_revhash.put("8859_6", "ISO-8859-6");
  595           s_revhash.put("8859_7", "ISO-8859-7");
  596           s_revhash.put("8859_8", "ISO-8859-8");
  597           s_revhash.put("8859_9", "ISO-8859-9");
  598           s_revhash.put("JIS", "ISO-2022-JP");
  599           s_revhash.put("SJIS", "Shift_JIS");
  600           s_revhash.put("MS932", "WINDOWS-31J");
  601           s_revhash.put("EUCJIS", "EUC-JP");
  602           s_revhash.put("GB2312", "GB2312");
  603           s_revhash.put("BIG5", "Big5");
  604           s_revhash.put("KSC5601", "EUC-KR");
  605           s_revhash.put("ISO2022KR", "ISO-2022-KR");
  606           s_revhash.put("KOI8_R", "KOI8-R");
  607   
  608           s_revhash.put("CP037", "EBCDIC-CP-US");
  609           s_revhash.put("CP037", "EBCDIC-CP-CA");
  610           s_revhash.put("CP037", "EBCDIC-CP-NL");
  611           s_revhash.put("CP277", "EBCDIC-CP-DK");
  612           s_revhash.put("CP277", "EBCDIC-CP-NO");
  613           s_revhash.put("CP278", "EBCDIC-CP-FI");
  614           s_revhash.put("CP278", "EBCDIC-CP-SE");
  615           s_revhash.put("CP280", "EBCDIC-CP-IT");
  616           s_revhash.put("CP284", "EBCDIC-CP-ES");
  617           s_revhash.put("CP285", "EBCDIC-CP-GB");
  618           s_revhash.put("CP297", "EBCDIC-CP-FR");
  619           s_revhash.put("CP420", "EBCDIC-CP-AR1");
  620           s_revhash.put("CP424", "EBCDIC-CP-HE");
  621           s_revhash.put("CP500", "EBCDIC-CP-CH");
  622           s_revhash.put("CP870", "EBCDIC-CP-ROECE");
  623           s_revhash.put("CP870", "EBCDIC-CP-YU");
  624           s_revhash.put("CP871", "EBCDIC-CP-IS");
  625           s_revhash.put("CP918", "EBCDIC-CP-AR2");
  626       }
  627   
  628       private MIME2Java() {
  629       }
  630   
  631       /**
  632        * Convert a MIME charset name, also known as an XML encoding name, to a Java encoding name.
  633        * @param   mimeCharsetName Case insensitive MIME charset name: <code>UTF-8, US-ASCII, ISO-8859-1,
  634        *                          ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6,
  635        *                          ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-2022-JP, Shift_JIS, Windows-31J
  636        *                          EUC-JP, GB2312, Big5, EUC-KR, ISO-2022-KR, KOI8-R,
  637        *                          EBCDIC-CP-US, EBCDIC-CP-CA, EBCDIC-CP-NL, EBCDIC-CP-DK,
  638        *                          EBCDIC-CP-NO, EBCDIC-CP-FI, EBCDIC-CP-SE, EBCDIC-CP-IT,
  639        *                          EBCDIC-CP-ES, EBCDIC-CP-GB, EBCDIC-CP-FR, EBCDIC-CP-AR1,
  640        *                          EBCDIC-CP-HE, EBCDIC-CP-CH, EBCDIC-CP-ROECE, EBCDIC-CP-YU,
  641        *                          EBCDIC-CP-IS and EBCDIC-CP-AR2</code>.
  642        * @return                  Java encoding name, or <var>null</var> if <var>mimeCharsetName</var>
  643        *                          is unknown.
  644        * @see #reverse
  645        */
  646       public static String convert(String mimeCharsetName) {
  647           return (String)s_enchash.get(mimeCharsetName.toUpperCase());
  648       }
  649   
  650       /**
  651        * Convert a Java encoding name to MIME charset name.
  652        * Available values of <i>encoding</i> are "UTF8", "8859_1", "8859_2", "8859_3", "8859_4",
  653        * "8859_5", "8859_6", "8859_7", "8859_8", "8859_9", "JIS", "SJIS", "MS932", "EUCJIS",
  654        * "GB2312", "BIG5", "KSC5601", "ISO2022KR",  "KOI8_R", "CP037", "CP277", "CP278",
  655        * "CP280", "CP284", "CP285", "CP297", "CP420", "CP424", "CP500", "CP870", "CP871" and "CP918".
  656        * @param   encoding    Case insensitive Java encoding name: <code>UTF8, 8859_1, 8859_2, 8859_3,
  657        *                      8859_4, 8859_5, 8859_6, 8859_7, 8859_8, 8859_9, JIS, SJIS, MS932, EUCJIS,
  658        *                      GB2312, BIG5, KSC5601, ISO2022KR, KOI8_R, CP037, CP277, CP278,
  659        *                      CP280, CP284, CP285, CP297, CP420, CP424, CP500, CP870, CP871 
  660        *                      and CP918</code>.
  661        * @return              MIME charset name, or <var>null</var> if <var>encoding</var> is unknown.
  662        * @see #convert
  663        */
  664       public static String reverse(String encoding) {
  665           return (String)s_revhash.get(encoding.toUpperCase());
  666       }
  667   }

Home » JBoss-5.1.0 » org » jboss » util » xml » [javadoc | source]