Save This Page
Home » openjdk-7 » sun » io » [javadoc | source]
    1   /*
    2    * Copyright 1996-1997 Sun Microsystems, Inc.  All Rights Reserved.
    3    * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4    *
    5    * This code is free software; you can redistribute it and/or modify it
    6    * under the terms of the GNU General Public License version 2 only, as
    7    * published by the Free Software Foundation.  Sun designates this
    8    * particular file as subject to the "Classpath" exception as provided
    9    * by Sun in the LICENSE file that accompanied this code.
   10    *
   11    * This code is distributed in the hope that it will be useful, but WITHOUT
   12    * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   13    * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   14    * version 2 for more details (a copy is included in the LICENSE file that
   15    * accompanied this code).
   16    *
   17    * You should have received a copy of the GNU General Public License version
   18    * 2 along with this work; if not, write to the Free Software Foundation,
   19    * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   20    *
   21    * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
   22    * CA 95054 USA or visit www.sun.com if you need additional information or
   23    * have any questions.
   24    */
   25   package sun.io;
   26   
   27   
   28   /**
   29    * UCS Transformation Format 8 (UTF-8) -> UCS2 (UTF16) converter
   30    *
   31    * see CharToByteUTF8.java about UTF-8 format
   32    */
   33   
   34   public class ByteToCharUTF8 extends ByteToCharConverter {
   35   
   36       private int savedSize;
   37       private byte[] savedBytes;
   38   
   39       public ByteToCharUTF8() {
   40           super();
   41           savedSize = 0;
   42           savedBytes = new byte[5];
   43       }
   44   
   45       public int flush(char[] output, int outStart, int outEnd)
   46           throws MalformedInputException
   47       {
   48           if (savedSize != 0) {
   49               savedSize = 0;
   50               badInputLength = 0;
   51               throw new MalformedInputException();
   52           }
   53           byteOff = charOff = 0;
   54           return 0;
   55       }
   56   
   57       /**
   58        * Character converson
   59        */
   60       public int convert(byte[] input, int inOff, int inEnd,
   61                          char[] output, int outOff, int outEnd)
   62           throws MalformedInputException, ConversionBufferFullException
   63       {
   64           int byte1, byte2, byte3, byte4;
   65           char[] outputChar = new char[2];
   66           int outputSize;
   67           int byteOffAdjustment = 0;
   68   
   69           if (savedSize != 0) {
   70               byte[] newBuf;
   71               newBuf = new byte[inEnd - inOff + savedSize];
   72               for (int i = 0; i < savedSize; i++) {
   73                   newBuf[i] = savedBytes[i];
   74               }
   75               System.arraycopy(input, inOff, newBuf, savedSize, inEnd - inOff);
   76               input = newBuf;
   77               inOff = 0;
   78               inEnd = newBuf.length;
   79               byteOffAdjustment = -savedSize;
   80               savedSize = 0;
   81           }
   82   
   83           charOff = outOff;
   84           byteOff = inOff;
   85           int startByteOff;
   86   
   87           while(byteOff < inEnd) {
   88   
   89               startByteOff = byteOff;
   90               byte1 = input[byteOff++] & 0xff;
   91   
   92               if ((byte1 & 0x80) == 0){
   93                   outputChar[0] = (char)byte1;
   94                   outputSize = 1;
   95               } else if ((byte1 & 0xe0) == 0xc0) {
   96                   if (byteOff >= inEnd) {
   97                       savedSize = 1;
   98                       savedBytes[0] = (byte)byte1;
   99                       break;
  100                   }
  101                   byte2 = input[byteOff++] & 0xff;
  102                   if ((byte2 & 0xc0) != 0x80) {
  103                       badInputLength = 2;
  104                       byteOff += byteOffAdjustment;
  105                       throw new MalformedInputException();
  106                   }
  107                   outputChar[0] = (char)(((byte1 & 0x1f) << 6) | (byte2 & 0x3f));
  108                   outputSize = 1;
  109               } else if ((byte1 & 0xf0) == 0xe0){
  110                   if (byteOff + 1 >= inEnd) {
  111                           savedBytes[0] = (byte)byte1;
  112                       if (byteOff >= inEnd) {
  113                           savedSize = 1;
  114                       } else {
  115                           savedSize = 2;
  116                           savedBytes[1] = (byte)input[byteOff++];
  117                       }
  118                       break;
  119                   }
  120                   byte2 = input[byteOff++] & 0xff;
  121                   byte3 = input[byteOff++] & 0xff;
  122                   if ((byte2 & 0xc0) != 0x80 || (byte3 & 0xc0) != 0x80) {
  123                       badInputLength = 3;
  124                       byteOff += byteOffAdjustment;
  125                       throw new MalformedInputException();
  126                   }
  127                   outputChar[0] = (char)(((byte1 & 0x0f) << 12)
  128                                          | ((byte2 & 0x3f) << 6)
  129                                          | (byte3 & 0x3f));
  130                   outputSize = 1;
  131               } else if ((byte1 & 0xf8) == 0xf0) {
  132                   if (byteOff + 2 >= inEnd) {
  133                       savedBytes[0] = (byte)byte1;
  134                       if (byteOff >= inEnd) {
  135                           savedSize = 1;
  136                       } else if (byteOff + 1 >= inEnd) {
  137                           savedSize = 2;
  138                           savedBytes[1] = (byte)input[byteOff++];
  139                       } else {
  140                           savedSize = 3;
  141                           savedBytes[1] = (byte)input[byteOff++];
  142                           savedBytes[2] = (byte)input[byteOff++];
  143                       }
  144                       break;
  145                   }
  146                   byte2 = input[byteOff++] & 0xff;
  147                   byte3 = input[byteOff++] & 0xff;
  148                   byte4 = input[byteOff++] & 0xff;
  149                   if ((byte2 & 0xc0) != 0x80 ||
  150                       (byte3 & 0xc0) != 0x80 ||
  151                       (byte4 & 0xc0) != 0x80) {
  152                       badInputLength = 4;
  153                       byteOff += byteOffAdjustment;
  154                       throw new MalformedInputException();
  155                   }
  156                   // this byte sequence is UTF16 character
  157                   int ucs4 = (int)(0x07 & byte1) << 18 |
  158                              (int)(0x3f & byte2) << 12 |
  159                              (int)(0x3f & byte3) <<  6 |
  160                              (int)(0x3f & byte4);
  161                   outputChar[0] = (char)((ucs4 - 0x10000) / 0x400 + 0xd800);
  162                   outputChar[1] = (char)((ucs4 - 0x10000) % 0x400 + 0xdc00);
  163                   outputSize = 2;
  164               } else {
  165                   badInputLength = 1;
  166                   byteOff += byteOffAdjustment;
  167                   throw new MalformedInputException();
  168               }
  169   
  170               if (charOff + outputSize > outEnd) {
  171                   byteOff = startByteOff;
  172                   byteOff += byteOffAdjustment;
  173                   throw new ConversionBufferFullException();
  174               }
  175   
  176               for (int i = 0; i < outputSize; i++) {
  177                   output[charOff + i] = outputChar[i];
  178               }
  179               charOff += outputSize;
  180           }
  181   
  182           byteOff += byteOffAdjustment;
  183           return charOff - outOff;
  184       }
  185   
  186       /*
  187        * Return the character set id
  188        */
  189       public String getCharacterEncoding() {
  190           return "UTF8";
  191       }
  192   
  193       /*
  194        *   Reset after finding bad input
  195        */
  196       public void reset() {
  197           byteOff = charOff = 0;
  198           savedSize = 0;
  199       }
  200   }

Save This Page
Home » openjdk-7 » sun » io » [javadoc | source]