Save This Page
Home » poi-src-3.2-FINAL-20081019 » org.apache » poi » util » [javadoc | source]
    1   
    2   /* ====================================================================
    3      Licensed to the Apache Software Foundation (ASF) under one or more
    4      contributor license agreements.  See the NOTICE file distributed with
    5      this work for additional information regarding copyright ownership.
    6      The ASF licenses this file to You under the Apache License, Version 2.0
    7      (the "License"); you may not use this file except in compliance with
    8      the License.  You may obtain a copy of the License at
    9   
   10          http://www.apache.org/licenses/LICENSE-2.0
   11   
   12      Unless required by applicable law or agreed to in writing, software
   13      distributed under the License is distributed on an "AS IS" BASIS,
   14      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   15      See the License for the specific language governing permissions and
   16      limitations under the License.
   17   ==================================================================== */
   18           
   19   package org.apache.poi.util;
   20   
   21   import java.io.UnsupportedEncodingException;
   22   import java.text.FieldPosition;
   23   import java.text.NumberFormat;
   24   /** 
   25    *  Title: String Utility Description: Collection of string handling utilities 
   26    *  
   27    * 
   28    *@author     Andrew C. Oliver 
   29    *@author     Sergei Kozello (sergeikozello at mail.ru) 
   30    *@author     Toshiaki Kamoshida (kamoshida.toshiaki at future dot co dot jp) 
   31    *@since      May 10, 2002 
   32    *@version    1.0 
   33    */
   34   public class StringUtil {
   35   	private final static String ENCODING = "ISO-8859-1";
   36   	/**     
   37   	 *  Constructor for the StringUtil object     
   38   	 */
   39   	private StringUtil() {
   40   	}
   41   
   42   	/**     
   43   	 *  Given a byte array of 16-bit unicode characters in Little Endian
   44   	 *  format (most important byte last), return a Java String representation
   45   	 *  of it. 
   46   	 *     
   47   	 * { 0x16, 0x00 } -0x16     
   48   	 *      
   49   	 * @param  string  the byte array to be converted
   50   	 * @param  offset  the initial offset into the
   51   	 *                 byte array. it is assumed that string[ offset ] and string[ offset +
   52   	 *                 1 ] contain the first 16-bit unicode character
   53        * @param len the length of the final string
   54   	 * @return                                     the converted string
   55   	 * @exception  ArrayIndexOutOfBoundsException  if offset is out of bounds for
   56   	 *      the byte array (i.e., is negative or is greater than or equal to     
   57   	 *      string.length)     
   58   	 * @exception  IllegalArgumentException        if len is too large (i.e.,
   59   	 *      there is not enough data in string to create a String of that     
   60   	 *      length)     
   61   	 */
   62   	public static String getFromUnicodeLE(
   63   		final byte[] string,
   64   		final int offset,
   65   		final int len)
   66   		throws ArrayIndexOutOfBoundsException, IllegalArgumentException {
   67   		if ((offset < 0) || (offset >= string.length)) {
   68   			throw new ArrayIndexOutOfBoundsException("Illegal offset");
   69   		}
   70   		if ((len < 0) || (((string.length - offset) / 2) < len)) {
   71   			throw new IllegalArgumentException("Illegal length");
   72   		}
   73   
   74   		try {
   75   			return new String(string, offset, len * 2, "UTF-16LE");
   76   		} catch (UnsupportedEncodingException e) {
   77   			throw new InternalError(); /*unreachable*/
   78   		}
   79   	}
   80   
   81   	/**     
   82   	 *  Given a byte array of 16-bit unicode characters in little endian
   83   	 *  format (most important byte last), return a Java String representation
   84   	 *  of it. 
   85   	 *      
   86   	 * { 0x16, 0x00 } -0x16     
   87   	 *     
   88   	 *@param  string  the byte array to be converted     
   89   	 *@return         the converted string    
   90   	 */
   91   	public static String getFromUnicodeLE(final byte[] string) {
   92   		if(string.length == 0) { return ""; }
   93   		return getFromUnicodeLE(string, 0, string.length / 2);
   94   	}
   95   
   96   	/**     
   97   	 *  Given a byte array of 16-bit unicode characters in big endian
   98   	 *  format (most important byte first), return a Java String representation
   99   	 *  of it. 
  100   	 *      
  101   	 * { 0x00, 0x16 } -0x16     
  102   	 *     
  103   	 *@param  string                              the byte array to be converted     
  104   	 **@param  offset                              the initial offset into the     
  105   	 *      byte array. it is assumed that string[ offset ] and string[ offset +     
  106   	 *      1 ] contain the first 16-bit unicode character     
  107            *@param len the length of the final string     
  108   	 *@return                                     the converted string     
  109   	 *@exception  ArrayIndexOutOfBoundsException  if offset is out of bounds for     
  110   	 *      the byte array (i.e., is negative or is greater than or equal to     
  111   	 *      string.length)     
  112   	 *@exception  IllegalArgumentException        if len is too large (i.e.,     
  113   	 *      there is not enough data in string to create a String of that     
  114   	 *      length)     
  115   	 */
  116   	public static String getFromUnicodeBE(
  117   		final byte[] string,
  118   		final int offset,
  119   		final int len)
  120   		throws ArrayIndexOutOfBoundsException, IllegalArgumentException {
  121   		if ((offset < 0) || (offset >= string.length)) {
  122   			throw new ArrayIndexOutOfBoundsException("Illegal offset");
  123   		}
  124   		if ((len < 0) || (((string.length - offset) / 2) < len)) {
  125   			throw new IllegalArgumentException("Illegal length");
  126   		}
  127   		try {
  128   			return new String(string, offset, len * 2, "UTF-16BE");
  129   		} catch (UnsupportedEncodingException e) {
  130   			throw new InternalError(); /*unreachable*/
  131   		}
  132   	}
  133   
  134   	/**     
  135   	 *  Given a byte array of 16-bit unicode characters in big endian
  136   	 *  format (most important byte first), return a Java String representation
  137   	 *  of it.
  138   	 *      
  139   	 * { 0x00, 0x16 } -0x16     
  140   	 *     
  141   	 *@param  string  the byte array to be converted     
  142   	 *@return         the converted string     
  143   	 */
  144   	public static String getFromUnicodeBE(final byte[] string) {
  145   		if(string.length == 0) { return ""; }
  146   		return getFromUnicodeBE(string, 0, string.length / 2);
  147   	}
  148   
  149   	/**      
  150   	 * Read 8 bit data (in ISO-8859-1 codepage) into a (unicode) Java
  151   	 * String and return.
  152   	 * (In Excel terms, read compressed 8 bit unicode as a string)
  153   	 *       
  154   	 * @param string byte array to read      
  155   	 * @param offset offset to read byte array      
  156   	 * @param len    length to read byte array      
  157   	 * @return String generated String instance by reading byte array      
  158   	 */
  159   	public static String getFromCompressedUnicode(
  160   		final byte[] string,
  161   		final int offset,
  162   		final int len) {
  163   		try {
  164   			int len_to_use = Math.min(len, string.length - offset);
  165   			return new String(string, offset, len_to_use, "ISO-8859-1");
  166   		} catch (UnsupportedEncodingException e) {
  167   			throw new InternalError(); /* unreachable */
  168   		}
  169   	}
  170   
  171   	/**      
  172   	 * Takes a unicode (java) string, and returns it as 8 bit data (in ISO-8859-1 
  173   	 * codepage).
  174   	 * (In Excel terms, write compressed 8 bit unicode)
  175   	 *     
  176   	 *@param  input   the String containing the data to be written     
  177   	 *@param  output  the byte array to which the data is to be written     
  178   	 *@param  offset  an offset into the byte arrat at which the data is start     
  179   	 *      when written     
  180   	 */
  181   	public static void putCompressedUnicode(
  182   		final String input,
  183   		final byte[] output,
  184   		final int offset) {
  185   		try {
  186   			byte[] bytes = input.getBytes("ISO-8859-1");
  187   			System.arraycopy(bytes, 0, output, offset, bytes.length);
  188   		} catch (UnsupportedEncodingException e) {
  189   			throw new InternalError(); /*unreachable*/
  190   		}
  191   	}
  192   
  193   	/**     
  194   	 * Takes a unicode string, and returns it as little endian (most 
  195   	 * important byte last) bytes in the supplied byte array.
  196   	 * (In Excel terms, write uncompressed unicode)
  197   	 *     
  198   	 *@param  input   the String containing the unicode data to be written     
  199   	 *@param  output  the byte array to hold the uncompressed unicode, should be twice the length of the String
  200   	 *@param  offset  the offset to start writing into the byte array     
  201   	 */
  202   	public static void putUnicodeLE(
  203   		final String input,
  204   		final byte[] output,
  205   		final int offset) {
  206   		try {
  207   			byte[] bytes = input.getBytes("UTF-16LE");
  208   			System.arraycopy(bytes, 0, output, offset, bytes.length);
  209   		} catch (UnsupportedEncodingException e) {
  210   			throw new InternalError(); /*unreachable*/
  211   		}
  212   	}
  213   
  214   	/**     
  215   	 * Takes a unicode string, and returns it as big endian (most 
  216   	 * important byte first) bytes in the supplied byte array.
  217   	 * (In Excel terms, write uncompressed unicode)
  218   	 *     
  219   	 *@param  input   the String containing the unicode data to be written     
  220   	 *@param  output  the byte array to hold the uncompressed unicode, should be twice the length of the String
  221   	 *@param  offset  the offset to start writing into the byte array     
  222   	 */
  223   	public static void putUnicodeBE(
  224   		final String input,
  225   		final byte[] output,
  226   		final int offset) {
  227   		try {
  228   			byte[] bytes = input.getBytes("UTF-16BE");
  229   			System.arraycopy(bytes, 0, output, offset, bytes.length);
  230   		} catch (UnsupportedEncodingException e) {
  231   			throw new InternalError(); /*unreachable*/
  232   		}
  233   	}
  234   
  235   	/**     
  236   	 *  Apply printf() like formatting to a string.      
  237   	 *  Primarily used for logging.    
  238   	 *@param  message  the string with embedded formatting info 
  239   	 *                 eg. "This is a test %2.2"     
  240   	 *@param  params   array of values to format into the string     
  241   	 *@return          The formatted string     
  242   	 */
  243   	public static String format(String message, Object[] params) {
  244   		int currentParamNumber = 0;
  245   		StringBuffer formattedMessage = new StringBuffer();
  246   		for (int i = 0; i < message.length(); i++) {
  247   			if (message.charAt(i) == '%') {
  248   				if (currentParamNumber >= params.length) {
  249   					formattedMessage.append("?missing data?");
  250   				} else if (
  251   					(params[currentParamNumber] instanceof Number)
  252   						&& (i + 1 < message.length())) {
  253   					i
  254   						+= matchOptionalFormatting(
  255   							(Number) params[currentParamNumber++],
  256   							message.substring(i + 1),
  257   							formattedMessage);
  258   				} else {
  259   					formattedMessage.append(
  260   						params[currentParamNumber++].toString());
  261   				}
  262   			} else {
  263   				if ((message.charAt(i) == '\\')
  264   					&& (i + 1 < message.length())
  265   					&& (message.charAt(i + 1) == '%')) {
  266   					formattedMessage.append('%');
  267   					i++;
  268   				} else {
  269   					formattedMessage.append(message.charAt(i));
  270   				}
  271   			}
  272   		}
  273   		return formattedMessage.toString();
  274   	}
  275   
  276   
  277   	private static int matchOptionalFormatting(
  278   		Number number,
  279   		String formatting,
  280   		StringBuffer outputTo) {
  281   		NumberFormat numberFormat = NumberFormat.getInstance();
  282   		if ((0 < formatting.length())
  283   			&& Character.isDigit(formatting.charAt(0))) {
  284   			numberFormat.setMinimumIntegerDigits(
  285   				Integer.parseInt(formatting.charAt(0) + ""));
  286   			if ((2 < formatting.length())
  287   				&& (formatting.charAt(1) == '.')
  288   				&& Character.isDigit(formatting.charAt(2))) {
  289   				numberFormat.setMaximumFractionDigits(
  290   					Integer.parseInt(formatting.charAt(2) + ""));
  291   				numberFormat.format(number, outputTo, new FieldPosition(0));
  292   				return 3;
  293   			}
  294   			numberFormat.format(number, outputTo, new FieldPosition(0));
  295   			return 1;
  296   		} else if (
  297   			(0 < formatting.length()) && (formatting.charAt(0) == '.')) {
  298   			if ((1 < formatting.length())
  299   				&& Character.isDigit(formatting.charAt(1))) {
  300   				numberFormat.setMaximumFractionDigits(
  301   					Integer.parseInt(formatting.charAt(1) + ""));
  302   				numberFormat.format(number, outputTo, new FieldPosition(0));
  303   				return 2;
  304   			}
  305   		}
  306   		numberFormat.format(number, outputTo, new FieldPosition(0));
  307   		return 1;
  308   	}
  309   
  310   	/**     
  311   	 * @return the encoding we want to use, currently hardcoded to ISO-8859-1     
  312   	 */
  313   	public static String getPreferredEncoding() {
  314   		return ENCODING;
  315   	}
  316   
  317   	/**
  318   	 * check the parameter has multibyte character
  319   	 *
  320   	 * @param value  string to check
  321   	 * @return  boolean result
  322   	 *  true:string has at least one multibyte character
  323   	 */
  324   	public static boolean hasMultibyte(String value){
  325   	    if( value == null )return false;
  326   	    for(int i = 0 ; i < value.length() ; i++ ){
  327   	        char c = value.charAt(i);
  328   	        if(c > 0xFF )return true;
  329   	    }
  330   	    return false;
  331   	}
  332   	
  333   	/**
  334   	 * Checks to see if a given String needs to be represented as Unicode
  335   	 * @param value 
  336   	 * @return true if string needs Unicode to be represented.
  337   	 */
  338   	  public static boolean isUnicodeString(final String value) {
  339   	    try {
  340   	      return !value.equals(new String(value.getBytes("ISO-8859-1"), "ISO-8859-1"));
  341   	    } catch (UnsupportedEncodingException e) {
  342   	      return true;
  343   	    }
  344   	  }
  345   }

Save This Page
Home » poi-src-3.2-FINAL-20081019 » org.apache » poi » util » [javadoc | source]