Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/mysql/jdbc/StringUtils.java


1   /*
2    Copyright (C) 2002-2004 MySQL AB
3   
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of version 2 of the GNU General Public License as
6    published by the Free Software Foundation.
7    
8   
9    There are special exceptions to the terms and conditions of the GPL 
10   as it is applied to this software. View the full text of the 
11   exception exception in file EXCEPTIONS-CONNECTOR-J in the directory of this 
12   software distribution.
13  
14   This program is distributed in the hope that it will be useful,
15   but WITHOUT ANY WARRANTY; without even the implied warranty of
16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   GNU General Public License for more details.
18  
19   You should have received a copy of the GNU General Public License
20   along with this program; if not, write to the Free Software
21   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22  
23   */
24  package com.mysql.jdbc;
25  
26  import java.io.ByteArrayOutputStream;
27  import java.io.UnsupportedEncodingException;
28  
29  import java.util.ArrayList;
30  import java.util.List;
31  import java.util.StringTokenizer;
32  
33  
34  /**
35   * Various utility methods for converting to/from byte arrays in the platform
36   * encoding
37   *
38   * @author Mark Matthews
39   */
40  public class StringUtils {
41      private static final int BYTE_RANGE = (1 + Byte.MAX_VALUE) - Byte.MIN_VALUE;
42      private static byte[] allBytes = new byte[BYTE_RANGE];
43      private static char[] byteToChars = new char[BYTE_RANGE];
44  
45      static {
46          for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; i++) {
47              allBytes[i - Byte.MIN_VALUE] = (byte) i;
48          }
49  
50          String allBytesString = new String(allBytes, 0,
51                  Byte.MAX_VALUE - Byte.MIN_VALUE);
52  
53          int allBytesStringLen = allBytesString.length();
54  
55          for (int i = 0;
56                  (i < (Byte.MAX_VALUE - Byte.MIN_VALUE))
57                  && (i < allBytesStringLen); i++) {
58              byteToChars[i] = allBytesString.charAt(i);
59          }
60      }
61  
62      /**
63       * Returns the byte[] representation of the given string using given
64       * encoding.
65       *
66       * @param s the string to convert
67       * @param encoding the character encoding to use
68       * @param serverEncoding DOCUMENT ME!
69       * @param parserKnowsUnicode DOCUMENT ME!
70       *
71       * @return byte[] representation of the string
72       *
73       * @throws UnsupportedEncodingException if an encoding unsupported by the
74       *         JVM is supplied.
75       */
76      public static final byte[] getBytes(String s, String encoding,
77          String serverEncoding, boolean parserKnowsUnicode)
78          throws UnsupportedEncodingException {
79          SingleByteCharsetConverter converter = SingleByteCharsetConverter
80              .getInstance(encoding);
81  
82          return getBytes(s, converter, encoding, serverEncoding,
83              parserKnowsUnicode);
84      }
85  
86      /**
87       * Returns the byte[] representation of the given string (re)using the
88       * given charset converter, and the given encoding.
89       *
90       * @param s the string to convert
91       * @param converter the converter to reuse
92       * @param encoding the character encoding to use
93       * @param serverEncoding DOCUMENT ME!
94       * @param parserKnowsUnicode DOCUMENT ME!
95       *
96       * @return byte[] representation of the string
97       *
98       * @throws UnsupportedEncodingException if an encoding unsupported by the
99       *         JVM is supplied.
100      */
101     public static final byte[] getBytes(String s,
102         SingleByteCharsetConverter converter, String encoding,
103         String serverEncoding, boolean parserKnowsUnicode)
104         throws UnsupportedEncodingException {
105         byte[] b = null;
106 
107         if (converter != null) {
108             b = converter.toBytes(s);
109         } else if (encoding == null) {
110             b = s.getBytes();
111         } else {
112             b = s.getBytes(encoding);
113 
114             if (!parserKnowsUnicode
115                     && (encoding.equalsIgnoreCase("SJIS")
116                     || encoding.equalsIgnoreCase("BIG5")
117                     || encoding.equalsIgnoreCase("GBK"))) {
118                 if (!encoding.equalsIgnoreCase(serverEncoding)) {
119                     b = escapeEasternUnicodeByteStream(b, s, 0, s.length());
120                 }
121             }
122         }
123 
124         return b;
125     }
126 
127     /**
128      * DOCUMENT ME!
129      *
130      * @param s DOCUMENT ME!
131      * @param converter DOCUMENT ME!
132      * @param encoding DOCUMENT ME!
133      * @param serverEncoding DOCUMENT ME!
134      * @param offset DOCUMENT ME!
135      * @param length DOCUMENT ME!
136      * @param parserKnowsUnicode DOCUMENT ME!
137      *
138      * @return DOCUMENT ME!
139      *
140      * @throws UnsupportedEncodingException DOCUMENT ME!
141      */
142     public static final byte[] getBytes(String s,
143         SingleByteCharsetConverter converter, String encoding,
144         String serverEncoding, int offset, int length,
145         boolean parserKnowsUnicode) throws UnsupportedEncodingException {
146         byte[] b = null;
147 
148         if (converter != null) {
149             b = converter.toBytes(s, offset, length);
150         } else if (encoding == null) {
151             byte[] temp = s.getBytes();
152 
153             b = new byte[length];
154             System.arraycopy(temp, offset, b, 0, length);
155         } else {
156             byte[] temp = s.getBytes(encoding);
157 
158             b = new byte[length];
159             System.arraycopy(temp, offset, b, 0, length);
160 
161             if (!parserKnowsUnicode
162                     && (encoding.equalsIgnoreCase("SJIS")
163                     || encoding.equalsIgnoreCase("BIG5")
164                     || encoding.equalsIgnoreCase("GBK"))) {
165                 if (!encoding.equalsIgnoreCase(serverEncoding)) {
166                     b = escapeEasternUnicodeByteStream(b, s, offset, length);
167                 }
168             }
169         }
170 
171         return b;
172     }
173 
174     /**
175      * Dumps the given bytes to STDOUT as a hex dump (up to length bytes).
176      *
177      * @param byteBuffer the data to print as hex
178      * @param length the number of bytes to print
179      */
180     public static final void dumpAsHex(byte[] byteBuffer, int length) {
181         int p = 0;
182         int rows = length / 8;
183 
184         for (int i = 0; i < rows; i++) {
185             int ptemp = p;
186 
187             for (int j = 0; j < 8; j++) {
188                 String hexVal = Integer.toHexString((int) byteBuffer[ptemp]
189                         & 0xff);
190 
191                 if (hexVal.length() == 1) {
192                     hexVal = "0" + hexVal;
193                 }
194 
195                 System.out.print(hexVal + " ");
196                 ptemp++;
197             }
198 
199             System.out.print("    ");
200 
201             for (int j = 0; j < 8; j++) {
202                 if ((byteBuffer[p] > 32) && (byteBuffer[p] < 127)) {
203                     System.out.print((char) byteBuffer[p] + " ");
204                 } else {
205                     System.out.print(". ");
206                 }
207 
208                 p++;
209             }
210 
211             System.out.println();
212         }
213 
214         int n = 0;
215 
216         for (int i = p; i < length; i++) {
217             String hexVal = Integer.toHexString((int) byteBuffer[i] & 0xff);
218 
219             if (hexVal.length() == 1) {
220                 hexVal = "0" + hexVal;
221             }
222 
223             System.out.print(hexVal + " ");
224             n++;
225         }
226 
227         for (int i = n; i < 8; i++) {
228             System.out.print("   ");
229         }
230 
231         System.out.print("    ");
232 
233         for (int i = p; i < length; i++) {
234             if ((byteBuffer[i] > 32) && (byteBuffer[i] < 127)) {
235                 System.out.print((char) byteBuffer[i] + " ");
236             } else {
237                 System.out.print(". ");
238             }
239         }
240 
241         System.out.println();
242     }
243 
244     /**
245      * Returns the bytes as an ASCII String.
246      *
247      * @param buffer the bytes representing the string
248      *
249      * @return The ASCII String.
250      */
251     public static final String toAsciiString(byte[] buffer) {
252         return toAsciiString(buffer, 0, buffer.length);
253     }
254 
255     /**
256      * Returns the bytes as an ASCII String.
257      *
258      * @param buffer the bytes to convert
259      * @param startPos the position to start converting
260      * @param length the length of the string to convert
261      *
262      * @return the ASCII string
263      */
264     public static final String toAsciiString(byte[] buffer, int startPos,
265         int length) {
266         char[] charArray = new char[length];
267         int readpoint = startPos;
268 
269         for (int i = 0; i < length; i++) {
270             charArray[i] = (char) buffer[readpoint];
271             readpoint++;
272         }
273 
274         return new String(charArray);
275     }
276 
277     /**
278      * Unfortunately, SJIS has 0x5c as a high byte in some of its double-byte
279      * characters, so we need to escape it.
280      *
281      * @param origBytes the original bytes in SJIS format
282      * @param origString the string that had .getBytes() called on it
283      * @param offset where to start converting from
284      * @param length how many characters to convert.
285      *
286      * @return byte[] with 0x5c escaped
287      */
288     public static byte[] escapeEasternUnicodeByteStream(byte[] origBytes,
289         String origString, int offset, int length) {
290         if ((origBytes == null) || (origBytes.length == 0)) {
291             return origBytes;
292         }
293 
294         int bytesLen = origBytes.length;
295         int bufIndex = 0;
296         int strIndex = 0;
297 
298         ByteArrayOutputStream bytesOut = new ByteArrayOutputStream(bytesLen);
299 
300         while (true) {
301             if (origString.charAt(strIndex) == '\\') {
302                 // write it out as-is
303                 bytesOut.write(origBytes[bufIndex++]);
304 
305                 //bytesOut.write(origBytes[bufIndex++]);
306             } else {
307                 // Grab the first byte
308                 int loByte = (int) origBytes[bufIndex];
309 
310                 if (loByte < 0) {
311                     loByte += 256; // adjust for signedness/wrap-around
312                 }
313 
314                 // We always write the first byte
315                 bytesOut.write(loByte);
316 
317                 //
318                 // The codepage characters in question exist between
319                 // 0x81-0x9F and 0xE0-0xFC...
320                 //
321                 // See:
322                 //
323                 // http://www.microsoft.com/GLOBALDEV/Reference/dbcs/932.htm
324                 //
325                 // Problematic characters in GBK
326                 //
327                 // U+905C : CJK UNIFIED IDEOGRAPH
328                 //
329                 // Problematic characters in Big5
330                 //
331                 // B9F0 = U+5C62 : CJK UNIFIED IDEOGRAPH
332                 //
333                 if (loByte >= 0x80) {
334                     if (bufIndex < (bytesLen - 1)) {
335                         int hiByte = (int) origBytes[bufIndex + 1];
336 
337                         if (hiByte < 0) {
338                             hiByte += 256; // adjust for signedness/wrap-around
339                         }
340 
341                         // write the high byte here, and increment the index
342                         // for the high byte
343                         bytesOut.write(hiByte);
344                         bufIndex++;
345 
346                         // escape 0x5c if necessary
347                         if (hiByte == 0x5C) {
348                             bytesOut.write(hiByte);
349                         }
350                     }
351                 } else if (loByte == 0x5c) {
352                     if (bufIndex < (bytesLen - 1)) {
353                         int hiByte = (int) origBytes[bufIndex + 1];
354 
355                         if (hiByte < 0) {
356                             hiByte += 256; // adjust for signedness/wrap-around
357                         }
358 
359                         if (hiByte == 0x62) {
360                             // we need to escape the 0x5c
361                             bytesOut.write(0x5c);
362                             bytesOut.write(0x62);
363                             bufIndex++;
364                         }
365                     }
366                 }
367 
368                 bufIndex++;
369             }
370 
371             if (bufIndex >= bytesLen) {
372                 // we're done
373                 break;
374             }
375 
376             strIndex++;
377         }
378 
379         return bytesOut.toByteArray();
380     }
381 
382     /**
383      * Returns the first non whitespace char, converted to upper case
384      *
385      * @param searchIn the string to search in
386      *
387      * @return the first non-whitespace character, upper cased.
388      */
389     public static char firstNonWsCharUc(String searchIn) {
390         if (searchIn == null) {
391             return 0;
392         }
393 
394         int length = searchIn.length();
395 
396         for (int i = 0; i < length; i++) {
397             char c = searchIn.charAt(i);
398 
399             if (!Character.isWhitespace(c)) {
400                 return Character.toUpperCase(c);
401             }
402         }
403 
404         return 0;
405     }
406 
407     /**
408      * Splits stringToSplit into a list, using the given delimitter
409      *
410      * @param stringToSplit the string to split
411      * @param delimitter the string to split on
412      * @param trim should the split strings be whitespace trimmed?
413      *
414      * @return the list of strings, split by delimitter
415      *
416      * @throws IllegalArgumentException DOCUMENT ME!
417      */
418     public static final List split(String stringToSplit, String delimitter,
419         boolean trim) {
420         if (stringToSplit == null) {
421             return new ArrayList();
422         }
423 
424         if (delimitter == null) {
425             throw new IllegalArgumentException();
426         }
427 
428         StringTokenizer tokenizer = new StringTokenizer(stringToSplit,
429                 delimitter, false);
430 
431         List splitTokens = new ArrayList(tokenizer.countTokens());
432 
433         while (tokenizer.hasMoreTokens()) {
434             String token = tokenizer.nextToken();
435 
436             if (trim) {
437                 token = token.trim();
438             }
439 
440             splitTokens.add(token);
441         }
442 
443         return splitTokens;
444     }
445 
446     /**
447      * Determines whether or not the string 'searchIn' contains the string
448      * 'searchFor', dis-regarding case. Shorthand for a
449      * String.regionMatch(...)
450      *
451      * @param searchIn the string to search in
452      * @param searchFor the string to search for
453      *
454      * @return whether searchIn starts with searchFor, ignoring case
455      */
456     public static boolean startsWithIgnoreCase(String searchIn, String searchFor) {
457         return startsWithIgnoreCase(searchIn, 0, searchFor);
458     }
459 
460     /**
461      * Determines whether or not the string 'searchIn' contains the string
462      * 'searchFor', dis-regarding case starting at 'startAt' Shorthand for a
463      * String.regionMatch(...)
464      *
465      * @param searchIn the string to search in
466      * @param startAt the position to start at
467      * @param searchFor the string to search for
468      *
469      * @return whether searchIn starts with searchFor, ignoring case
470      */
471     public static boolean startsWithIgnoreCase(String searchIn, int startAt,
472         String searchFor) {
473         return searchIn.regionMatches(true, 0, searchFor, startAt,
474             searchFor.length());
475     }
476 
477     /**
478      * Determines whether or not the sting 'searchIn' contains the string
479      * 'searchFor', di-regarding case and leading whitespace
480      *
481      * @param searchIn the string to search in
482      * @param searchFor the string to search for
483      *
484      * @return true if the string starts with 'searchFor' ignoring whitespace
485      */
486     public static boolean startsWithIgnoreCaseAndWs(String searchIn,
487         String searchFor) {
488         int beginPos = 0;
489 
490         int inLength = searchIn.length();
491 
492         for (beginPos = 0; beginPos < inLength; beginPos++) {
493             if (!Character.isWhitespace(searchIn.charAt(beginPos))) {
494                 break;
495             }
496         }
497 
498         return startsWithIgnoreCase(searchIn, beginPos, searchFor);
499     }
500 }