Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/mayhoo/kanji/JapaneseString.java


1   // $Id: JapaneseString.java,v 1.2 2002/04/20 18:10:24 djmay Exp $
2   package com.mayhoo.kanji;
3   
4   /**
5    * JapaneseString contains static functions to do various tests
6    * on Strings to determine if it consists one of the various types of 
7    * characters used in the japanese writing system.
8    *
9    * There are also a functions to translate between Katakana, Hiragana,
10   * and Romaji.
11   *
12   * @author Duane J. May <djmay@mayhoo.com>
13   * @version $Id: JapaneseString.java,v 1.2 2002/04/20 18:10:24 djmay Exp $
14   */
15  public class JapaneseString {
16  
17      /** Version information */
18      private final static String VERSION = 
19    "$Id: JapaneseString.java,v 1.2 2002/04/20 18:10:24 djmay Exp $";
20  
21      /**
22       * Determines if this character is a Japanese Kana.
23       */
24      public static boolean isKana(String str) {
25          return ( isHiragana(str) || isKatakana(str) );
26      }
27  
28      /**
29       * Determines if this character is one of the Japanese Hiragana.
30       */
31      public static boolean isHiragana(String str) {
32    int size = str.length();
33  
34    for ( int i = 0; i < size; i++ ) {
35        char c = str.charAt( i );
36        if ( !( ( '\u3041' <= c ) && ( c <= '\u309e' ) ) ) {
37            return false;
38        }
39    }
40    return true;
41      }
42  
43      /**
44       * Determines if this character is one of the Japanese Katakana.
45       */
46  
47      public static boolean isKatakana(String str) {
48          return ( isHalfwidthKatakana(str) || isFullwidthKatakana(str) );
49      }
50      
51      /**
52       * Determines if this character is a Half width Katakana.
53       */
54      public static boolean isHalfwidthKatakana(String str) {
55    int size = str.length();
56  
57    for ( int i = 0; i < size; i++ ) {
58        char c = str.charAt( i );
59        if ( !( ( '\uff66' <= c ) && ( c <= '\uff9d' ) ) ) {
60            return false;
61        }
62    }
63    return true;
64      }
65  
66      /**
67       * Determines if this character is a Full width Katakana.
68       */
69      public static boolean isFullwidthKatakana(String str) {
70    int size = str.length();
71  
72    for ( int i = 0; i < size; i++ ) {
73        char c = str.charAt( i );
74        if ( !( ( '\u30a1' <= c ) && ( c <= '\u30fe' ) ) ) {
75            return false;
76        }
77    }
78    return true;
79      }
80  
81      /**
82       * Determines if this character is a Kanji character.
83       */
84      public static boolean isKanji(String str) {
85    int size = str.length();
86  
87    for ( int i = 0; i < size; i++ ) {
88        char c = str.charAt( i );
89        boolean charIsKanji = false;
90        if ( ('\u4e00' <= c ) && ( c <= '\u9fa5' ) ) {
91      charIsKanji = true;
92        }
93        if ( ( '\u3005' <= c ) && ( c <= '\u3007' ) ) {
94      charIsKanji = true;
95        }
96        if ( ! charIsKanji ) {
97            return false;
98        }
99    }
100   return true;
101     }
102 
103     /**
104      * Determines if this character could be used as part of 
105      * a romaji character.
106      */
107     public static boolean isRomaji(String str) {
108   int size = str.length();
109 
110   for ( int i = 0; i < size; i++ ) {
111       char c = str.charAt( i );
112       boolean charIsRomaji = false;
113       if (('\u0041' <= c) && (c <= '\u0090')) {
114     charIsRomaji = true;
115       } else if (('\u0061' <= c) && (c <= '\u007a')) {
116     charIsRomaji = true;
117       } else if (('\u0021' <= c) && (c <= '\u003a')) {
118     charIsRomaji = true;
119       } else if (('\u0041' <= c) && (c <= '\u005a')) {
120     charIsRomaji = true;
121       }
122       if ( ! charIsRomaji ) {
123           return false;
124       }
125   }
126   return true;
127     }
128 
129     /**
130      * Translates this character into the equivalent Katakana character.
131      * The function only operates on Hiragana and always returns the
132      * Full width version of the Katakana. If the character is outside the
133      * Hiragana then the origianal character is returned.
134      */
135     public static String toKatakana(String str) {
136   StringBuffer buf = new StringBuffer();
137   int size = str.length();
138 
139   for ( int i = 0; i < size; i++ ) {
140       char c = str.charAt( i );
141       buf.append( JapaneseCharacter.toKatakana( c ) );
142   }
143         return buf.toString();
144     }
145 
146 
147     /**
148      * Translates this character into the equivalent Hiragana character.
149      * The function only operates on Katakana characters
150      * If the character is outside the Full width or Half width 
151      * Katakana then the origianal character is returned.
152      */
153     public static String toHiragana(String str) {
154   StringBuffer buf = new StringBuffer();
155   int size = str.length();
156 
157   for ( int i = 0; i < size; i++ ) {
158       char c = str.charAt( i );
159       buf.append( JapaneseCharacter.toHiragana( c ) );
160   }
161         return buf.toString();
162     }
163     
164 
165     /**
166      * Translates this character into the equivalent Romaji character.
167      * The function only operates on Hiragana and Katakana characters
168      * If the character is outside the given range then 
169      * the origianal character is returned.
170      * 
171      * The resulting string is lowercase if the input was Hiragana and
172      * UPPERCASE if the input was Katakana.
173      */
174     public static String toRomaji( String str ) {
175   StringBuffer buf = new StringBuffer();
176   int size = str.length();
177         String convert;
178   boolean isHiragana;
179 
180   for ( int i = 0; i < size - 1; i++ ) {
181       char c = str.charAt( i );
182       char nextChar = str.charAt( i + 1 );
183       isHiragana = JapaneseCharacter.isHiragana( c );
184       c = JapaneseCharacter.toHiragana( c );
185       nextChar = JapaneseCharacter.toHiragana( nextChar );
186 
187       if ( nextChar == '\u3087' ||
188                  nextChar == '\u3085' ||
189      nextChar == '\u3083' )
190             {
191           convert = voicedRomaji[ c - 0x3041 ];
192       } else {
193     convert = romaji[ c - 0x3041 ];
194             }
195             if ( ! isHiragana ) {
196                 convert = convert.toUpperCase();
197             } 
198       buf.append( convert );
199   }
200   char c = str.charAt( size - 1 );
201   isHiragana = JapaneseCharacter.isHiragana( c );
202   c = JapaneseCharacter.toHiragana( c );
203   convert = romaji[ c - 0x3041 ];
204   if ( ! isHiragana ) {
205       convert = convert.toUpperCase();
206   } 
207   buf.append( convert );
208 
209         return buf.toString();
210     }
211     
212     /**
213      * The array used to map hirgana to romaji.
214      * Note the little ya, yu and yo characters need to follow
215      * a character in the voicedRomaji array.
216      */
217     protected static String romaji[] = { 
218         "a", "a", 
219   "i", "i", 
220   "u", "u", 
221   "e", "e", 
222   "o", "o",
223   
224         "ka", "ga", 
225   "ki", "gi", 
226   "ku", "gu", 
227   "ke", "ge", 
228   "ko", "go",
229   
230         "sa", "za", 
231   "shi", "ji", 
232   "su", "zu", 
233   "se", "ze", 
234   "so", "zo",
235   
236         "ta", "da", 
237   "chi", "ji", 
238   "tsu", "tsu", "zu", 
239   "te", "de", 
240   "to", "do",
241   
242         "na", 
243   "ni", 
244   "nu", 
245   "ne", 
246   "no",
247         
248   "ha", "ba", "pa", 
249   "hi", "bi", "pi", 
250   "fu", "bu", "pu", 
251         "he", "be", "pe", 
252   "ho", "bo", "po",
253 
254         "ma", 
255   "mi", 
256   "mu", 
257   "me", 
258   "mo",
259         
260   "a", "ya", 
261   "u", "yu", 
262   "o", "yo",
263 
264         "ra", 
265   "ri", 
266   "ru", 
267   "re", 
268   "ro",
269         
270   "wa", "wa", 
271   "wi", "we", 
272   "o", 
273   "n", 
274         
275   "v", 
276   "ka", 
277   "ke" 
278   
279     };
280     
281     /**
282      * The array used to map hirgana to romaji. This is used
283      * when the character is followed by a little ya, yu, or yo.
284      */
285     protected static String voicedRomaji[] = { 
286         "", "", 
287   "", "", 
288   "", "", 
289   "", "", 
290   "", "",
291   
292         "", "", 
293   "ky", "gy", 
294   "", "", 
295   "", "", 
296   "", "",
297   
298         "", "", 
299   "sh", "j", 
300   "", "", 
301   "", "", 
302   "", "",
303   
304         "", "", 
305   "ch", "ji", 
306   "", "", "", 
307   "", "", 
308   "", "",
309   
310         "", 
311   "ny", 
312   "", 
313   "", 
314   "",
315         
316   "", "", "", 
317   "hy", "by", "py", 
318   "", "", "", 
319         "", "", "", 
320   "", "", "",
321 
322         "", 
323   "my", 
324   "", 
325   "", 
326   "",
327         
328   "a", "ya", 
329   "u", "yu", 
330   "o", "yo",
331 
332         "", 
333   "ry", 
334   "", 
335   "", 
336   "",
337         
338   "", "", 
339   "", "", 
340   "", 
341   "", 
342   
343   "", 
344   "", 
345   "" 
346     };
347     
348     /**
349      * Access the array to return the correct romaji string.
350      */
351     private static String lookupRomaji( char c ) {
352     return romaji[ c - 0x3041 ];
353     }    
354 }
355