Source code: com/mayhoo/kanji/JapaneseString.java
1 // $Id: JapaneseString.java,v 1.2 2002/04/20 18:10:24 djmay Exp $
2 package com.mayhoo.kanji;
3
4 /**
5 * JapaneseString contains static functions to do various tests
6 * on Strings to determine if it consists one of the various types of
7 * characters used in the japanese writing system.
8 *
9 * There are also a functions to translate between Katakana, Hiragana,
10 * and Romaji.
11 *
12 * @author Duane J. May <djmay@mayhoo.com>
13 * @version $Id: JapaneseString.java,v 1.2 2002/04/20 18:10:24 djmay Exp $
14 */
15 public class JapaneseString {
16
17 /** Version information */
18 private final static String VERSION =
19 "$Id: JapaneseString.java,v 1.2 2002/04/20 18:10:24 djmay Exp $";
20
21 /**
22 * Determines if this character is a Japanese Kana.
23 */
24 public static boolean isKana(String str) {
25 return ( isHiragana(str) || isKatakana(str) );
26 }
27
28 /**
29 * Determines if this character is one of the Japanese Hiragana.
30 */
31 public static boolean isHiragana(String str) {
32 int size = str.length();
33
34 for ( int i = 0; i < size; i++ ) {
35 char c = str.charAt( i );
36 if ( !( ( '\u3041' <= c ) && ( c <= '\u309e' ) ) ) {
37 return false;
38 }
39 }
40 return true;
41 }
42
43 /**
44 * Determines if this character is one of the Japanese Katakana.
45 */
46
47 public static boolean isKatakana(String str) {
48 return ( isHalfwidthKatakana(str) || isFullwidthKatakana(str) );
49 }
50
51 /**
52 * Determines if this character is a Half width Katakana.
53 */
54 public static boolean isHalfwidthKatakana(String str) {
55 int size = str.length();
56
57 for ( int i = 0; i < size; i++ ) {
58 char c = str.charAt( i );
59 if ( !( ( '\uff66' <= c ) && ( c <= '\uff9d' ) ) ) {
60 return false;
61 }
62 }
63 return true;
64 }
65
66 /**
67 * Determines if this character is a Full width Katakana.
68 */
69 public static boolean isFullwidthKatakana(String str) {
70 int size = str.length();
71
72 for ( int i = 0; i < size; i++ ) {
73 char c = str.charAt( i );
74 if ( !( ( '\u30a1' <= c ) && ( c <= '\u30fe' ) ) ) {
75 return false;
76 }
77 }
78 return true;
79 }
80
81 /**
82 * Determines if this character is a Kanji character.
83 */
84 public static boolean isKanji(String str) {
85 int size = str.length();
86
87 for ( int i = 0; i < size; i++ ) {
88 char c = str.charAt( i );
89 boolean charIsKanji = false;
90 if ( ('\u4e00' <= c ) && ( c <= '\u9fa5' ) ) {
91 charIsKanji = true;
92 }
93 if ( ( '\u3005' <= c ) && ( c <= '\u3007' ) ) {
94 charIsKanji = true;
95 }
96 if ( ! charIsKanji ) {
97 return false;
98 }
99 }
100 return true;
101 }
102
103 /**
104 * Determines if this character could be used as part of
105 * a romaji character.
106 */
107 public static boolean isRomaji(String str) {
108 int size = str.length();
109
110 for ( int i = 0; i < size; i++ ) {
111 char c = str.charAt( i );
112 boolean charIsRomaji = false;
113 if (('\u0041' <= c) && (c <= '\u0090')) {
114 charIsRomaji = true;
115 } else if (('\u0061' <= c) && (c <= '\u007a')) {
116 charIsRomaji = true;
117 } else if (('\u0021' <= c) && (c <= '\u003a')) {
118 charIsRomaji = true;
119 } else if (('\u0041' <= c) && (c <= '\u005a')) {
120 charIsRomaji = true;
121 }
122 if ( ! charIsRomaji ) {
123 return false;
124 }
125 }
126 return true;
127 }
128
129 /**
130 * Translates this character into the equivalent Katakana character.
131 * The function only operates on Hiragana and always returns the
132 * Full width version of the Katakana. If the character is outside the
133 * Hiragana then the origianal character is returned.
134 */
135 public static String toKatakana(String str) {
136 StringBuffer buf = new StringBuffer();
137 int size = str.length();
138
139 for ( int i = 0; i < size; i++ ) {
140 char c = str.charAt( i );
141 buf.append( JapaneseCharacter.toKatakana( c ) );
142 }
143 return buf.toString();
144 }
145
146
147 /**
148 * Translates this character into the equivalent Hiragana character.
149 * The function only operates on Katakana characters
150 * If the character is outside the Full width or Half width
151 * Katakana then the origianal character is returned.
152 */
153 public static String toHiragana(String str) {
154 StringBuffer buf = new StringBuffer();
155 int size = str.length();
156
157 for ( int i = 0; i < size; i++ ) {
158 char c = str.charAt( i );
159 buf.append( JapaneseCharacter.toHiragana( c ) );
160 }
161 return buf.toString();
162 }
163
164
165 /**
166 * Translates this character into the equivalent Romaji character.
167 * The function only operates on Hiragana and Katakana characters
168 * If the character is outside the given range then
169 * the origianal character is returned.
170 *
171 * The resulting string is lowercase if the input was Hiragana and
172 * UPPERCASE if the input was Katakana.
173 */
174 public static String toRomaji( String str ) {
175 StringBuffer buf = new StringBuffer();
176 int size = str.length();
177 String convert;
178 boolean isHiragana;
179
180 for ( int i = 0; i < size - 1; i++ ) {
181 char c = str.charAt( i );
182 char nextChar = str.charAt( i + 1 );
183 isHiragana = JapaneseCharacter.isHiragana( c );
184 c = JapaneseCharacter.toHiragana( c );
185 nextChar = JapaneseCharacter.toHiragana( nextChar );
186
187 if ( nextChar == '\u3087' ||
188 nextChar == '\u3085' ||
189 nextChar == '\u3083' )
190 {
191 convert = voicedRomaji[ c - 0x3041 ];
192 } else {
193 convert = romaji[ c - 0x3041 ];
194 }
195 if ( ! isHiragana ) {
196 convert = convert.toUpperCase();
197 }
198 buf.append( convert );
199 }
200 char c = str.charAt( size - 1 );
201 isHiragana = JapaneseCharacter.isHiragana( c );
202 c = JapaneseCharacter.toHiragana( c );
203 convert = romaji[ c - 0x3041 ];
204 if ( ! isHiragana ) {
205 convert = convert.toUpperCase();
206 }
207 buf.append( convert );
208
209 return buf.toString();
210 }
211
212 /**
213 * The array used to map hirgana to romaji.
214 * Note the little ya, yu and yo characters need to follow
215 * a character in the voicedRomaji array.
216 */
217 protected static String romaji[] = {
218 "a", "a",
219 "i", "i",
220 "u", "u",
221 "e", "e",
222 "o", "o",
223
224 "ka", "ga",
225 "ki", "gi",
226 "ku", "gu",
227 "ke", "ge",
228 "ko", "go",
229
230 "sa", "za",
231 "shi", "ji",
232 "su", "zu",
233 "se", "ze",
234 "so", "zo",
235
236 "ta", "da",
237 "chi", "ji",
238 "tsu", "tsu", "zu",
239 "te", "de",
240 "to", "do",
241
242 "na",
243 "ni",
244 "nu",
245 "ne",
246 "no",
247
248 "ha", "ba", "pa",
249 "hi", "bi", "pi",
250 "fu", "bu", "pu",
251 "he", "be", "pe",
252 "ho", "bo", "po",
253
254 "ma",
255 "mi",
256 "mu",
257 "me",
258 "mo",
259
260 "a", "ya",
261 "u", "yu",
262 "o", "yo",
263
264 "ra",
265 "ri",
266 "ru",
267 "re",
268 "ro",
269
270 "wa", "wa",
271 "wi", "we",
272 "o",
273 "n",
274
275 "v",
276 "ka",
277 "ke"
278
279 };
280
281 /**
282 * The array used to map hirgana to romaji. This is used
283 * when the character is followed by a little ya, yu, or yo.
284 */
285 protected static String voicedRomaji[] = {
286 "", "",
287 "", "",
288 "", "",
289 "", "",
290 "", "",
291
292 "", "",
293 "ky", "gy",
294 "", "",
295 "", "",
296 "", "",
297
298 "", "",
299 "sh", "j",
300 "", "",
301 "", "",
302 "", "",
303
304 "", "",
305 "ch", "ji",
306 "", "", "",
307 "", "",
308 "", "",
309
310 "",
311 "ny",
312 "",
313 "",
314 "",
315
316 "", "", "",
317 "hy", "by", "py",
318 "", "", "",
319 "", "", "",
320 "", "", "",
321
322 "",
323 "my",
324 "",
325 "",
326 "",
327
328 "a", "ya",
329 "u", "yu",
330 "o", "yo",
331
332 "",
333 "ry",
334 "",
335 "",
336 "",
337
338 "", "",
339 "", "",
340 "",
341 "",
342
343 "",
344 "",
345 ""
346 };
347
348 /**
349 * Access the array to return the correct romaji string.
350 */
351 private static String lookupRomaji( char c ) {
352 return romaji[ c - 0x3041 ];
353 }
354 }
355