// RomkanFullKatakana.java revision 3feb3d26d59be1626c59b72654020179e2b2ebe0
1/* 2 * Copyright (C) 2008,2009 OMRON SOFTWARE Co., Ltd. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package jp.co.omronsoft.openwnn.JAJP; 18 19import jp.co.omronsoft.openwnn.LetterConverter; 20import jp.co.omronsoft.openwnn.ComposingText; 21import jp.co.omronsoft.openwnn.StrSegment; 22import java.util.HashMap; 23import android.content.SharedPreferences; 24import java.util.regex.Pattern; 25import java.util.regex.Matcher; 26 27/** 28 * The Romaji to Full Katakana converter class for Japanese IME. 29 * 30 * @author Copyright (C) 2009 OMRON SOFTWARE CO., LTD. All Rights Reserved. 
31 */ 32public class RomkanFullKatakana implements LetterConverter { 33 /** HashMap for Romaji-to-Kana conversion (Japanese mode) */ 34 private static final HashMap<String, String> mRomkanTable = new HashMap<String, String>() {{ 35 put("la", "\u30a1"); put("xa", "\u30a1"); put("a", "\u30a2"); 36 put("li", "\u30a3"); put("lyi", "\u30a3"); put("xi", "\u30a3"); 37 put("xyi", "\u30a3"); put("i", "\u30a4"); put("yi", "\u30a4"); 38 put("ye", "\u30a4\u30a7"); put("lu", "\u30a5"); put("xu", "\u30a5"); 39 put("u", "\u30a6"); put("whu", "\u30a6"); put("wu", "\u30a6"); 40 put("wha", "\u30a6\u30a1"); put("whi", "\u30a6\u30a3"); put("wi", "\u30a6\u30a3"); 41 put("we", "\u30a6\u30a7"); put("whe", "\u30a6\u30a7"); put("who", "\u30a6\u30a9"); 42 put("le", "\u30a7"); put("lye", "\u30a7"); put("xe", "\u30a7"); 43 put("xye", "\u30a7"); put("e", "\u30a8"); put("lo", "\u30a9"); 44 put("xo", "\u30a9"); put("o", "\u30aa"); put("ca", "\u30ab"); 45 put("ka", "\u30ab"); put("ga", "\u30ac"); put("ki", "\u30ad"); 46 put("kyi", "\u30ad\u30a3"); put("kye", "\u30ad\u30a7"); put("kya", "\u30ad\u30e3"); 47 put("kyu", "\u30ad\u30e5"); put("kyo", "\u30ad\u30e7"); put("gi", "\u30ae"); 48 put("gyi", "\u30ae\u30a3"); put("gye", "\u30ae\u30a7"); put("gya", "\u30ae\u30e3"); 49 put("gyu", "\u30ae\u30e5"); put("gyo", "\u30ae\u30e7"); put("cu", "\u30af"); 50 put("ku", "\u30af"); put("qu", "\u30af"); put("kwa", "\u30af\u30a1"); 51 put("qa", "\u30af\u30a1"); put("qwa", "\u30af\u30a1"); put("qi", "\u30af\u30a3"); 52 put("qwi", "\u30af\u30a3"); put("qyi", "\u30af\u30a3"); put("qwu", "\u30af\u30a5"); 53 put("qe", "\u30af\u30a7"); put("qwe", "\u30af\u30a7"); put("qye", "\u30af\u30a7"); 54 put("qo", "\u30af\u30a9"); put("qwo", "\u30af\u30a9"); put("qya", "\u30af\u30e3"); 55 put("qyu", "\u30af\u30e5"); put("qyo", "\u30af\u30e7"); put("gu", "\u30b0"); 56 put("gwa", "\u30b0\u30a1"); put("gwi", "\u30b0\u30a3"); put("gwu", "\u30b0\u30a5"); 57 put("gwe", "\u30b0\u30a7"); put("gwo", "\u30b0\u30a9"); put("ke", "\u30b1"); 
58 put("ge", "\u30b2"); put("co", "\u30b3"); put("ko", "\u30b3"); 59 put("go", "\u30b4"); put("sa", "\u30b5"); put("za", "\u30b6"); 60 put("ci", "\u30b7"); put("shi", "\u30b7"); put("si", "\u30b7"); 61 put("syi", "\u30b7\u30a3"); put("she", "\u30b7\u30a7"); put("sye", "\u30b7\u30a7"); 62 put("sha", "\u30b7\u30e3"); put("sya", "\u30b7\u30e3"); put("shu", "\u30b7\u30e5"); 63 put("syu", "\u30b7\u30e5"); put("sho", "\u30b7\u30e7"); put("syo", "\u30b7\u30e7"); 64 put("ji", "\u30b8"); put("zi", "\u30b8"); put("jyi", "\u30b8\u30a3"); 65 put("zyi", "\u30b8\u30a3"); put("je", "\u30b8\u30a7"); put("jye", "\u30b8\u30a7"); 66 put("zye", "\u30b8\u30a7"); put("ja", "\u30b8\u30e3"); put("jya", "\u30b8\u30e3"); 67 put("zya", "\u30b8\u30e3"); put("ju", "\u30b8\u30e5"); put("jyu", "\u30b8\u30e5"); 68 put("zyu", "\u30b8\u30e5"); put("jo", "\u30b8\u30e7"); put("jyo", "\u30b8\u30e7"); 69 put("zyo", "\u30b8\u30e7"); put("su", "\u30b9"); put("swa", "\u30b9\u30a1"); 70 put("swi", "\u30b9\u30a3"); put("swu", "\u30b9\u30a5"); put("swe", "\u30b9\u30a7"); 71 put("swo", "\u30b9\u30a9"); put("zu", "\u30ba"); put("ce", "\u30bb"); 72 put("se", "\u30bb"); put("ze", "\u30bc"); put("so", "\u30bd"); 73 put("zo", "\u30be"); put("ta", "\u30bf"); put("da", "\u30c0"); 74 put("chi", "\u30c1"); put("ti", "\u30c1"); put("cyi", "\u30c1\u30a3"); 75 put("tyi", "\u30c1\u30a3"); put("che", "\u30c1\u30a7"); put("cye", "\u30c1\u30a7"); 76 put("tye", "\u30c1\u30a7"); put("cha", "\u30c1\u30e3"); put("cya", "\u30c1\u30e3"); 77 put("tya", "\u30c1\u30e3"); put("chu", "\u30c1\u30e5"); put("cyu", "\u30c1\u30e5"); 78 put("tyu", "\u30c1\u30e5"); put("cho", "\u30c1\u30e7"); put("cyo", "\u30c1\u30e7"); 79 put("tyo", "\u30c1\u30e7"); put("di", "\u30c2"); put("dyi", "\u30c2\u30a3"); 80 put("dye", "\u30c2\u30a7"); put("dya", "\u30c2\u30e3"); put("dyu", "\u30c2\u30e5"); 81 put("dyo", "\u30c2\u30e7"); put("ltsu", "\u30c3"); put("ltu", "\u30c3"); 82 put("xtu", "\u30c3"); put("", "\u30c3"); put("tsu", "\u30c4"); 83 put("tu", 
"\u30c4"); put("tsa", "\u30c4\u30a1"); put("tsi", "\u30c4\u30a3"); 84 put("tse", "\u30c4\u30a7"); put("tso", "\u30c4\u30a9"); put("du", "\u30c5"); 85 put("te", "\u30c6"); put("thi", "\u30c6\u30a3"); put("the", "\u30c6\u30a7"); 86 put("tha", "\u30c6\u30e3"); put("thu", "\u30c6\u30e5"); put("tho", "\u30c6\u30e7"); 87 put("de", "\u30c7"); put("dhi", "\u30c7\u30a3"); put("dhe", "\u30c7\u30a7"); 88 put("dha", "\u30c7\u30e3"); put("dhu", "\u30c7\u30e5"); put("dho", "\u30c7\u30e7"); 89 put("to", "\u30c8"); put("twa", "\u30c8\u30a1"); put("twi", "\u30c8\u30a3"); 90 put("twu", "\u30c8\u30a5"); put("twe", "\u30c8\u30a7"); put("two", "\u30c8\u30a9"); 91 put("do", "\u30c9"); put("dwa", "\u30c9\u30a1"); put("dwi", "\u30c9\u30a3"); 92 put("dwu", "\u30c9\u30a5"); put("dwe", "\u30c9\u30a7"); put("dwo", "\u30c9\u30a9"); 93 put("na", "\u30ca"); put("ni", "\u30cb"); put("nyi", "\u30cb\u30a3"); 94 put("nye", "\u30cb\u30a7"); put("nya", "\u30cb\u30e3"); put("nyu", "\u30cb\u30e5"); 95 put("nyo", "\u30cb\u30e7"); put("nu", "\u30cc"); put("ne", "\u30cd"); 96 put("no", "\u30ce"); put("ha", "\u30cf"); put("ba", "\u30d0"); 97 put("pa", "\u30d1"); put("hi", "\u30d2"); put("hyi", "\u30d2\u30a3"); 98 put("hye", "\u30d2\u30a7"); put("hya", "\u30d2\u30e3"); put("hyu", "\u30d2\u30e5"); 99 put("hyo", "\u30d2\u30e7"); put("bi", "\u30d3"); put("byi", "\u30d3\u30a3"); 100 put("bye", "\u30d3\u30a7"); put("bya", "\u30d3\u30e3"); put("byu", "\u30d3\u30e5"); 101 put("byo", "\u30d3\u30e7"); put("pi", "\u30d4"); put("pyi", "\u30d4\u30a3"); 102 put("pye", "\u30d4\u30a7"); put("pya", "\u30d4\u30e3"); put("pyu", "\u30d4\u30e5"); 103 put("pyo", "\u30d4\u30e7"); put("fu", "\u30d5"); put("hu", "\u30d5"); 104 put("fa", "\u30d5\u30a1"); put("fwa", "\u30d5\u30a1"); put("fi", "\u30d5\u30a3"); 105 put("fwi", "\u30d5\u30a3"); put("fyi", "\u30d5\u30a3"); put("fwu", "\u30d5\u30a5"); 106 put("fe", "\u30d5\u30a7"); put("fwe", "\u30d5\u30a7"); put("fye", "\u30d5\u30a7"); 107 put("fo", "\u30d5\u30a9"); put("fwo", 
"\u30d5\u30a9"); put("fya", "\u30d5\u30e3"); 108 put("fyu", "\u30d5\u30e5"); put("fyo", "\u30d5\u30e7"); put("bu", "\u30d6"); 109 put("pu", "\u30d7"); put("he", "\u30d8"); put("be", "\u30d9"); 110 put("pe", "\u30da"); put("ho", "\u30db"); put("bo", "\u30dc"); 111 put("po", "\u30dd"); put("ma", "\u30de"); put("mi", "\u30df"); 112 put("myi", "\u30df\u30a3"); put("mye", "\u30df\u30a7"); put("mya", "\u30df\u30e3"); 113 put("myu", "\u30df\u30e5"); put("myo", "\u30df\u30e7"); put("mu", "\u30e0"); 114 put("me", "\u30e1"); put("mo", "\u30e2"); put("lya", "\u30e3"); 115 put("xya", "\u30e3"); put("ya", "\u30e4"); put("lyu", "\u30e5"); 116 put("xyu", "\u30e5"); put("yu", "\u30e6"); put("lyo", "\u30e7"); 117 put("xyo", "\u30e7"); put("yo", "\u30e8"); put("ra", "\u30e9"); 118 put("ri", "\u30ea"); put("ryi", "\u30ea\u30a3"); put("rye", "\u30ea\u30a7"); 119 put("rya", "\u30ea\u30e3"); put("ryu", "\u30ea\u30e5"); put("ryo", "\u30ea\u30e7"); 120 put("ru", "\u30eb"); put("re", "\u30ec"); put("ro", "\u30ed"); 121 put("lwa", "\u30ee"); put("xwa", "\u30ee"); put("wa", "\u30ef"); 122 put("wo", "\u30f2"); put("nn", "\u30f3"); put("xn", "\u30f3"); 123 put("vu", "\u30f4"); put("va", "\u30f4\u30a1"); put("vi", "\u30f4\u30a3"); 124 put("vyi", "\u30f4\u30a3"); put("ve", "\u30f4\u30a7"); put("vye", "\u30f4\u30a7"); 125 put("vo", "\u30f4\u30a9"); put("vya", "\u30f4\u30e3"); put("vyu", "\u30f4\u30e5"); 126 put("vyo", "\u30f4\u30e7"); 127 put("bb", "\u30c3b"); put("cc", "\u30c3c"); put("dd", "\u30c3d"); 128 put("ff", "\u30c3f"); put("gg", "\u30c3g"); put("hh", "\u30c3h"); 129 put("jj", "\u30c3j"); put("kk", "\u30c3k"); put("ll", "\u30c3l"); 130 put("mm", "\u30c3m"); put("pp", "\u30c3p"); put("qq", "\u30c3q"); 131 put("rr", "\u30c3r"); put("ss", "\u30c3s"); put("tt", "\u30c3t"); 132 put("vv", "\u30c3v"); put("ww", "\u30c3w"); put("xx", "\u30c3x"); 133 put("yy", "\u30c3y"); put("zz", "\u30c3z"); put("nb", "\u30f3b"); 134 put("nc", "\u30f3c"); put("nd", "\u30f3d"); put("nf", "\u30f3f"); 135 
put("ng", "\u30f3g"); put("nh", "\u30f3h"); put("nj", "\u30f3j"); 136 put("nk", "\u30f3k"); put("nm", "\u30f3m"); put("np", "\u30f3p"); 137 put("nq", "\u30f3q"); put("nr", "\u30f3r"); put("ns", "\u30f3s"); 138 put("nt", "\u30f3t"); put("nv", "\u30f3v"); put("nw", "\u30f3w"); 139 put("nx", "\u30f3x"); put("nz", "\u30f3z"); put("nl", "\u30f3l"); 140 put("-", "\u30fc"); put(".", "\u3002"); put(",", "\u3001"); put("?", "\uff1f"); put("/", "\u30fb"); 141 }}; 142 143 /** @see LetterConverter#convert */ 144 public boolean convert(ComposingText text) { 145 return convert(text, mRomkanTable); 146 } 147 148 /** 149 * convert Romaji to Full Katakana 150 * 151 * @param text The input/output text 152 * @param table HashMap for Romaji-to-Kana conversion 153 * @return {@code true} if conversion is compleated; {@code false} if not 154 */ 155 public static boolean convert(ComposingText text, HashMap<String, String> table) { 156 int cursor = text.getCursor(1); 157 158 if (cursor <= 0) { 159 return false; 160 } 161 162 StrSegment[] str = new StrSegment[3]; 163 int start = 2; 164 str[2] = text.getStrSegment(ComposingText.LAYER1, cursor - 1); 165 if (cursor >= 2) { 166 str[1] = text.getStrSegment(ComposingText.LAYER1, cursor - 2); 167 start = 1; 168 if (cursor >= 3) { 169 str[0] = text.getStrSegment(ComposingText.LAYER1, cursor - 3); 170 start = 0; 171 } 172 } 173 174 StringBuffer key = new StringBuffer(); 175 while (start < 3) { 176 for (int i = start; i < 3; i++) { 177 key.append(str[i].string); 178 } 179 boolean upper = Character.isUpperCase(key.charAt(key.length() - 1)); 180 String match = table.get(key.toString().toLowerCase()); 181 if (match != null) { 182 if (upper) { 183 match = match.toUpperCase(); 184 } 185 StrSegment[] out; 186 if (match.length() == 1) { 187 out = new StrSegment[1]; 188 out[0] = new StrSegment(match, str[start].from, str[2].to); 189 text.replaceStrSegment(ComposingText.LAYER1, out, 3 - start); 190 } else { 191 out = new StrSegment[2]; 192 out[0] = new 
StrSegment(match.substring(0, match.length() - 1), str[start].from, str[2].to - 1); 193 out[1] = new StrSegment(match.substring(match.length() - 1), str[2].to, str[2].to); 194 text.replaceStrSegment(ComposingText.LAYER1, out, 3 - start); 195 } 196 String regex = ".*[a-zA-Z]$"; 197 Pattern p = Pattern.compile(regex); 198 Matcher m = p.matcher(text.toString(ComposingText.LAYER1)); 199 if (m.matches()) { 200 text.moveCursor(ComposingText.LAYER1, -1); 201 } 202 return true; 203 } 204 start++; 205 key.delete(0, key.length()); 206 } 207 208 return false; 209 } 210 211 /** @see LetterConverter#setPreferences */ 212 public void setPreferences(SharedPreferences pref) {} 213} 214