1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <stdio.h>
18#include <string.h>
19#include <assert.h>
20#include "../include/dictdef.h"
21
22#ifdef ___BUILD_MODEL___
23#include "../include/spellingtable.h"
24#endif
25
26#include "../include/spellingtrie.h"
27
28namespace ime_pinyin {
29
30SpellingTrie* SpellingTrie::instance_ = NULL;
31
32// z/c/s is for Zh/Ch/Sh
33const char SpellingTrie::kHalfId2Sc_[kFullSplIdStart + 1] =
34    "0ABCcDEFGHIJKLMNOPQRSsTUVWXYZz";
35
36// Bit 0 : is it a Shengmu char?
37// Bit 1 : is it a Yunmu char? (one char is a Yunmu)
38// Bit 2 : is it enabled in ShouZiMu(first char) mode?
39unsigned char SpellingTrie::char_flags_[] = {
40  // a    b      c     d     e     f     g
41  0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01,
42  // h    i     j      k     l     m    n
43  0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
44  // o    p     q      r     s     t
45  0x02, 0x01, 0x01, 0x01, 0x01, 0x01,
46  // u    v     w      x     y     z
47  0x00, 0x00, 0x01, 0x01, 0x01, 0x01
48};
49
50int compare_spl(const void* p1, const void* p2) {
51  return strcmp((const char*)(p1), (const char*)(p2));
52}
53
54SpellingTrie::SpellingTrie() {
55  spelling_buf_ = NULL;
56  spelling_size_ = 0;
57  spelling_num_ = 0;
58  spl_ym_ids_ = NULL;
59  splstr_queried_ = NULL;
60  splstr16_queried_ = NULL;
61  root_ = NULL;
62  dumb_node_ = NULL;
63  splitter_node_ = NULL;
64  instance_ = NULL;
65  ym_buf_ = NULL;
66  f2h_ = NULL;
67
68  szm_enable_shm(true);
69  szm_enable_ym(true);
70
71#ifdef ___BUILD_MODEL___
72  node_num_ = 0;
73#endif
74}
75
76SpellingTrie::~SpellingTrie() {
77  if (NULL != spelling_buf_)
78    delete [] spelling_buf_;
79
80  if (NULL != splstr_queried_)
81    delete [] splstr_queried_;
82
83  if (NULL != splstr16_queried_)
84    delete [] splstr16_queried_;
85
86  if (NULL != spl_ym_ids_)
87    delete [] spl_ym_ids_;
88
89  if (NULL != root_) {
90    free_son_trie(root_);
91    delete root_;
92  }
93
94  if (NULL != dumb_node_) {
95    delete [] dumb_node_;
96  }
97
98  if (NULL != splitter_node_) {
99    delete [] splitter_node_;
100  }
101
102  if (NULL != instance_) {
103    delete instance_;
104    instance_ = NULL;
105  }
106
107  if (NULL != ym_buf_)
108    delete [] ym_buf_;
109
110  if (NULL != f2h_)
111    delete [] f2h_;
112}
113
114bool SpellingTrie::if_valid_id_update(uint16 *splid) const {
115  if (NULL == splid || 0 == *splid)
116    return false;
117
118  if (*splid >= kFullSplIdStart)
119    return true;
120  if (*splid < kFullSplIdStart) {
121    char ch = kHalfId2Sc_[*splid];
122    if (ch > 'Z') {
123      return true;
124    } else {
125      if (szm_is_enabled(ch)) {
126        return true;
127      } else if (is_yunmu_char(ch)) {
128        assert(h2f_num_[*splid] > 0);
129        *splid = h2f_start_[*splid];
130        return true;
131      }
132    }
133  }
134  return false;
135}
136
137bool SpellingTrie::is_half_id(uint16 splid) const {
138  if (0 == splid || splid >= kFullSplIdStart)
139    return false;
140
141  return true;
142}
143
144bool SpellingTrie::is_full_id(uint16 splid) const {
145  if (splid < kFullSplIdStart || splid >= kFullSplIdStart + spelling_num_)
146    return false;
147  return true;
148}
149
150bool SpellingTrie::half_full_compatible(uint16 half_id, uint16 full_id) const {
151  uint16 half_fr_full = full_to_half(full_id);
152
153  if (half_fr_full == half_id)
154    return true;
155
156  // &~0x20 is used to conver the char to upper case.
157  // So that Zh/Ch/Sh(whose char is z/c/s) can be matched with Z/C/S.
158  char ch_f = (kHalfId2Sc_[half_fr_full] & (~0x20));
159  char ch_h = kHalfId2Sc_[half_id];
160  if (ch_f == ch_h)
161    return true;
162
163  return false;
164}
165
166bool SpellingTrie::is_half_id_yunmu(uint16 splid) const {
167  if (0 == splid || splid >= kFullSplIdStart)
168    return false;
169
170  char ch = kHalfId2Sc_[splid];
171  // If ch >= 'a', that means the half id is one of Zh/Ch/Sh
172  if (ch >= 'a') {
173    return false;
174  }
175
176  return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
177}
178
179bool SpellingTrie::is_shengmu_char(char ch) const {
180  return char_flags_[ch - 'A'] & kHalfIdShengmuMask;
181}
182
183bool SpellingTrie::is_yunmu_char(char ch) const {
184  return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
185}
186
187bool SpellingTrie::is_szm_char(char ch) const {
188  return is_shengmu_char(ch) || is_yunmu_char(ch);
189}
190
191bool SpellingTrie::szm_is_enabled(char ch) const {
192  return char_flags_[ch - 'A'] & kHalfIdSzmMask;
193}
194
195void SpellingTrie::szm_enable_shm(bool enable) {
196  if (enable) {
197    for (char ch = 'A'; ch <= 'Z'; ch++) {
198      if (is_shengmu_char(ch))
199        char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
200    }
201  } else {
202    for (char ch = 'A'; ch <= 'Z'; ch++) {
203      if (is_shengmu_char(ch))
204        char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
205    }
206  }
207}
208
209void SpellingTrie::szm_enable_ym(bool enable) {
210  if (enable) {
211    for (char ch = 'A'; ch <= 'Z'; ch++) {
212      if (is_yunmu_char(ch))
213        char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
214    }
215  } else {
216    for (char ch = 'A'; ch <= 'Z'; ch++) {
217      if (is_yunmu_char(ch))
218        char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
219    }
220  }
221}
222
223bool SpellingTrie::is_szm_enabled(char ch) const {
224  return char_flags_[ch - 'A'] & kHalfIdSzmMask;
225}
226
227const SpellingTrie* SpellingTrie::get_cpinstance() {
228  return &get_instance();
229}
230
231SpellingTrie& SpellingTrie::get_instance() {
232  if (NULL == instance_)
233    instance_ = new SpellingTrie();
234
235  return *instance_;
236}
237
238uint16 SpellingTrie::half2full_num(uint16 half_id) const {
239  if (NULL == root_ || half_id >= kFullSplIdStart)
240    return 0;
241  return h2f_num_[half_id];
242}
243
244uint16 SpellingTrie::half_to_full(uint16 half_id, uint16 *spl_id_start) const {
245  if (NULL == spl_id_start || NULL == root_ || half_id >= kFullSplIdStart)
246    return 0;
247
248  *spl_id_start = h2f_start_[half_id];
249  return h2f_num_[half_id];
250}
251
252uint16 SpellingTrie::full_to_half(uint16 full_id) const {
253  if (NULL == root_ || full_id < kFullSplIdStart ||
254      full_id > spelling_num_ + kFullSplIdStart)
255    return 0;
256
257  return f2h_[full_id - kFullSplIdStart];
258}
259
260void SpellingTrie::free_son_trie(SpellingNode* node) {
261  if (NULL == node)
262    return;
263
264  for (size_t pos = 0; pos < node->num_of_son; pos++) {
265    free_son_trie(node->first_son + pos);
266  }
267
268  if (NULL != node->first_son)
269    delete [] node->first_son;
270}
271
272bool SpellingTrie::construct(const char* spelling_arr, size_t item_size,
273                             size_t item_num, float score_amplifier,
274                             unsigned char average_score) {
275  if (spelling_arr == NULL)
276    return false;
277
278  memset(h2f_start_, 0, sizeof(uint16) * kFullSplIdStart);
279  memset(h2f_num_, 0, sizeof(uint16) * kFullSplIdStart);
280
281  // If the arr is the same as the buf, means this function is called by
282  // load_table(), the table data are ready; otherwise the array should be
283  // saved.
284  if (spelling_arr != spelling_buf_) {
285    if (NULL != spelling_buf_)
286      delete [] spelling_buf_;
287    spelling_buf_ = new char[item_size * item_num];
288    if (NULL == spelling_buf_)
289      return false;
290    memcpy(spelling_buf_, spelling_arr, sizeof(char) * item_size * item_num);
291  }
292
293  spelling_size_ = item_size;
294  spelling_num_ = item_num;
295
296  score_amplifier_ = score_amplifier;
297  average_score_ = average_score;
298
299  if (NULL != splstr_queried_)
300    delete [] splstr_queried_;
301  splstr_queried_ = new char[spelling_size_];
302  if (NULL == splstr_queried_)
303    return false;
304
305  if (NULL != splstr16_queried_)
306    delete [] splstr16_queried_;
307  splstr16_queried_ = new char16[spelling_size_];
308  if (NULL == splstr16_queried_)
309    return false;
310
311  // First, sort the buf to ensure they are in ascendant order
312  qsort(spelling_buf_, spelling_num_, spelling_size_, compare_spl);
313
314#ifdef ___BUILD_MODEL___
315  node_num_ = 1;
316#endif
317
318  root_ = new SpellingNode();
319  memset(root_, 0, sizeof(SpellingNode));
320
321  dumb_node_ = new SpellingNode();
322  memset(dumb_node_, 0, sizeof(SpellingNode));
323  dumb_node_->score = average_score_;
324
325  splitter_node_ = new SpellingNode();
326  memset(splitter_node_, 0, sizeof(SpellingNode));
327  splitter_node_->score = average_score_;
328
329  memset(level1_sons_, 0, sizeof(SpellingNode*) * kValidSplCharNum);
330
331  root_->first_son = construct_spellings_subset(0, spelling_num_, 0, root_);
332
333  // Root's score should be cleared.
334  root_->score = 0;
335
336  if (NULL == root_->first_son)
337    return false;
338
339  h2f_start_[0] = h2f_num_[0] = 0;
340
341  if (!build_f2h())
342    return false;
343
344#ifdef ___BUILD_MODEL___
345  if (kPrintDebug0) {
346    printf("---SpellingTrie Nodes: %d\n", node_num_);
347  }
348  return build_ym_info();
349#else
350  return true;
351#endif
352}
353
354#ifdef ___BUILD_MODEL___
355const char* SpellingTrie::get_ym_str(const char *spl_str) {
356  bool start_ZCS = false;
357  if (is_shengmu_char(*spl_str)) {
358    if ('Z' == *spl_str || 'C' == *spl_str || 'S' == *spl_str)
359      start_ZCS = true;
360    spl_str += 1;
361    if (start_ZCS && 'h' == *spl_str)
362      spl_str += 1;
363  }
364  return spl_str;
365}
366
367bool SpellingTrie::build_ym_info() {
368  bool sucess;
369  SpellingTable *spl_table = new SpellingTable();
370
371  sucess = spl_table->init_table(kMaxPinyinSize - 1, 2 * kMaxYmNum, false);
372  assert(sucess);
373
374  for (uint16 pos = 0; pos < spelling_num_; pos++) {
375    const char *spl_str = spelling_buf_ + spelling_size_ * pos;
376    spl_str = get_ym_str(spl_str);
377    if ('\0' != spl_str[0]) {
378      sucess = spl_table->put_spelling(spl_str, 0);
379      assert(sucess);
380    }
381  }
382
383  size_t ym_item_size;  // '\0' is included
384  size_t ym_num;
385  const char* ym_buf;
386  ym_buf = spl_table->arrange(&ym_item_size, &ym_num);
387
388  if (NULL != ym_buf_)
389    delete [] ym_buf_;
390  ym_buf_ = new char[ym_item_size * ym_num];
391  if (NULL == ym_buf_) {
392    delete spl_table;
393    return false;
394  }
395
396  memcpy(ym_buf_, ym_buf, sizeof(char) * ym_item_size * ym_num);
397  ym_size_ = ym_item_size;
398  ym_num_ = ym_num;
399
400  delete spl_table;
401
402  // Generate the maping from the spelling ids to the Yunmu ids.
403  if (spl_ym_ids_)
404    delete spl_ym_ids_;
405  spl_ym_ids_ = new uint8[spelling_num_ + kFullSplIdStart];
406  if (NULL == spl_ym_ids_)
407    return false;
408
409  memset(spl_ym_ids_, 0, sizeof(uint8) * (spelling_num_ + kFullSplIdStart));
410
411  for (uint16 id = 1; id < spelling_num_ + kFullSplIdStart; id++) {
412    const char *str = get_spelling_str(id);
413
414    str = get_ym_str(str);
415    if ('\0' != str[0]) {
416      uint8 ym_id = get_ym_id(str);
417      spl_ym_ids_[id] = ym_id;
418      assert(ym_id > 0);
419    } else {
420      spl_ym_ids_[id] = 0;
421    }
422  }
423  return true;
424}
425#endif
426
427SpellingNode* SpellingTrie::construct_spellings_subset(
428    size_t item_start, size_t item_end, size_t level, SpellingNode* parent) {
429  if (level >= spelling_size_ || item_end <= item_start || NULL == parent)
430    return NULL;
431
432  SpellingNode *first_son = NULL;
433  uint16 num_of_son = 0;
434  unsigned char min_son_score = 255;
435
436  const char *spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
437  char char_for_node = spelling_last_start[level];
438  assert(char_for_node >= 'A' && char_for_node <= 'Z' ||
439         'h' == char_for_node);
440
441  // Scan the array to find how many sons
442  for (size_t i = item_start + 1; i < item_end; i++) {
443    const char *spelling_current = spelling_buf_ + spelling_size_ * i;
444    char char_current = spelling_current[level];
445    if (char_current != char_for_node) {
446      num_of_son++;
447      char_for_node = char_current;
448    }
449  }
450  num_of_son++;
451
452  // Allocate memory
453#ifdef ___BUILD_MODEL___
454  node_num_ += num_of_son;
455#endif
456  first_son = new SpellingNode[num_of_son];
457  memset(first_son, 0, sizeof(SpellingNode)*num_of_son);
458
459  // Now begin construct tree
460  size_t son_pos = 0;
461
462  spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
463  char_for_node = spelling_last_start[level];
464
465  bool spelling_endable = true;
466  if (spelling_last_start[level + 1] != '\0')
467    spelling_endable = false;
468
469  size_t item_start_next = item_start;
470
471  for (size_t i = item_start + 1; i < item_end; i++) {
472    const char *spelling_current = spelling_buf_ + spelling_size_ * i;
473    char char_current = spelling_current[level];
474    assert(is_valid_spl_char(char_current));
475
476    if (char_current != char_for_node) {
477      // Construct a node
478      SpellingNode *node_current = first_son + son_pos;
479      node_current->char_this_node = char_for_node;
480
481      // For quick search in the first level
482      if (0 == level)
483        level1_sons_[char_for_node - 'A'] = node_current;
484
485      if (spelling_endable) {
486        node_current->spelling_idx = kFullSplIdStart + item_start_next;
487      }
488
489      if (spelling_last_start[level + 1] != '\0' || i - item_start_next > 1) {
490        size_t real_start = item_start_next;
491        if (spelling_last_start[level + 1] == '\0')
492          real_start++;
493
494        node_current->first_son =
495            construct_spellings_subset(real_start, i, level + 1,
496                                       node_current);
497
498        if (real_start == item_start_next + 1) {
499          uint16 score_this = static_cast<unsigned char>(
500              spelling_last_start[spelling_size_ - 1]);
501          if (score_this < node_current->score)
502            node_current->score = score_this;
503        }
504      } else {
505        node_current->first_son = NULL;
506        node_current->score = static_cast<unsigned char>(
507            spelling_last_start[spelling_size_ - 1]);
508      }
509
510      if (node_current->score < min_son_score)
511        min_son_score = node_current->score;
512
513      bool is_half = false;
514      if (level == 0 && is_szm_char(char_for_node)) {
515        node_current->spelling_idx =
516          static_cast<uint16>(char_for_node - 'A' + 1);
517
518        if (char_for_node > 'C')
519          node_current->spelling_idx++;
520        if (char_for_node > 'S')
521          node_current->spelling_idx++;
522
523        h2f_num_[node_current->spelling_idx] = i - item_start_next;
524        is_half = true;
525      } else if (level == 1 && char_for_node == 'h') {
526        char ch_level0 = spelling_last_start[0];
527        uint16 part_id = 0;
528        if (ch_level0 == 'C')
529          part_id = 'C' - 'A' + 1 + 1;
530        else if (ch_level0 == 'S')
531          part_id = 'S' - 'A' + 1 + 2;
532        else if (ch_level0 == 'Z')
533          part_id = 'Z' - 'A' + 1 + 3;
534        if (0 != part_id) {
535          node_current->spelling_idx = part_id;
536          h2f_num_[node_current->spelling_idx] = i - item_start_next;
537          is_half = true;
538        }
539      }
540
541      if (is_half) {
542        if (h2f_num_[node_current->spelling_idx] > 0)
543          h2f_start_[node_current->spelling_idx] =
544            item_start_next + kFullSplIdStart;
545        else
546          h2f_start_[node_current->spelling_idx] = 0;
547      }
548
549      // for next sibling
550      spelling_last_start = spelling_current;
551      char_for_node = char_current;
552      item_start_next = i;
553      spelling_endable = true;
554      if (spelling_current[level + 1] != '\0')
555        spelling_endable = false;
556
557      son_pos++;
558    }
559  }
560
561  // the last one
562  SpellingNode *node_current = first_son + son_pos;
563  node_current->char_this_node = char_for_node;
564
565  // For quick search in the first level
566  if (0 == level)
567    level1_sons_[char_for_node - 'A'] = node_current;
568
569  if (spelling_endable) {
570    node_current->spelling_idx = kFullSplIdStart + item_start_next;
571  }
572
573  if (spelling_last_start[level + 1] != '\0' ||
574      item_end - item_start_next > 1) {
575    size_t real_start = item_start_next;
576    if (spelling_last_start[level + 1] == '\0')
577      real_start++;
578
579    node_current->first_son =
580        construct_spellings_subset(real_start, item_end, level + 1,
581                                   node_current);
582
583    if (real_start == item_start_next + 1) {
584      uint16 score_this = static_cast<unsigned char>(
585          spelling_last_start[spelling_size_ - 1]);
586      if (score_this < node_current->score)
587        node_current->score = score_this;
588    }
589  } else {
590    node_current->first_son = NULL;
591    node_current->score = static_cast<unsigned char>(
592        spelling_last_start[spelling_size_ - 1]);
593  }
594
595  if (node_current->score < min_son_score)
596    min_son_score = node_current->score;
597
598  assert(son_pos + 1 == num_of_son);
599
600  bool is_half = false;
601  if (level == 0 && szm_is_enabled(char_for_node)) {
602    node_current->spelling_idx = static_cast<uint16>(char_for_node - 'A' + 1);
603
604    if (char_for_node > 'C')
605      node_current->spelling_idx++;
606    if (char_for_node > 'S')
607      node_current->spelling_idx++;
608
609    h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
610    is_half = true;
611  } else if (level == 1 && char_for_node == 'h') {
612    char ch_level0 = spelling_last_start[0];
613    uint16 part_id = 0;
614    if (ch_level0 == 'C')
615      part_id = 'C' - 'A' + 1 + 1;
616    else if (ch_level0 == 'S')
617      part_id = 'S' - 'A' + 1 + 2;
618    else if (ch_level0 == 'Z')
619      part_id = 'Z' - 'A' + 1 + 3;
620    if (0 != part_id) {
621      node_current->spelling_idx = part_id;
622      h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
623      is_half = true;
624    }
625  }
626  if (is_half) {
627    if (h2f_num_[node_current->spelling_idx] > 0)
628      h2f_start_[node_current->spelling_idx] =
629        item_start_next + kFullSplIdStart;
630    else
631      h2f_start_[node_current->spelling_idx] = 0;
632  }
633
634  parent->num_of_son = num_of_son;
635  parent->score = min_son_score;
636  return first_son;
637}
638
639bool SpellingTrie::save_spl_trie(FILE *fp) {
640  if (NULL == fp || NULL == spelling_buf_)
641    return false;
642
643  if (fwrite(&spelling_size_, sizeof(size_t), 1, fp) != 1)
644    return false;
645
646  if (fwrite(&spelling_num_, sizeof(size_t), 1, fp) != 1)
647    return false;
648
649  if (fwrite(&score_amplifier_, sizeof(float), 1, fp) != 1)
650    return false;
651
652  if (fwrite(&average_score_, sizeof(unsigned char), 1, fp) != 1)
653    return false;
654
655  if (fwrite(spelling_buf_, sizeof(char) * spelling_size_,
656             spelling_num_, fp) != spelling_num_)
657    return false;
658
659  return true;
660}
661
662bool SpellingTrie::load_spl_trie(FILE *fp) {
663  if (NULL == fp)
664    return false;
665
666  if (fread(&spelling_size_, sizeof(size_t), 1, fp) != 1)
667    return false;
668
669  if (fread(&spelling_num_, sizeof(size_t), 1, fp) != 1)
670    return false;
671
672  if (fread(&score_amplifier_, sizeof(float), 1, fp) != 1)
673    return false;
674
675  if (fread(&average_score_, sizeof(unsigned char), 1, fp) != 1)
676    return false;
677
678  if (NULL != spelling_buf_)
679    delete [] spelling_buf_;
680
681  spelling_buf_ = new char[spelling_size_ * spelling_num_];
682  if (NULL == spelling_buf_)
683    return false;
684
685  if (fread(spelling_buf_, sizeof(char) * spelling_size_,
686            spelling_num_, fp) != spelling_num_)
687    return false;
688
689  return construct(spelling_buf_, spelling_size_, spelling_num_,
690                   score_amplifier_, average_score_);
691}
692
693bool SpellingTrie::build_f2h() {
694  if (NULL != f2h_)
695    delete [] f2h_;
696  f2h_ = new uint16[spelling_num_];
697  if (NULL == f2h_)
698    return false;
699
700  for (uint16 hid = 0; hid < kFullSplIdStart; hid++) {
701    for (uint16 fid = h2f_start_[hid];
702         fid < h2f_start_[hid] + h2f_num_[hid]; fid++)
703      f2h_[fid - kFullSplIdStart] = hid;
704  }
705
706  return true;
707}
708
709size_t SpellingTrie::get_spelling_num() {
710  return spelling_num_;
711}
712
713uint8 SpellingTrie::get_ym_id(const char *ym_str) {
714  if (NULL == ym_str || NULL == ym_buf_)
715    return 0;
716
717  for (uint8 pos = 0; pos < ym_num_; pos++)
718    if (strcmp(ym_buf_ + ym_size_ * pos, ym_str) == 0)
719      return pos + 1;
720
721  return 0;
722}
723
724const char* SpellingTrie::get_spelling_str(uint16 splid) {
725  splstr_queried_[0] = '\0';
726
727  if (splid >= kFullSplIdStart) {
728    splid -= kFullSplIdStart;
729    snprintf(splstr_queried_, spelling_size_, "%s",
730             spelling_buf_ + splid * spelling_size_);
731  } else {
732    if (splid == 'C' - 'A' + 1 + 1) {
733      snprintf(splstr_queried_, spelling_size_, "%s", "Ch");
734    } else if (splid == 'S' - 'A' + 1 + 2) {
735      snprintf(splstr_queried_, spelling_size_, "%s", "Sh");
736    } else if (splid == 'Z' - 'A' + 1 + 3) {
737      snprintf(splstr_queried_, spelling_size_, "%s", "Zh");
738    } else {
739      if (splid > 'C' - 'A' + 1)
740        splid--;
741      if (splid > 'S' - 'A' + 1)
742        splid--;
743      splstr_queried_[0] = 'A' + splid - 1;
744      splstr_queried_[1] = '\0';
745    }
746  }
747  return splstr_queried_;
748}
749
750const char16* SpellingTrie::get_spelling_str16(uint16 splid) {
751  splstr16_queried_[0] = '\0';
752
753  if (splid >= kFullSplIdStart) {
754    splid -= kFullSplIdStart;
755    for (size_t pos = 0; pos < spelling_size_; pos++) {
756      splstr16_queried_[pos] = static_cast<char16>
757          (spelling_buf_[splid * spelling_size_ + pos]);
758    }
759  } else {
760    if (splid == 'C' - 'A' + 1 + 1) {
761      splstr16_queried_[0] = static_cast<char16>('C');
762      splstr16_queried_[1] = static_cast<char16>('h');
763      splstr16_queried_[2] = static_cast<char16>('\0');
764    } else if (splid == 'S' - 'A' + 1 + 2) {
765      splstr16_queried_[0] = static_cast<char16>('S');
766      splstr16_queried_[1] = static_cast<char16>('h');
767      splstr16_queried_[2] = static_cast<char16>('\0');
768    } else if (splid == 'Z' - 'A' + 1 + 3) {
769      splstr16_queried_[0] = static_cast<char16>('Z');
770      splstr16_queried_[1] = static_cast<char16>('h');
771      splstr16_queried_[2] = static_cast<char16>('\0');
772    } else {
773      if (splid > 'C' - 'A' + 1)
774        splid--;
775      if (splid > 'S' - 'A' + 1)
776        splid--;
777      splstr16_queried_[0] = 'A' + splid - 1;
778      splstr16_queried_[1] = '\0';
779    }
780  }
781  return splstr16_queried_;
782}
783
784size_t SpellingTrie::get_spelling_str16(uint16 splid, char16 *splstr16,
785                                        size_t splstr16_len) {
786  if (NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1) return 0;
787
788  if (splid >= kFullSplIdStart) {
789    splid -= kFullSplIdStart;
790    for (size_t pos = 0; pos <= kMaxPinyinSize; pos++) {
791      splstr16[pos] = static_cast<char16>
792          (spelling_buf_[splid * spelling_size_ + pos]);
793      if (static_cast<char16>('\0') == splstr16[pos]) {
794        return pos;
795      }
796    }
797  } else {
798    if (splid == 'C' - 'A' + 1 + 1) {
799      splstr16[0] = static_cast<char16>('C');
800      splstr16[1] = static_cast<char16>('h');
801      splstr16[2] = static_cast<char16>('\0');
802      return 2;
803    } else if (splid == 'S' - 'A' + 1 + 2) {
804      splstr16[0] = static_cast<char16>('S');
805      splstr16[1] = static_cast<char16>('h');
806      splstr16[2] = static_cast<char16>('\0');
807      return 2;
808    } else if (splid == 'Z' - 'A' + 1 + 3) {
809      splstr16[0] = static_cast<char16>('Z');
810      splstr16[1] = static_cast<char16>('h');
811      splstr16[2] = static_cast<char16>('\0');
812      return 2;
813    } else {
814      if (splid > 'C' - 'A' + 1)
815        splid--;
816      if (splid > 'S' - 'A' + 1)
817        splid--;
818      splstr16[0] = 'A' + splid - 1;
819      splstr16[1] = '\0';
820      return 1;
821    }
822  }
823
824  // Not reachable.
825  return 0;
826}
827
828}  // namespace ime_pinyin
829