1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "ui/base/ime/chromeos/character_composer.h"
6
7#include <X11/Xlib.h>
8#include <X11/Xutil.h>
9
10#include <algorithm>
11#include <iterator>
12
13#include "base/strings/utf_string_conversions.h"
14#include "base/third_party/icu/icu_utf.h"
15// Note for Gtk removal: gdkkeysyms.h only contains a set of
16// '#define GDK_KeyName 0xNNNN' macros and does not #include any Gtk headers.
17#include "third_party/gtk+/gdk/gdkkeysyms.h"
18#include "ui/base/glib/glib_integers.h"
19#include "ui/events/event.h"
20#include "ui/events/event_constants.h"
21#include "ui/gfx/x/x11_types.h"
22
23// Note for Gtk removal: gtkimcontextsimpleseqs.h does not #include any Gtk
24// headers and only contains one big guint16 array |gtk_compose_seqs_compact|
25// which defines the main compose table. The table has internal linkage.
26// The order of header inclusion is out of order because
27// gtkimcontextsimpleseqs.h depends on guint16, which is defined in
28// "ui/base/glib/glib_integers.h".
29#include "third_party/gtk+/gtk/gtkimcontextsimpleseqs.h"
30
31namespace {
32
33// A black list for not composing dead keys. Once the key combination is listed
34// below, the dead key won't work even when this is listed in
35// gtkimcontextsimpleseqs.h. This only supports two keyevent sequenses.
36// TODO(nona): Remove this hack.
37const struct BlackListedDeadKey {
38  uint32 first_key;  // target first key event.
39  uint32 second_key;  // target second key event.
40  uint32 output_char;  // the character to be inserted if the filter is matched.
41  bool consume;  // true if the original key event will be consumed.
42} kBlackListedDeadKeys[] = {
43  { GDK_KEY_dead_acute, GDK_KEY_m, GDK_KEY_apostrophe, false },
44  { GDK_KEY_dead_acute, GDK_KEY_s, GDK_KEY_apostrophe, false },
45  { GDK_KEY_dead_acute, GDK_KEY_t, GDK_KEY_apostrophe, false },
46  { GDK_KEY_dead_acute, GDK_KEY_v, GDK_KEY_apostrophe, false },
47  { GDK_KEY_dead_acute, GDK_KEY_dead_acute, GDK_KEY_apostrophe, true },
48};
49
50typedef std::vector<unsigned int> ComposeBufferType;
51
52// An iterator class to apply std::lower_bound for composition table.
53class SequenceIterator
54    : public std::iterator<std::random_access_iterator_tag, const uint16*> {
55 public:
56  SequenceIterator() : ptr_(NULL), stride_(0) {}
57  SequenceIterator(const uint16* ptr, int stride)
58      : ptr_(ptr), stride_(stride) {}
59
60  const uint16* ptr() const {return ptr_;}
61  int stride() const {return stride_;}
62
63  SequenceIterator& operator++() {
64    ptr_ += stride_;
65    return *this;
66  }
67  SequenceIterator& operator+=(int n) {
68    ptr_ += stride_*n;
69    return *this;
70  }
71
72  const uint16* operator*() const {return ptr_;}
73
74 private:
75  const uint16* ptr_;
76  int stride_;
77};
78
79inline SequenceIterator operator+(const SequenceIterator& l, int r) {
80  return SequenceIterator(l) += r;
81}
82
83inline int operator-(const SequenceIterator& l, const SequenceIterator& r) {
84  const int d = l.ptr() - r.ptr();
85  DCHECK(l.stride() == r.stride() && l.stride() > 0 && d%l.stride() == 0);
86  return d/l.stride();
87}
88
89inline bool operator==(const SequenceIterator& l, const SequenceIterator& r) {
90  DCHECK(l.stride() == r.stride());
91  return l.ptr() == r.ptr();
92}
93
94inline bool operator!=(const SequenceIterator& l, const SequenceIterator& r) {
95  return !(l == r);
96}
97
98// A function to compare key value.
99inline int CompareSequenceValue(unsigned int l, unsigned int r) {
100  return (l > r) ? 1 : ((l < r) ? -1 : 0);
101}
102
103// A template to make |CompareFunc| work like operator<.
104// |CompareFunc| is required to implement a member function,
105// int operator()(const ComposeBufferType& l, const uint16* r) const.
106template<typename CompareFunc>
107struct ComparatorAdoptor {
108  bool operator()(const ComposeBufferType& l, const uint16* r) const {
109    return CompareFunc()(l, r) == -1;
110  }
111  bool operator()(const uint16* l, const ComposeBufferType& r) const {
112    return CompareFunc()(r, l) == 1;
113  }
114};
115
116class ComposeChecker {
117 public:
118  // This class does not take the ownership of |data|, |data| should be alive
119  // for the lifetime of the object.
120  // |data| is a pointer to the head of an array of
121  // length (|max_sequence_length| + 2)*|n_sequences|.
122  // Every (|max_sequence_length| + 2) elements of |data| represent an entry.
123  // First |max_sequence_length| elements of an entry is the sequecne which
124  // composes the character represented by the last two elements of the entry.
125  ComposeChecker(const uint16* data, int max_sequence_length, int n_sequences);
126  bool CheckSequence(const ComposeBufferType& sequence,
127                     uint32* composed_character) const;
128
129 private:
130  struct CompareSequence {
131    int operator()(const ComposeBufferType& l, const uint16* r) const;
132  };
133
134  // This class does not take the ownership of |data_|,
135  // the dtor does not delete |data_|.
136  const uint16* data_;
137  int max_sequence_length_;
138  int n_sequences_;
139  int row_stride_;
140
141  DISALLOW_COPY_AND_ASSIGN(ComposeChecker);
142};
143
144ComposeChecker::ComposeChecker(const uint16* data,
145                               int max_sequence_length,
146                               int n_sequences)
147    : data_(data),
148      max_sequence_length_(max_sequence_length),
149      n_sequences_(n_sequences),
150      row_stride_(max_sequence_length + 2) {
151}
152
153bool ComposeChecker::CheckSequence(const ComposeBufferType& sequence,
154                                   uint32* composed_character) const {
155  const int sequence_length = sequence.size();
156  if (sequence_length > max_sequence_length_)
157    return false;
158  // Find sequence in the table.
159  const SequenceIterator begin(data_, row_stride_);
160  const SequenceIterator end = begin + n_sequences_;
161  const SequenceIterator found = std::lower_bound(
162      begin, end, sequence, ComparatorAdoptor<CompareSequence>());
163  if (found == end || CompareSequence()(sequence, *found) != 0)
164    return false;
165
166  if (sequence_length == max_sequence_length_ ||
167      (*found)[sequence_length] == 0) {
168    // |found| is not partially matching. It's fully matching.
169    if (found + 1 == end ||
170        CompareSequence()(sequence, *(found + 1)) != 0) {
171      // There is no composition longer than |found| which matches to
172      // |sequence|.
173      const uint32 value = ((*found)[max_sequence_length_] << 16) |
174          (*found)[max_sequence_length_ + 1];
175      *composed_character = value;
176    }
177  }
178  return true;
179}
180
181int ComposeChecker::CompareSequence::operator()(const ComposeBufferType& l,
182                                                const uint16* r) const {
183  for(size_t i = 0; i < l.size(); ++i) {
184    const int compare_result = CompareSequenceValue(l[i], r[i]);
185    if(compare_result)
186      return compare_result;
187  }
188  return 0;
189}
190
191
192class ComposeCheckerWithCompactTable {
193 public:
194  // This class does not take the ownership of |data|, |data| should be alive
195  // for the lifetime of the object.
196  // First |index_size|*|index_stride| elements of |data| are an index table.
197  // Every |index_stride| elements of an index table are an index entry.
198  // If you are checking with a sequence of length N beginning with character C,
199  // you have to find an index entry whose first element is C, then get the N-th
200  // element of the index entry as the index.
201  // The index is pointing the element of |data| where the composition table for
202  // sequences of length N beginning with C is placed.
203
204  ComposeCheckerWithCompactTable(const uint16* data,
205                                 int max_sequence_length,
206                                 int index_size,
207                                 int index_stride);
208  bool CheckSequence(const ComposeBufferType& sequence,
209                     uint32* composed_character) const;
210
211 private:
212  struct CompareSequenceFront {
213    int operator()(const ComposeBufferType& l, const uint16* r) const;
214  };
215  struct CompareSequenceSkipFront {
216    int operator()(const ComposeBufferType& l, const uint16* r) const;
217  };
218
219  // This class does not take the ownership of |data_|,
220  // the dtor does not delete |data_|.
221  const uint16* data_;
222  int max_sequence_length_;
223  int index_size_;
224  int index_stride_;
225};
226
227ComposeCheckerWithCompactTable::ComposeCheckerWithCompactTable(
228    const uint16* data,
229    int max_sequence_length,
230    int index_size,
231    int index_stride)
232    : data_(data),
233      max_sequence_length_(max_sequence_length),
234      index_size_(index_size),
235      index_stride_(index_stride) {
236}
237
238bool ComposeCheckerWithCompactTable::CheckSequence(
239    const ComposeBufferType& sequence,
240    uint32* composed_character) const {
241  const int compose_length = sequence.size();
242  if (compose_length > max_sequence_length_)
243    return false;
244  // Find corresponding index for the first keypress.
245  const SequenceIterator index_begin(data_, index_stride_);
246  const SequenceIterator index_end = index_begin + index_size_;
247  const SequenceIterator index =
248      std::lower_bound(index_begin, index_end, sequence,
249                       ComparatorAdoptor<CompareSequenceFront>());
250  if (index == index_end || CompareSequenceFront()(sequence, *index) != 0)
251    return false;
252  if (compose_length == 1)
253    return true;
254  // Check for composition sequences.
255  for (int length = compose_length - 1; length < max_sequence_length_;
256       ++length) {
257    const uint16* table = data_ + (*index)[length];
258    const uint16* table_next = data_ + (*index)[length + 1];
259    if (table_next > table) {
260      // There are composition sequences for this |length|.
261      const int row_stride = length + 1;
262      const int n_sequences = (table_next - table)/row_stride;
263      const SequenceIterator table_begin(table, row_stride);
264      const SequenceIterator table_end = table_begin + n_sequences;
265      const SequenceIterator found =
266          std::lower_bound(table_begin, table_end, sequence,
267                           ComparatorAdoptor<CompareSequenceSkipFront>());
268      if (found != table_end &&
269          CompareSequenceSkipFront()(sequence, *found) == 0) {
270        if (length == compose_length - 1)  // Exact match.
271          *composed_character = (*found)[length];
272        return true;
273      }
274    }
275  }
276  return false;
277}
278
279int ComposeCheckerWithCompactTable::CompareSequenceFront::operator()(
280    const ComposeBufferType& l, const uint16* r) const {
281  return CompareSequenceValue(l[0], r[0]);
282}
283
284int ComposeCheckerWithCompactTable::CompareSequenceSkipFront::operator()(
285    const ComposeBufferType& l, const uint16* r) const {
286  for(size_t i = 1; i < l.size(); ++i) {
287    const int compare_result = CompareSequenceValue(l[i], r[i - 1]);
288    if(compare_result)
289      return compare_result;
290  }
291  return 0;
292}
293
294
295// Additional table.
296
297// The difference between this and the default input method is the handling
298// of C+acute - this method produces C WITH CEDILLA rather than C WITH ACUTE.
299// For languages that use CCedilla and not acute, this is the preferred mapping,
300// and is particularly important for pt_BR, where the us-intl keyboard is
301// used extensively.
302
303const uint16 cedilla_compose_seqs[] = {
304  // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
305  GDK_KEY_dead_acute, GDK_KEY_C, 0, 0, 0, 0x00C7,
306  // LATIN_SMALL_LETTER_C_WITH_CEDILLA
307  GDK_KEY_dead_acute, GDK_KEY_c, 0, 0, 0, 0x00E7,
308  // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
309  GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_C, 0, 0, 0x00C7,
310  // LATIN_SMALL_LETTER_C_WITH_CEDILLA
311  GDK_KEY_Multi_key, GDK_KEY_apostrophe, GDK_KEY_c, 0, 0, 0x00E7,
312  // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
313  GDK_KEY_Multi_key, GDK_KEY_C, GDK_KEY_apostrophe, 0, 0, 0x00C7,
314  // LATIN_SMALL_LETTER_C_WITH_CEDILLA
315  GDK_KEY_Multi_key, GDK_KEY_c, GDK_KEY_apostrophe, 0, 0, 0x00E7,
316};
317
318bool KeypressShouldBeIgnored(unsigned int keyval) {
319  switch(keyval) {
320    case GDK_KEY_Shift_L:
321    case GDK_KEY_Shift_R:
322    case GDK_KEY_Control_L:
323    case GDK_KEY_Control_R:
324    case GDK_KEY_Caps_Lock:
325    case GDK_KEY_Shift_Lock:
326    case GDK_KEY_Meta_L:
327    case GDK_KEY_Meta_R:
328    case GDK_KEY_Alt_L:
329    case GDK_KEY_Alt_R:
330    case GDK_KEY_Super_L:
331    case GDK_KEY_Super_R:
332    case GDK_KEY_Hyper_L:
333    case GDK_KEY_Hyper_R:
334    case GDK_KEY_Mode_switch:
335    case GDK_KEY_ISO_Level3_Shift:
336      return true;
337    default:
338      return false;
339  }
340}
341
342bool CheckCharacterComposeTable(const ComposeBufferType& sequence,
343                                uint32* composed_character) {
344  // Check cedilla compose table.
345  const ComposeChecker kCedillaComposeChecker(
346      cedilla_compose_seqs, 4, arraysize(cedilla_compose_seqs)/(4 + 2));
347  if (kCedillaComposeChecker.CheckSequence(sequence, composed_character))
348    return true;
349
350  // Check main compose table.
351  const ComposeCheckerWithCompactTable kMainComposeChecker(
352      gtk_compose_seqs_compact, 5, 24, 6);
353  if (kMainComposeChecker.CheckSequence(sequence, composed_character))
354    return true;
355
356  return false;
357}
358
359// Converts |character| to UTF16 string.
360// Returns false when |character| is not a valid character.
361bool UTF32CharacterToUTF16(uint32 character, string16* output) {
362  output->clear();
363  // Reject invalid character. (e.g. codepoint greater than 0x10ffff)
364  if (!CBU_IS_UNICODE_CHAR(character))
365    return false;
366  if (character) {
367    output->resize(CBU16_LENGTH(character));
368    size_t i = 0;
369    CBU16_APPEND_UNSAFE(&(*output)[0], i, character);
370  }
371  return true;
372}
373
374// Converts a X keycode to a X keysym with no modifiers.
375KeySym XKeyCodeToXKeySym(unsigned int keycode) {
376  XDisplay* display = gfx::GetXDisplay();
377  if (!display)
378    return NoSymbol;
379
380  XKeyEvent x_key_event = {0};
381  x_key_event.type = KeyPress;
382  x_key_event.display = display;
383  x_key_event.keycode = keycode;
384  return ::XLookupKeysym(&x_key_event, 0);
385}
386
387// Returns an hexadecimal digit integer (0 to 15) corresponding to |keyval|.
388// -1 is returned when |keyval| cannot be a hexadecimal digit.
389int KeyvalToHexDigit(unsigned int keyval) {
390  if (GDK_KEY_0 <= keyval && keyval <= GDK_KEY_9)
391    return keyval - GDK_KEY_0;
392  if (GDK_KEY_a <= keyval && keyval <= GDK_KEY_f)
393    return keyval - GDK_KEY_a + 10;
394  if (GDK_KEY_A <= keyval && keyval <= GDK_KEY_F)
395    return keyval - GDK_KEY_A + 10;
396  return -1;  // |keyval| cannot be a hexadecimal digit.
397}
398
399}  // namespace
400
401namespace ui {
402
403CharacterComposer::CharacterComposer() : composition_mode_(KEY_SEQUENCE_MODE) {}
404
405CharacterComposer::~CharacterComposer() {}
406
407void CharacterComposer::Reset() {
408  compose_buffer_.clear();
409  composed_character_.clear();
410  preedit_string_.clear();
411  composition_mode_ = KEY_SEQUENCE_MODE;
412}
413
414bool CharacterComposer::FilterKeyPress(const ui::KeyEvent& event) {
415  if (!event.HasNativeEvent() ||
416      (event.type() != ET_KEY_PRESSED && event.type() != ET_KEY_RELEASED))
417    return false;
418
419  XEvent* xevent = event.native_event();
420  DCHECK(xevent);
421  KeySym keysym = NoSymbol;
422  ::XLookupString(&xevent->xkey, NULL, 0, &keysym, NULL);
423
424  return FilterKeyPressInternal(keysym, xevent->xkey.keycode, event.flags());
425}
426
427
428bool CharacterComposer::FilterKeyPressInternal(unsigned int keyval,
429                                               unsigned int keycode,
430                                               int flags) {
431  composed_character_.clear();
432  preedit_string_.clear();
433
434  // We don't care about modifier key presses.
435  if(KeypressShouldBeIgnored(keyval))
436    return false;
437
438  // When the user presses Ctrl+Shift+U, maybe switch to HEX_MODE.
439  // We don't care about other modifiers like Alt.  When CapsLock is down, we
440  // do nothing because what we receive is Ctrl+Shift+u (not U).
441  if (keyval == GDK_KEY_U && (flags & EF_SHIFT_DOWN) &&
442      (flags & EF_CONTROL_DOWN)) {
443    if (composition_mode_ == KEY_SEQUENCE_MODE && compose_buffer_.empty()) {
444      // There is no ongoing composition.  Let's switch to HEX_MODE.
445      composition_mode_ = HEX_MODE;
446      UpdatePreeditStringHexMode();
447      return true;
448    }
449  }
450
451  // Filter key press in an appropriate manner.
452  switch (composition_mode_) {
453    case KEY_SEQUENCE_MODE:
454      return FilterKeyPressSequenceMode(keyval, flags);
455    case HEX_MODE:
456      return FilterKeyPressHexMode(keyval, keycode, flags);
457    default:
458      NOTREACHED();
459      return false;
460  }
461}
462
463bool CharacterComposer::FilterKeyPressSequenceMode(unsigned int keyval,
464                                                   int flags) {
465  DCHECK(composition_mode_ == KEY_SEQUENCE_MODE);
466  compose_buffer_.push_back(keyval);
467
468  if (compose_buffer_.size() == 2U) {
469    for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kBlackListedDeadKeys); ++i) {
470      if (compose_buffer_[0] == kBlackListedDeadKeys[i].first_key &&
471          compose_buffer_[1] == kBlackListedDeadKeys[i].second_key ) {
472        Reset();
473        composed_character_.push_back(kBlackListedDeadKeys[i].output_char);
474        return kBlackListedDeadKeys[i].consume;
475      }
476    }
477  }
478
479  // Check compose table.
480  uint32 composed_character_utf32 = 0;
481  if (CheckCharacterComposeTable(compose_buffer_, &composed_character_utf32)) {
482    // Key press is recognized as a part of composition.
483    if (composed_character_utf32 != 0) {
484      // We get a composed character.
485      compose_buffer_.clear();
486      UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
487    }
488    return true;
489  }
490  // Key press is not a part of composition.
491  compose_buffer_.pop_back();  // Remove the keypress added this time.
492  if (!compose_buffer_.empty()) {
493    compose_buffer_.clear();
494    return true;
495  }
496  return false;
497}
498
499bool CharacterComposer::FilterKeyPressHexMode(unsigned int keyval,
500                                              unsigned int keycode,
501                                              int flags) {
502  DCHECK(composition_mode_ == HEX_MODE);
503  const size_t kMaxHexSequenceLength = 8;
504  int hex_digit = KeyvalToHexDigit(keyval);
505  if (hex_digit < 0) {
506    // With 101 keyboard, control + shift + 3 produces '#', but a user may
507    // have intended to type '3'.  So, if a hexadecimal character was not found,
508    // suppose a user is holding shift key (and possibly control key, too) and
509    // try a character with modifier keys removed.
510    hex_digit = KeyvalToHexDigit(XKeyCodeToXKeySym(keycode));
511  }
512
513  if (keyval == GDK_KEY_Escape) {
514    // Cancel composition when ESC is pressed.
515    Reset();
516  } else if (keyval == GDK_KEY_Return || keyval == GDK_KEY_KP_Enter ||
517             keyval == GDK_KEY_ISO_Enter ||
518             keyval == GDK_KEY_space || keyval == GDK_KEY_KP_Space) {
519    // Commit the composed character when Enter or space is pressed.
520    CommitHex();
521  } else if (keyval == GDK_KEY_BackSpace) {
522    // Pop back the buffer when Backspace is pressed.
523    if (!compose_buffer_.empty()) {
524      compose_buffer_.pop_back();
525    } else {
526      // If there is no character in |compose_buffer_|, cancel composition.
527      Reset();
528    }
529  } else if (hex_digit >= 0 &&
530             compose_buffer_.size() < kMaxHexSequenceLength) {
531    // Add the key to the buffer if it is a hex digit.
532    compose_buffer_.push_back(hex_digit);
533  }
534
535  UpdatePreeditStringHexMode();
536
537  return true;
538}
539
540void CharacterComposer::CommitHex() {
541  DCHECK(composition_mode_ == HEX_MODE);
542  uint32 composed_character_utf32 = 0;
543  for (size_t i = 0; i != compose_buffer_.size(); ++i) {
544    const uint32 digit = compose_buffer_[i];
545    DCHECK(0 <= digit && digit < 16);
546    composed_character_utf32 <<= 4;
547    composed_character_utf32 |= digit;
548  }
549  Reset();
550  UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
551}
552
553void CharacterComposer::UpdatePreeditStringHexMode() {
554  if (composition_mode_ != HEX_MODE) {
555    preedit_string_.clear();
556    return;
557  }
558  std::string preedit_string_ascii("u");
559  for (size_t i = 0; i != compose_buffer_.size(); ++i) {
560    const int digit = compose_buffer_[i];
561    DCHECK(0 <= digit && digit < 16);
562    preedit_string_ascii += digit <= 9 ? ('0' + digit) : ('a' + (digit - 10));
563  }
564  preedit_string_ = ASCIIToUTF16(preedit_string_ascii);
565}
566
567}  // namespace ui
568