1#
2#   Copyright (C) 2002-2010, International Business Machines Corporation and others.
3#       All Rights Reserved.
4#
5#   file:  char.txt 
6#
7#   ICU Character Break Rules, also known as Grapheme Cluster Boundaries
8#      See Unicode Standard Annex #29.
9#      These rules are based on TR29 Revision 16, for Unicode Version 6.0
10#
11
12#
13#  Character Class Definitions.
14#
#  Each variable names the set of code points whose Unicode
#  Grapheme_Cluster_Break property has the value on the right-hand side.
#  $CR, $LF and $Control are the three classes that the rule sections of
#  this file exclude from their negated context sets [^$Control $CR $LF];
#  $Prepend, $Extend and $SpacingMark are the classes those rules attach
#  to a neighbouring character.
15$CR          = [\p{Grapheme_Cluster_Break = CR}];
16$LF          = [\p{Grapheme_Cluster_Break = LF}];
17$Control     = [\p{Grapheme_Cluster_Break = Control}];
18$Prepend     = [\p{Grapheme_Cluster_Break = Prepend}];
19$Extend      = [\p{Grapheme_Cluster_Break = Extend}];
20$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];
21
22#
23# Korean Syllable Definitions
24#
#  Sets selected by the Hangul-related Grapheme_Cluster_Break values.
#  In UAX #29 these values denote: L = leading consonant jamo,
#  V = vowel jamo, T = trailing consonant jamo.
25$L       = [\p{Grapheme_Cluster_Break = L}];
26$V       = [\p{Grapheme_Cluster_Break = V}];
27$T       = [\p{Grapheme_Cluster_Break = T}];
28
#  In UAX #29, LV and LVT denote precomposed Hangul syllables
#  (leading+vowel, and leading+vowel+trailing, respectively).
29$LV      = [\p{Grapheme_Cluster_Break = LV}];
30$LVT     = [\p{Grapheme_Cluster_Break = LVT}];
31
32
33## -------------------------------------------------
#  Rule-set option.
#  NOTE(review): per the ICU break-rule syntax, !!chain lets the end of one
#  rule match continue into another rule, so the two-class rules below can
#  combine into longer clusters -- confirm against the ICU documentation.
34!!chain;
35
#  Forward rules.  Each rule names a pair of adjacent character classes
#  that are kept together (no boundary between them) when scanning forward.
36!!forward;
37
#  CR followed by LF stays together (corresponds to UAX #29 rule GB3).
38$CR $LF;
39
#  Hangul syllable sequences (correspond to UAX #29 rules GB6, GB7, GB8):
#    L   followed by L, V, LV or LVT
#    LV or V  followed by V or T
#    LVT or T followed by T
40$L ($L | $V | $LV | $LVT);
41($LV | $V) ($V | $T);
42($LVT | $T) $T;
43
#  Any character other than Control, CR or LF keeps a following Extend
#  character attached (corresponds to UAX #29 rule GB9).
44[^$Control $CR $LF] $Extend;
45
#  Same left-hand context, followed by a SpacingMark (UAX #29 rule GB9a).
46[^$Control $CR $LF] $SpacingMark;
#  A Prepend character stays attached to the following character, unless
#  that character is Control, CR or LF (UAX #29 rule GB9b).
47$Prepend [^$Control $CR $LF];
48
49
50## -------------------------------------------------
51
#  Reverse rules.  Each rule is the corresponding rule of the !!forward
#  section with its two operands swapped, i.e. the same pairs written in
#  the order they are met when moving backwards through the text.
52!!reverse;
#  LF preceded by CR (mirror of "$CR $LF").
53$LF $CR;
#  Hangul syllable sequences, mirrored from the three forward Hangul rules.
54($L | $V | $LV | $LVT) $L;
55($V | $T) ($LV | $V);
56$T ($LVT | $T);
57
#  Extend or SpacingMark preceded by any character other than
#  Control, CR or LF.
58$Extend      [^$Control $CR $LF];
59$SpacingMark [^$Control $CR $LF];
#  Any character other than Control, CR or LF, preceded by Prepend.
60[^$Control $CR $LF] $Prepend;
61
62
63## -------------------------------------------------
64
#  Safe-reverse section header.  No rules are listed under it: the next
#  non-blank content is the separator preceding the !!safe_forward section.
65!!safe_reverse;
66
67
68## -------------------------------------------------
69
#  Safe-forward section header.  No rules are visible beneath it in this
#  part of the file.
70!!safe_forward;
71
72