# Copyright (c) 2002-2015  International Business Machines Corporation and
# others. All Rights Reserved.
#
#  file:  line.txt
#
#         Line Breaking Rules
#         Implement default line breaking as defined by
#         Unicode Standard Annex #14 Revision 34 for Unicode 8.0
#         http://www.unicode.org/reports/tr14/
#
#         TODO:  Rule LB 8 remains as it was in Unicode 5.2
#         This is only because of a limitation of ICU break engine implementation,
#         not because the older behavior is desirable.
#
#         This corresponds to CSS line-break=strict (BCP47 -u-lb-strict).
#         It sets characters of class CJ to behave like NS.

#
#  Character Classes defined by TR 14.
#

#  Rule-builder options.
#      !!chain           turns on rule chaining.
#      !!LBCMNoChain     line-break specific; per the $AL_FOLLOW notes in this file,
#                        chaining is disabled with CM because it causes other failures.
!!chain;
!!LBCMNoChain;


!!lookAheadHardBreak;
#
#  !!lookAheadHardBreak    Described here because it is (as yet) undocumented elsewhere
#                          and only used for the line break rules.
#
#           It is used in the implementation of rule LB 10
#           which says to treat any combining mark that is not attached to a base
#           character as if it were of class AL  (alphabetic).
#
#           The problem occurs in the reverse rules.
#
#           Consider a sequence like the following, with correct breaks as shown
#               LF  ID  CM  AL  AL
#                  ^       ^       ^
#           Then consider the sequence without the initial ID (ideographic)
#                 LF  CM  AL  AL
#                    ^           ^
#           Our CM, which in the first example was attached to the ideograph,
#           is now unattached, becomes an alpha, and joins in with the other
#           alphas.
#
#           When iterating forwards, these sequences do not present any problems.
#           When iterating backwards, we need to look ahead when encountering
#           a CM to see whether it attaches to something further on or not.
#           (Look-ahead in a reverse rule is looking towards the start)
#
#           If the CM is unattached, we need to force a break.
#
#           !!lookAheadHardBreak forces the run time state machine to
#           stop immediately when a look ahead rule ( '/' operator) matches,
#           and set the match position to that of the look-ahead operator,
#           no matter what other rules may be in play at the time.
#
#           See rule LB 19 for an example.
#

$AI = [:LineBreak =  Ambiguous:];
$AL = [:LineBreak =  Alphabetic:];
$BA = [:LineBreak =  Break_After:];
$BB = [:LineBreak =  Break_Before:];
$BK = [:LineBreak =  Mandatory_Break:];
$B2 = [:LineBreak =  Break_Both:];
$CB = [:LineBreak =  Contingent_Break:];
$CJ = [:LineBreak =  Conditional_Japanese_Starter:];
$CL = [:LineBreak =  Close_Punctuation:];
$CM = [:LineBreak =  Combining_Mark:];
$CP = [:LineBreak =  Close_Parenthesis:];
$CR = [:LineBreak =  Carriage_Return:];
$EX = [:LineBreak =  Exclamation:];
$GL = [:LineBreak =  Glue:];
$HL = [:LineBreak =  Hebrew_Letter:];
$HY = [:LineBreak =  Hyphen:];
$H2 = [:LineBreak =  H2:];
$H3 = [:LineBreak =  H3:];
$ID = [:LineBreak =  Ideographic:];
$IN = [:LineBreak =  Inseperable:];     # sic: spelling kept as-is; presumably a legacy alias of Inseparable - verify before renaming
$IS = [:LineBreak =  Infix_Numeric:];
$JL = [:LineBreak =  JL:];
$JV = [:LineBreak =  JV:];
$JT = [:LineBreak =  JT:];
$LF = [:LineBreak =  Line_Feed:];
$NL = [:LineBreak =  Next_Line:];
$NS = [[:LineBreak =  Nonstarter:] $CJ];    # strict line breaking: CJ is folded into NS
$NU = [:LineBreak =  Numeric:];
$OP = [:LineBreak =  Open_Punctuation:];
$PO = [:LineBreak =  Postfix_Numeric:];
$PR = [:LineBreak =  Prefix_Numeric:];
$QU = [:LineBreak =  Quotation:];
$RI = [:LineBreak =  Regional_Indicator:];
$SA = [:LineBreak =  Complex_Context:];
$SG = [:LineBreak =  Surrogate:];
$SP = [:LineBreak =  Space:];
$SY = [:LineBreak =  Break_Symbols:];
$WJ = [:LineBreak =  Word_Joiner:];
$XX = [:LineBreak =  Unknown:];
$ZW = [:LineBreak =  ZWSpace:];

#   Dictionary character set, for triggering language-based break engines. Currently
#   limited to LineBreak=Complex_Context. Note that this set only works in Unicode
#   5.0 or later as the definition of Complex_Context was corrected to include all
#   characters requiring dictionary break.

$dictionary = [:LineBreak = Complex_Context:];

#
#  Rule LB1.  By default, treat AI  (characters with ambiguous east Asian width),
#                               SA  (South East Asian: Thai, Lao, Khmer)
#                               SG  (Unpaired Surrogates)
#                               XX  (Unknown, unassigned)
#                         as $AL  (Alphabetic)
#
$ALPlus = [$AL $AI $SA $SG $XX];

#
#  Combining Marks.   X $CM*  behaves as if it were X.  Rule LB 9.
#                     Each $XXcm variable is class XX followed by zero or more $CM.
#
$ALcm = $ALPlus $CM*;
$BAcm = $BA $CM*;
$BBcm = $BB $CM*;
$B2cm = $B2 $CM*;
$CLcm = $CL $CM*;
$CPcm = $CP $CM*;
$EXcm = $EX $CM*;
$GLcm = $GL $CM*;
$HLcm = $HL $CM*;
$HYcm = $HY $CM*;
$H2cm = $H2 $CM*;
$H3cm = $H3 $CM*;
$IDcm = $ID $CM*;
$INcm = $IN $CM*;
$IScm = $IS $CM*;
$JLcm = $JL $CM*;
$JVcm = $JV $CM*;
$JTcm = $JT $CM*;
$NScm = $NS $CM*;
$NUcm = $NU $CM*;
$OPcm = $OP $CM*;
$POcm = $PO $CM*;
$PRcm = $PR $CM*;
$QUcm = $QU $CM*;
$RIcm = $RI $CM*;
$SYcm = $SY $CM*;
$WJcm = $WJ $CM*;

## -------------------------------------------------

!!forward;

#
#  Each class of character can stand by itself as an unbroken token, with trailing combining stuff
#
$ALPlus $CM+;
$BA $CM+;
$BB $CM+;
$B2 $CM+;
$CL $CM+;
$CP $CM+;
$EX $CM+;
$GL $CM+;
$HL $CM+;
$HY $CM+;
$H2 $CM+;
$H3 $CM+;
$ID $CM+;
$IN $CM+;
$IS $CM+;
$JL $CM+;
$JV $CM+;
$JT $CM+;
$NS $CM+;
$NU $CM+;
$OP $CM+;
$PO $CM+;
$PR $CM+;
$QU $CM+;
$RI $CM+;
$SY $CM+;
$WJ $CM+;

#
# CAN_CM  is the set of characters that may combine with CM combining chars.
#         Note that Linebreak UAX 14's concept of a combining char and the rules
#         for what they can combine with are _very_ different from the rest of Unicode.
#
#         Note that $CM itself is left out of this set.  If CM is needed as a base
#         it must be listed separately in the rule.
#
#         $CANT_CM lists the same classes un-negated, so the two sets are complements.
#
$CAN_CM  = [^$SP $BK $CR $LF $NL $ZW $CM];       # Bases that can   take CMs
$CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM];       # Bases that can't take CMs

#
# AL_FOLLOW  set of chars that can unconditionally follow an AL
#            Needed in rules where stand-alone $CM s are treated as AL.
#            Chaining is disabled with CM because it causes other failures,
#            so for this one case we need to manually list out longer sequences.
#
#            _NOCM holds followers that are also in $CANT_CM; _CM holds the rest.
#
$AL_FOLLOW_NOCM = [$BK $CR $LF $NL $ZW $SP];
$AL_FOLLOW_CM   = [$CL $CP $EX $HL $IS $SY $WJ $GL $OP $QU $BA $HY $NS $IN $NU $ALPlus];
$AL_FOLLOW      = [$AL_FOLLOW_NOCM $AL_FOLLOW_CM];


#
#  Rule LB 4, 5    Mandatory (Hard) breaks.
#                  Every rule ending in a hard break carries status {100}
#                  (presumably the hard-line-break status value; confirm against the ICU API).
#
$LB4Breaks    = [$BK $CR $LF $NL];
$LB4NonBreaks = [^$BK $CR $LF $NL];
$CR $LF {100};

#
#  LB 6    Do not break before hard line breaks.
#
$LB4NonBreaks?  $LB4Breaks {100};    # LB 6  do not break before hard breaks.
$CAN_CM $CM*    $LB4Breaks {100};
$CM+            $LB4Breaks {100};

# LB 7         x SP
#              x ZW
$LB4NonBreaks [$SP $ZW];
$CAN_CM $CM*  [$SP $ZW];
$CM+          [$SP $ZW];

#
# LB 8         Break after zero width space
#              TODO:  ZW SP* <break>
#              An engine change is required to write the reverse rule for this.
#              For now, leave the Unicode 5.2 rule, ZW <break>
#
$LB8Breaks    = [$LB4Breaks $ZW];
$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];


# LB 9     Combining marks.      X   $CM needs to behave like X, where X is not $SP $BK $CR $LF $NL $ZW
#                                $CM not covered by the above needs to behave like $AL
#                                See definition of $CAN_CM.

$CAN_CM $CM+;                   #  Stick together any combining sequences that don't match other rules.
$CM+;

#
# LB 11  Do not break before or after WORD JOINER & related characters.
#
#        x WJ
$CAN_CM $CM*  $WJcm;
$LB8NonBreaks $WJcm;
$CM+          $WJcm;

#        WJ x
$WJcm $CANT_CM;
$WJcm $CAN_CM $CM*;

#
# LB 12  Do not break after NBSP and related characters.
#         GL  x
#
$GLcm $CAN_CM $CM*;
$GLcm $CANT_CM;

#
# LB 12a  Do not break before NBSP and related characters ...
#            [^SP BA HY] x GL
#
[[$LB8NonBreaks] - [$SP $BA $HY]] $CM* $GLcm;
$CM+ $GLcm;                     # by rule 10, stand-alone CM behaves as AL



#
# LB 13   Don't break before ']' or '!' or ';' or '/', even after spaces.
#         The same three-rule pattern is repeated for CL, CP, EX, IS and SY.
#
$LB8NonBreaks $CL;
$CAN_CM $CM*  $CL;
$CM+          $CL;              # by rule 10, stand-alone CM behaves as AL

$LB8NonBreaks $CP;
$CAN_CM $CM*  $CP;
$CM+          $CP;              # by rule 10, stand-alone CM behaves as AL

$LB8NonBreaks $EX;
$CAN_CM $CM*  $EX;
$CM+          $EX;              # by rule 10, stand-alone CM behaves as AL

$LB8NonBreaks $IS;
$CAN_CM $CM*  $IS;
$CM+          $IS;              # by rule 10, stand-alone CM behaves as AL

$LB8NonBreaks $SY;
$CAN_CM $CM*  $SY;
$CM+          $SY;              # by rule 10, stand-alone CM behaves as AL


#
# LB 14  Do not break after OP, even after spaces
#
$OPcm $SP* $CAN_CM $CM*;
$OPcm $SP* $CANT_CM;

$OPcm $SP+ $CM+ $AL_FOLLOW?;    # by rule 10, stand-alone CM behaves as AL

# LB 15        QU SP* x OP
$QUcm $SP* $OPcm;

# LB 16        (CL | CP) SP* x NS
($CLcm | $CPcm) $SP* $NScm;

# LB 17        B2 SP* x B2
$B2cm $SP* $B2cm;

#
# LB 18  Break after spaces.
#
$LB18NonBreaks = [$LB8NonBreaks - [$SP]];
$LB18Breaks    = [$LB8Breaks $SP];


# LB 19
#         x QU
$LB18NonBreaks $CM* $QUcm;
$CM+                $QUcm;

#         QU  x
$QUcm .?;
$QUcm $LB18NonBreaks $CM*;    # Don't let a combining mark go onto $CR, $BK, etc.
                              #  TODO:  I don't think this rule is needed.


# LB 20
#        <break>  $CB
#        $CB   <break>

$LB20NonBreaks = [$LB18NonBreaks - $CB];

# LB 21        x   (BA | HY | NS)
#           BB x
#
$LB20NonBreaks $CM* ($BAcm | $HYcm | $NScm);

$BBcm [^$CB];                                  #  $BB  x
$BBcm $LB20NonBreaks $CM*;

# LB 21a Don't break after Hebrew + Hyphen
#   HL (HY | BA) x
#
$HLcm ($HYcm | $BAcm) [^$CB]?;

# LB 21b (forward) Don't break between SY and HL
# (break between HL and SY already disallowed by LB 13 above)
$SYcm $HLcm;

# LB 22        (AL | HL | EX | ID | IN | NU) x IN
($ALcm | $HLcm) $INcm;
$CM+     $INcm;     #  by rule 10, any otherwise unattached CM behaves as AL
$EXcm    $INcm;
$IDcm    $INcm;
$INcm    $INcm;
$NUcm    $INcm;


# LB 23
$IDcm  $POcm;
$ALcm  $NUcm;       # includes $LB19
$HLcm  $NUcm;
$CM+   $NUcm;       # Rule 10, any otherwise unattached CM behaves as AL
$NUcm  $ALcm;
$NUcm  $HLcm;

369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 24   PR x ID,  (PR | PO) x (AL | HL)
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$PRcm $IDcm;
373103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$PRcm ($ALcm | $HLcm);
374103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$POcm ($ALcm | $HLcm);
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 25   Numbers.
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#   One rule matches the whole sequence  (PR | PO)? (OP | HY)? NU (NU | SY | IS)* (CL | CP)? (PR | PO)?
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru($PRcm | $POcm)? ($OPcm | $HYcm)? $NUcm ($NUcm | $SYcm | $IScm)* ($CLcm | $CPcm)? ($PRcm | $POcm)?;
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 26  Do not break a Korean syllable
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#        JL x (JL | JV | H2 | H3),  (JV | H2) x (JV | JT),  (JT | H3) x JT
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$JLcm ($JLcm | $JVcm | $H2cm | $H3cm);
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru($JVcm | $H2cm) ($JVcm | $JTcm);
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru($JTcm | $H3cm) $JTcm;
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 27  Treat a Korean syllable block the same as ID: no break before IN or PO, or after PR
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $INcm;
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $POcm;
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$PRcm ($JLcm | $JVcm | $JTcm | $H2cm | $H3cm);
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 28   Do not break between alphabetics
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#         (AL | HL) x (AL | HL)
395103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius($ALcm | $HLcm) ($ALcm | $HLcm);
396103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$CM+ ($ALcm | $HLcm);      # The $CM+ is from rule 10, an unattached CM is treated as AL
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 29   IS x (AL | HL)
399103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$IScm ($ALcm | $HLcm);
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
40150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho# LB 30   (AL | HL | NU) x OP,  CP x (AL | HL | NU)
402103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius($ALcm | $HLcm | $NUcm) $OPcm;
40350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho$CM+ $OPcm;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
404103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$CPcm ($ALcm | $HLcm | $NUcm);
40550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
40654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius# LB 30a  Do not break between regional indicators.
40754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$RIcm $RIcm;        #  RI x RI
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  Reverse Rules.
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  Patterns in this section are written back to front: the leftmost term is the character that comes last in forward order.
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru## -------------------------------------------------
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru!!reverse;
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  A run of combining marks followed (in reverse) by the character of the named class that precedes it going forward.
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $ALPlus;
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $BA;
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $BB;
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $B2;
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $CL;
42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho$CM+ $CP;
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $EX;
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $GL;
424103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$CM+ $HL;
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $HY;
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $H2;
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $H3;
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $ID;
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $IN;
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $IS;
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $JL;
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $JV;
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $JT;
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $NS;
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $NU;
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $OP;
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $PO;
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $PR;
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $QU;
44054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$CM+ $RI;
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $SY;
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $WJ;
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+;               #  a combining-mark run on its own
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  Sequences of the form  (shown forwards)
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#      [CANT_CM]  <break>  [CM]  [whatever]
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  The CM needs to behave as an AL
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  The parenthesized alternatives after '/' are context only; the last one excludes $OP (see the LB 14 note below).
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$AL_FOLLOW $CM+ / (
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          [$BK $CR $LF $NL $ZW {eof}] |
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          $SP+ $CM+ $SP |
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          $SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}]));   # if LB 14 will match, need to suppress this break.
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                               #  LB14 says    OP SP* x .
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                               #    becomes    OP SP* x AL
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                               #    becomes    OP SP* x CM+ AL_FOLLOW
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                               #
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                               # Further note:  the $AL in [$AL {eof}] is only to work around
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                               #                a rule compiler bug which complains about
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                               #                empty sets otherwise.
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  Sequences of the form  (shown forwards)
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#      [CANT_CM]  <break> [CM]  <break>  [PR]
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  The CM needs to behave as an AL
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  This rule is concerned about getting the second of the two <breaks> in place.
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  In the rule, $PR is the match; the $CM+ run and the listed control/space classes (or eof) after '/' are context only.
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru[$PR   ] / $CM+ [$BK $CR $LF $NL $ZW $SP {eof}];
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 4, 5, 6
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$LB4Breaks [$LB4NonBreaks-$CM];
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$LB4Breaks $CM+ $CAN_CM;
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$LF $CR;            #  CR LF in forward order
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 7         x SP
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#              x ZW
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru[$SP $ZW] [$LB4NonBreaks-$CM];
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru[$SP $ZW] $CM+ $CAN_CM;
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
48627f654740f2a26ad62a5c155af9199af9e69b889claireho# LB 8 ZW SP* <break>
48727f654740f2a26ad62a5c155af9199af9e69b889claireho#     TODO: to implement this, we need more than one look-ahead hard break in play at a time.
48827f654740f2a26ad62a5c155af9199af9e69b889claireho#           Requires an engine enhancement.
48927f654740f2a26ad62a5c155af9199af9e69b889claireho#   / $SP* $ZW
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 9,10  Combining marks.
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#    X   $CM needs to behave like X, where X is not $SP or controls.
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#    $CM not covered by the above needs to behave like $AL
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# Stick together any combining sequences that don't match other rules.
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $CAN_CM;
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 11    . x WJ   and   WJ x .
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $WJ $CM* $CAN_CM;                                #   . x WJ
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $WJ      [$LB8NonBreaks-$CM];
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     $CANT_CM $CM* $WJ;                               #   WJ x .
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $CAN_CM  $CM* $WJ;
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru# LB 12a
506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#      [^SP BA HY] x GL
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#      (reversed: $GL is matched first, then the character that precedes it going forward)
508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru$CM* $GL $CM* [$LB8NonBreaks-[$CM $SP $BA $HY]];
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru# LB 12
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#     GL  x
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#     (reversed: the character that follows $GL going forward is matched first)
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CANT_CM $CM* $GL;
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $CAN_CM $CM* $GL;
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 13    . x (CL | CP | EX | IS | SY)
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CL $CM+ $CAN_CM;
51950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho$CP $CM+ $CAN_CM;
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$EX $CM+ $CAN_CM;
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$IS $CM+ $CAN_CM;
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$SY $CM+ $CAN_CM;
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CL [$LB8NonBreaks-$CM];
52550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho$CP [$LB8NonBreaks-$CM];
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$EX [$LB8NonBreaks-$CM];
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$IS [$LB8NonBreaks-$CM];
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$SY [$LB8NonBreaks-$CM];
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# Rule 13 & 14 taken together for an edge case.
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#   Match this, shown forward
53250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#     OP SP+  ($CM+ behaving as $AL) (CL | CP | EX | IS | SY)
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#   This really wants to chain at the $CM+ (which is acting as an $AL)
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#   except for $CM chaining being disabled.
53550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho[$CL $CP $EX $IS $SY] $CM+ $SP+ $CM* $OP;
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 14    OP SP* x
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#    (reversed: the character after the spaces comes first, then the spaces, then $OP)
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $CAN_CM    $SP* $CM* $OP;
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     $CANT_CM   $SP* $CM* $OP;
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$AL_FOLLOW? $CM+  $SP $SP* $CM* $OP;     #  by LB 10, behaves like $AL_FOLLOW? $AL $SP* $CM* $OP
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     $AL_FOLLOW_NOCM $CM+ $SP+ $CM* $OP;
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $AL_FOLLOW_CM   $CM+ $SP+ $CM* $OP;
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$SY $CM $SP+ $OP;   # TODO:  Experiment.  Remove.
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 15    QU SP* x OP
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $OP $SP* $CM* $QU;
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 16    (CL | CP) SP* x NS
55350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho$CM* $NS $SP* $CM* ($CL | $CP);
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 17    B2 SP* x B2
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $B2 $SP* $CM* $B2;
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 18  break after spaces
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#        Nothing explicit needed here.
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 19    . x QU   and   QU x .
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $QU $CM* $CAN_CM;                                #   . x QU
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $QU      $LB18NonBreaks;
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $CAN_CM  $CM* $QU;                               #   QU x .
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     $CANT_CM $CM* $QU;
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  LB 20  Break before and after CB.
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#         nothing needed here.
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 21
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* ($BA | $HY | $NS) $CM* [$LB20NonBreaks-$CM];     #  . x (BA | HY | NS)
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* [$LB20NonBreaks-$CM] $CM* $BB;                   #  BB x .
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru[^$CB] $CM* $BB;                                      #  BB x .  (any following character except CB)
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
583103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius# LB21a    HL (HY | BA) x   (reversed: following non-CB character, hyphen, then $HL)
584103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius[^$CB] $CM* ($HY | $BA) $CM* $HL;
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
58659d709d503bab6e2b61931737e662dd293b40578ccornelius# LB21b (reverse)
58759d709d503bab6e2b61931737e662dd293b40578ccornelius$CM* $HL $CM* $SY;                                    #  SY x HL
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 22    (ALPlus | HL | EX | ID | IN | NU) x IN
590103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$CM* $IN $CM* ($ALPlus | $HL);
591c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert$CM* $IN $CM* $EX;
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $IN $CM* $ID;
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $IN $CM* $IN;
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $IN $CM* $NU;
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 23    ID x PO,  (ALPlus | HL) x NU,  NU x (ALPlus | HL)
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $PO $CM* $ID;
598103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$CM* $NU $CM* ($ALPlus | $HL);
599103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$CM* ($ALPlus | $HL) $CM* $NU;
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 24    PR x ID,  (PR | PO) x (ALPlus | HL)
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $ID $CM* $PR;
603103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$CM* ($ALPlus | $HL) $CM* $PR;
604103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$CM* ($ALPlus | $HL) $CM* $PO;
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 25    mirror image of  (PR | PO)? (OP | HY)? NU (NU | SY | IS)* (CL | CP)? (PR | PO)?
60850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho($CM* ($PR | $PO))? ($CM* ($CL | $CP))? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP | $HY))? ($CM* ($PR | $PO))?;
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 26    JL x (JL | JV | H2 | H3),  (JV | H2) x (JV | JT),  (JT | H3) x JT
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* ($H3 | $H2 | $JV | $JL) $CM* $JL;
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* ($JT | $JV) $CM* ($H2 | $JV);
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $JT $CM* ($H3 | $JT);
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 27    S x IN,  S x PO,  PR x S    where S = (JL | JV | JT | H2 | H3)
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $IN $CM* ($H3 | $H2 | $JT | $JV | $JL);
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* $PO $CM* ($H3 | $H2 | $JT | $JV | $JL);
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM* ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR;
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 28    (ALPlus | HL) x (ALPlus | HL)
621103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$CM* ($ALPlus | $HL) $CM* ($ALPlus | $HL);
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# LB 29    IS x (ALPlus | HL)
625103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$CM* ($ALPlus | $HL) $CM* $IS;
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
62750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho# LB 30    (ALPlus | HL | NU) x OP,  CP x (ALPlus | HL | NU)
628103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$CM* $OP $CM* ($ALPlus | $HL | $NU);
629103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$CM* ($ALPlus | $HL | $NU) $CM* $CP;
63050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
63154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius# LB 30a   RI x RI
63254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$CM* $RI $CM* $RI;
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru## -------------------------------------------------
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru!!safe_reverse;
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  Each heading names the TR 14 rule whose context the pattern under it covers; patterns are written in reverse order.
638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru# LB 9
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$CM+ $SP / .;
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru# LB 14
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$SP+ $CM* $OP;
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru# LB 15
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$SP+ $CM* $QU;
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
648c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru# LB 16
64950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho$SP+ $CM* ($CL | $CP);
650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru# LB 17
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$SP+ $CM* $B2;
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
654103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius# LB 21a
655103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius$CM* ($HY | $BA) $CM* $HL;
656103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
657c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru# LB 25
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru($CM* ($IS | $SY))+ $CM* $NU;
65950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho($CL | $CP) $CM* ($NU | $IS | $SY);
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# For dictionary-based break
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$dictionary $dictionary;
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru## -------------------------------------------------
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru!!safe_forward;
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# Skip forward over all character classes that are involved in
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#   rules containing patterns with possibly more than one char
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#   of context.
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  It might be slightly more efficient to have specific rules
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  instead of one generic one, but only if we could
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  turn off rule chaining.  We don't want to move more
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  than necessary.
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#  NOTE(review): $SP is in the first (skipped) set but is not excluded from the second (terminating) set - confirm this is intended.
677103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius[$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $dictionary];
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru$dictionary $dictionary;
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
680