1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#
259d709d503bab6e2b61931737e662dd293b40578ccornelius# Copyright (C) 2002-2013, International Business Machines Corporation 
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# and others. All Rights Reserved.
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#
585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho# file:  word_POSIX.txt
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# ICU Word Break Rules, POSIX locale.
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#      See Unicode Standard Annex #29.
959d709d503bab6e2b61931737e662dd293b40578ccornelius#      These rules are based on UAX #29 Revision 22 for Unicode Version 6.3
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Note:  Updates to word.txt will usually need to be merged into
1254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#        word_POSIX.txt also.
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru##############################################################################
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#  Character class definitions from TR 29
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru##############################################################################
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
# RBBI option: turns on rule chaining for this rule set (see the ICU Boundary
# Analysis rule-syntax documentation for the exact chaining semantics).
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru!!chain;
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#  Character Class Definitions.
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
#  The sets from $CR through $ExtendNumLet are read from the Unicode Word_Break
#  property, with one tailoring: '.' is removed from $MidNumLet and added to
#  $MidNum.  As a result the forward rules 11/12 (which accept $MidNumEx) still
#  keep a period between digits, while rules 6/7 (which accept only MidLetter,
#  MidNumLet and Single_Quote) no longer keep a period between letters.
2759d709d503bab6e2b61931737e662dd293b40578ccornelius$CR                 = [\p{Word_Break = CR}];
2859d709d503bab6e2b61931737e662dd293b40578ccornelius$LF                 = [\p{Word_Break = LF}];
2959d709d503bab6e2b61931737e662dd293b40578ccornelius$Newline            = [\p{Word_Break = Newline}];
3059d709d503bab6e2b61931737e662dd293b40578ccornelius$Extend             = [\p{Word_Break = Extend}];
3154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
3259d709d503bab6e2b61931737e662dd293b40578ccornelius$Format             = [\p{Word_Break = Format}];
3359d709d503bab6e2b61931737e662dd293b40578ccornelius$Katakana           = [\p{Word_Break = Katakana}];
3459d709d503bab6e2b61931737e662dd293b40578ccornelius$Hebrew_Letter      = [\p{Word_Break = Hebrew_Letter}];
3559d709d503bab6e2b61931737e662dd293b40578ccornelius$ALetter            = [\p{Word_Break = ALetter}];
3659d709d503bab6e2b61931737e662dd293b40578ccornelius$Single_Quote       = [\p{Word_Break = Single_Quote}];
3759d709d503bab6e2b61931737e662dd293b40578ccornelius$Double_Quote       = [\p{Word_Break = Double_Quote}];
3859d709d503bab6e2b61931737e662dd293b40578ccornelius$MidNumLet          = [\p{Word_Break = MidNumLet} - [.]];
3959d709d503bab6e2b61931737e662dd293b40578ccornelius$MidLetter          = [\p{Word_Break = MidLetter}];
4059d709d503bab6e2b61931737e662dd293b40578ccornelius$MidNum             = [\p{Word_Break = MidNum} [.]];
4159d709d503bab6e2b61931737e662dd293b40578ccornelius$Numeric            = [\p{Word_Break = Numeric}];
4259d709d503bab6e2b61931737e662dd293b40578ccornelius$ExtendNumLet       = [\p{Word_Break = ExtendNumLet}];
4359d709d503bab6e2b61931737e662dd293b40578ccornelius
#  $Han and $Hiragana are selected by name rather than by Word_Break value
#  (presumably the Script property -- confirm against the UnicodeSet docs).
#  They feed $KanaKanji and the CJK dictionary set defined further down.
4459d709d503bab6e2b61931737e662dd293b40578ccornelius$Han                = [:Han:];
4559d709d503bab6e2b61931737e662dd293b40578ccornelius$Hiragana           = [:Hiragana:];
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#   Dictionary character set, for triggering language-based break engines. It is
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#   the union of LineBreak=Complex_Context and the CJK set ($KanaKanji plus Hangul
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#   syllables). Note that the Complex_Context part only works in Unicode 5.0 or later
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#   as the definition of Complex_Context was corrected to include all characters requiring dictionary break.
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
#   $Control        - Grapheme_Cluster_Break=Control; only used below to trim $ALetterPlus.
#   $HangulSyllable - the fixed code point range U+AC00..U+D7A3.
#   $KanaKanji      - Han, Hiragana and Katakana together.
#   $dictionaryCJK  - $KanaKanji plus $HangulSyllable.
#   $dictionary     - everything above plus $ComplexContext; used by the safe rules.
5385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho$Control        = [\p{Grapheme_Cluster_Break = Control}]; 
5454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$HangulSyllable = [\uac00-\ud7a3];
5554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$ComplexContext = [:LineBreak = Complex_Context:];
5654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$KanaKanji      = [$Han $Hiragana $Katakana];
5754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$dictionaryCJK  = [$KanaKanji $HangulSyllable];
5854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$dictionary     = [$ComplexContext $dictionaryCJK];
5954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
6054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius# leave CJK scripts out of ALetterPlus:
#   ALetter minus the CJK dictionary characters, plus the Complex_Context
#   characters that are neither Extend nor Control.
6154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$ALetterPlus  = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];
6254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#
6585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#  Rule 4     Ignore Format and Extend characters,
6685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#             except when they appear at the beginning of a region of text.
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#
#  Each ...Ex expression is its base class followed by zero or more Extend or
#  Format characters, so the later rules can be written in terms of ...Ex and
#  absorb those trailing characters.  Note that $ALetterEx is built on
#  $ALetterPlus (not the raw $ALetter set).
#
6854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius# TODO: check if handling of katakana in dictionary makes rules incorrect/void
6959d709d503bab6e2b61931737e662dd293b40578ccornelius$KatakanaEx           = $Katakana           ($Extend |  $Format)*;
7059d709d503bab6e2b61931737e662dd293b40578ccornelius$Hebrew_LetterEx      = $Hebrew_Letter      ($Extend |  $Format)*;
7159d709d503bab6e2b61931737e662dd293b40578ccornelius$ALetterEx            = $ALetterPlus        ($Extend |  $Format)*;
7259d709d503bab6e2b61931737e662dd293b40578ccornelius$Single_QuoteEx       = $Single_Quote       ($Extend |  $Format)*;
7359d709d503bab6e2b61931737e662dd293b40578ccornelius$Double_QuoteEx       = $Double_Quote       ($Extend |  $Format)*;
7459d709d503bab6e2b61931737e662dd293b40578ccornelius$MidNumLetEx          = $MidNumLet          ($Extend |  $Format)*;
7559d709d503bab6e2b61931737e662dd293b40578ccornelius$MidLetterEx          = $MidLetter          ($Extend |  $Format)*;
7659d709d503bab6e2b61931737e662dd293b40578ccornelius$MidNumEx             = $MidNum             ($Extend |  $Format)*;
7759d709d503bab6e2b61931737e662dd293b40578ccornelius$NumericEx            = $Numeric            ($Extend |  $Format)*;
7859d709d503bab6e2b61931737e662dd293b40578ccornelius$ExtendNumLetEx       = $ExtendNumLet       ($Extend |  $Format)*;
7954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$Regional_IndicatorEx = $Regional_Indicator ($Extend |  $Format)*;
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
#  $Ideographic comes from the binary Ideographic property; it and $Hiragana
#  get the same Extend/Format-absorbing treatment as the classes above.
8185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho$Ideographic    = [\p{Ideographic}];
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru$HiraganaEx     = $Hiragana     ($Extend |  $Format)*;
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru$IdeographicEx  = $Ideographic  ($Extend |  $Format)*;
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru## -------------------------------------------------
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
# Start of the forward rule section.  A trailing {nnn} on a rule is its rule
# status value (presumably matching the UWordBreak ranges in ICU's ubrk.h:
# 100 number, 200 letter, 400 ideograph -- confirm against that header).
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru!!forward;
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Rule 3 - CR x LF
9185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#
9285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho$CR $LF;
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Rule 4 - ignore Format and Extend characters, except when they appear at the beginning
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#          of a region of Text.   The rule here comes into play when the start of text
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#          begins with a group of Format chars, or with a "word" consisting of a single
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#          char that is not in any of the listed word break categories followed by
9854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#          format char(s), or is not a CJK dictionary character.
#          Pattern: an optional single character that is not CR, LF or Newline,
#          followed by one or more Extend/Format characters.
9959d709d503bab6e2b61931737e662dd293b40578ccornelius[^$CR $LF $Newline]? ($Extend |  $Format)+;
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
# Single-unit rules: one character of each word-forming class (with its
# trailing Extend/Format run, except for the bare $HangulSyllable) and the
# status value assigned to it.
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru$NumericEx {100};
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru$ALetterEx {200};
10354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$HangulSyllable {200};
10459d709d503bab6e2b61931737e662dd293b40578ccornelius$Hebrew_LetterEx{200};
10554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$KatakanaEx {400};       # note:  these status values override those from rule 5
10654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$HiraganaEx {400};       #        by virtue of being numerically larger.
10785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho$IdeographicEx {400};    #
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
10985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 5
11185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#    Do not break between most letters.
11285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#
11359d709d503bab6e2b61931737e662dd293b40578ccornelius($ALetterEx | $Hebrew_LetterEx)  ($ALetterEx | $Hebrew_LetterEx) {200};
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 6 and 7
#    Letter, then one MidLetter / MidNumLet / Single_Quote, then letter.
#    '.' is not accepted here because it was moved out of $MidNumLet.
11659d709d503bab6e2b61931737e662dd293b40578ccornelius($ALetterEx | $Hebrew_LetterEx) ($MidLetterEx | $MidNumLetEx | $Single_QuoteEx) ($ALetterEx | $Hebrew_LetterEx) {200};
11759d709d503bab6e2b61931737e662dd293b40578ccornelius
11859d709d503bab6e2b61931737e662dd293b40578ccornelius# rule 7a
#    Hebrew letter followed by a single quote.
11959d709d503bab6e2b61931737e662dd293b40578ccornelius$Hebrew_LetterEx $Single_QuoteEx {200};
12059d709d503bab6e2b61931737e662dd293b40578ccornelius
12159d709d503bab6e2b61931737e662dd293b40578ccornelius# rule 7b and 7c
#    Hebrew letter, double quote, Hebrew letter.
12259d709d503bab6e2b61931737e662dd293b40578ccornelius$Hebrew_LetterEx $Double_QuoteEx $Hebrew_LetterEx {200};
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 8
#    Digit followed by digit.
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru$NumericEx $NumericEx {100};
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 9
#    Letter followed by digit.
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
13059d709d503bab6e2b61931737e662dd293b40578ccornelius($ALetterEx | $Hebrew_LetterEx) $NumericEx {200};
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 10
#    Digit followed by letter.
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
13459d709d503bab6e2b61931737e662dd293b40578ccornelius$NumericEx ($ALetterEx | $Hebrew_LetterEx) {200};
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 11 and 12
#    Digit, then one MidNum / MidNumLet / Single_Quote, then digit.
#    '.' is accepted here through $MidNum.
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
13859d709d503bab6e2b61931737e662dd293b40578ccornelius$NumericEx ($MidNumEx | $MidNumLetEx | $Single_QuoteEx) $NumericEx {100};
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 13
14154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius# to be consistent with $KanaKanji $KanaKanji, changed
14254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius# from 300 to 400.
14354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius# See also TestRuleStatus in intltest/rbbiapts.cpp
14454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$KatakanaEx  $KatakanaEx {400};
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 13a/b
#   13a: no break between a letter, digit, Katakana or ExtendNumLet and a
#        following ExtendNumLet.
#   13b: no break between an ExtendNumLet and a following letter, digit or
#        Katakana.
#   Every operand uses the ...Ex form so that trailing Extend/Format characters
#   are absorbed by the rule itself.  The Hebrew 13b rule previously used the
#   bare $Hebrew_Letter set, unlike its siblings and unlike the reverse rule,
#   which uses $BackHebrew_LetterEx; it now uses $Hebrew_LetterEx.
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
14859d709d503bab6e2b61931737e662dd293b40578ccornelius$ALetterEx       $ExtendNumLetEx {200};    #  (13a)
14959d709d503bab6e2b61931737e662dd293b40578ccornelius$Hebrew_LetterEx $ExtendNumLetEx {200};    #  (13a)
15059d709d503bab6e2b61931737e662dd293b40578ccornelius$NumericEx       $ExtendNumLetEx {100};    #  (13a)
15159d709d503bab6e2b61931737e662dd293b40578ccornelius$KatakanaEx      $ExtendNumLetEx {400};    #  (13a)
15259d709d503bab6e2b61931737e662dd293b40578ccornelius$ExtendNumLetEx  $ExtendNumLetEx {200};    #  (13a)
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
15459d709d503bab6e2b61931737e662dd293b40578ccornelius$ExtendNumLetEx  $ALetterEx      {200};    #  (13b)
15559d709d503bab6e2b61931737e662dd293b40578ccornelius$ExtendNumLetEx  $Hebrew_LetterEx {200};    #  (13b)
15659d709d503bab6e2b61931737e662dd293b40578ccornelius$ExtendNumLetEx  $NumericEx      {100};    #  (13b)
15759d709d503bab6e2b61931737e662dd293b40578ccornelius$ExtendNumLetEx  $KatakanaEx     {400};    #  (13b)
15854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
15954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius# rule 13c
#   Regional indicator followed by regional indicator; no status value is attached.
16054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
16154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$Regional_IndicatorEx $Regional_IndicatorEx;
16254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
16354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius# special handling for CJK characters: chain for later dictionary segmentation
#   These two rules use the bare sets (no Extend/Format absorption).
16454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$HangulSyllable $HangulSyllable {200};
16554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found 
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru## -------------------------------------------------
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
# Start of the reverse rule section.  The rules below mirror the forward
# rules with the operands in the opposite order; none of them carries a
# {status} value.
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru!!reverse;
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
# Each $Back...Ex expression is the mirror image of the corresponding forward
# ...Ex definition: the run of Format/Extend characters comes first, then the
# base class.  $BackALetterEx uses $ALetterPlus, like the forward $ALetterEx.
17259d709d503bab6e2b61931737e662dd293b40578ccornelius$BackHebrew_LetterEx      = ($Format | $Extend)* $Hebrew_Letter;
17354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$BackALetterEx            = ($Format | $Extend)* $ALetterPlus;
17459d709d503bab6e2b61931737e662dd293b40578ccornelius$BackSingle_QuoteEx       = ($Format | $Extend)* $Single_Quote;
17559d709d503bab6e2b61931737e662dd293b40578ccornelius$BackDouble_QuoteEx       = ($Format | $Extend)* $Double_Quote;
17654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$BackMidNumLetEx          = ($Format | $Extend)* $MidNumLet;
17754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$BackNumericEx            = ($Format | $Extend)* $Numeric;
17854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$BackMidNumEx             = ($Format | $Extend)* $MidNum;
17954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$BackMidLetterEx          = ($Format | $Extend)* $MidLetter;
18054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$BackKatakanaEx           = ($Format | $Extend)* $Katakana;
18154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$BackHiraganaEx           = ($Format | $Extend)* $Hiragana;
18254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$BackExtendNumLetEx       = ($Format | $Extend)* $ExtendNumLet;
18354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$BackRegional_IndicatorEx = ($Format | $Extend)* $Regional_Indicator;
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 3
#   LF then CR: the forward "CR LF" pair written in reverse order.
18685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho$LF $CR;
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 4
#   Any run of Format/Extend characters (possibly empty), then an optional
#   single character that is not CR, LF or Newline.
18959d709d503bab6e2b61931737e662dd293b40578ccornelius($Format | $Extend)*  [^$CR $LF $Newline]?;
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 5
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
19359d709d503bab6e2b61931737e662dd293b40578ccornelius($BackALetterEx | $BackHebrew_LetterEx) ($BackALetterEx | $BackHebrew_LetterEx);
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 6 and 7
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
19759d709d503bab6e2b61931737e662dd293b40578ccornelius($BackALetterEx | $BackHebrew_LetterEx) ($BackMidLetterEx | $BackMidNumLetEx | $BackSingle_QuoteEx) ($BackALetterEx | $BackHebrew_LetterEx);
19859d709d503bab6e2b61931737e662dd293b40578ccornelius
19959d709d503bab6e2b61931737e662dd293b40578ccornelius# rule 7a
#   Single quote then Hebrew letter (forward order: Hebrew letter, single quote).
20059d709d503bab6e2b61931737e662dd293b40578ccornelius$BackSingle_QuoteEx $BackHebrew_LetterEx;
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
20259d709d503bab6e2b61931737e662dd293b40578ccornelius# Rule 7b and 7c
20359d709d503bab6e2b61931737e662dd293b40578ccornelius$BackHebrew_LetterEx $BackDouble_QuoteEx $BackHebrew_LetterEx;
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 8
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru$BackNumericEx $BackNumericEx;
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 9
#   Digit then letter here (forward order: letter, digit).
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
21159d709d503bab6e2b61931737e662dd293b40578ccornelius$BackNumericEx ($BackALetterEx | $BackHebrew_LetterEx);
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 10
#   Letter then digit here (forward order: digit, letter).
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
21559d709d503bab6e2b61931737e662dd293b40578ccornelius($BackALetterEx | $BackHebrew_LetterEx) $BackNumericEx;
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 11 and 12
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
21959d709d503bab6e2b61931737e662dd293b40578ccornelius$BackNumericEx ($BackMidNumEx | $BackMidNumLetEx | $BackSingle_QuoteEx) $BackNumericEx;
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 13
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru$BackKatakanaEx $BackKatakanaEx;
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rules 13 a/b
#   The forward 13a/13b rule lists are folded into two alternations here.
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#
22759d709d503bab6e2b61931737e662dd293b40578ccornelius$BackExtendNumLetEx ($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx);
22859d709d503bab6e2b61931737e662dd293b40578ccornelius($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx; 
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
23054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius# rule 13c
23154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
23254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$BackRegional_IndicatorEx $BackRegional_IndicatorEx;
23354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
23454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius# special handling for CJK characters: chain for later dictionary segmentation
23554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$HangulSyllable $HangulSyllable;
23654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius$KanaKanji $KanaKanji; # mirrors the forward $KanaKanji rule; no rule status is attached in this section
23754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru## -------------------------------------------------
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
# Start of the safe-reverse rule section.
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru!!safe_reverse;
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 4 (this comment previously said "rule 3"; the pattern is the Extend/Format
#         handling, and the identical pattern under !!safe_forward is labelled rule 4)
#   One or more Extend/Format characters, then optionally any one character.
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru($Extend | $Format)+ .?;
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 6
#   A bare mid-letter character (no Extend/Format run) followed by a Back...Ex letter.
24659d709d503bab6e2b61931737e662dd293b40578ccornelius($MidLetter | $MidNumLet | $Single_Quote) ($BackALetterEx | $BackHebrew_LetterEx);
24759d709d503bab6e2b61931737e662dd293b40578ccornelius
24859d709d503bab6e2b61931737e662dd293b40578ccornelius# rule 7b
24959d709d503bab6e2b61931737e662dd293b40578ccornelius$Double_Quote $BackHebrew_LetterEx;
25059d709d503bab6e2b61931737e662dd293b40578ccornelius
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 11
#   A bare mid-number character followed by a Back...Ex digit.
25359d709d503bab6e2b61931737e662dd293b40578ccornelius($MidNum | $MidNumLet | $Single_Quote) $BackNumericEx;
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# For dictionary-based break
#   Two adjacent characters from $dictionary (Complex_Context or CJK).
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru$dictionary $dictionary;
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru## -------------------------------------------------
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
# Start of the safe-forward rule section.  Unlike the safe-reverse section,
# these rules are written with the forward ...Ex expressions.
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru!!safe_forward;
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 4
#   One or more Extend/Format characters, then optionally any one character.
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru($Extend | $Format)+ .?;
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 6
#   A mid-letter character followed by a letter.
26659d709d503bab6e2b61931737e662dd293b40578ccornelius($MidLetterEx | $MidNumLetEx | $Single_QuoteEx) ($ALetterEx | $Hebrew_LetterEx);
26759d709d503bab6e2b61931737e662dd293b40578ccornelius
26859d709d503bab6e2b61931737e662dd293b40578ccornelius# rule 7b
#   A double quote followed by a Hebrew letter.
26959d709d503bab6e2b61931737e662dd293b40578ccornelius$Double_QuoteEx $Hebrew_LetterEx;
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# rule 11
#   A mid-number character followed by a digit.
27259d709d503bab6e2b61931737e662dd293b40578ccornelius($MidNumEx | $MidNumLetEx | $Single_QuoteEx) $NumericEx;
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# For dictionary-based break
#   Two adjacent characters from $dictionary (Complex_Context or CJK).
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru$dictionary $dictionary;
276