#
# Copyright (C) 2002-2013, International Business Machines Corporation
# and others. All Rights Reserved.
#
# file:  word_POSIX.txt
#
# ICU Word Break Rules, POSIX locale.
#      See Unicode Standard Annex #29.
#      These rules are based on UAX #29 Revision 22 for Unicode Version 6.3
#
# Note:  Updates to word.txt will usually need to be merged into
#        word_POSIX.txt also.
##############################################################################
#
#  Character class definitions from TR 29
#
##############################################################################

!!chain;


#
#  Character Class Definitions.
#
#  Each set below mirrors one Word_Break property value, with one deliberate
#  difference for this POSIX variant: FULL STOP "." is removed from $MidNumLet
#  and added to $MidNum. Since the letter rules (6 and 7) accept $MidLetter and
#  $MidNumLet while the number rules (11 and 12) accept $MidNum and $MidNumLet,
#  a period can join digits ("3.14") but not letters ("a.b").
#

$CR                 = [\p{Word_Break = CR}];
$LF                 = [\p{Word_Break = LF}];
$Newline            = [\p{Word_Break = Newline}];
$Extend             = [\p{Word_Break = Extend}];
$Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
$Format             = [\p{Word_Break = Format}];
$Katakana           = [\p{Word_Break = Katakana}];
$Hebrew_Letter      = [\p{Word_Break = Hebrew_Letter}];
$ALetter            = [\p{Word_Break = ALetter}];
$Single_Quote       = [\p{Word_Break = Single_Quote}];
$Double_Quote       = [\p{Word_Break = Double_Quote}];
$MidNumLet          = [\p{Word_Break = MidNumLet} - [.]];
$MidLetter          = [\p{Word_Break = MidLetter}];
$MidNum             = [\p{Word_Break = MidNum} [.]];
$Numeric            = [\p{Word_Break = Numeric}];
$ExtendNumLet       = [\p{Word_Break = ExtendNumLet}];

$Han                = [:Han:];
$Hiragana           = [:Hiragana:];


#   Dictionary character set, for triggering language-based break engines.
#   It is the union of LineBreak=Complex_Context and the CJK set defined below
#   (Han, Hiragana, Katakana and precomposed Hangul syllables).  Note that the
#   Complex_Context part only works in Unicode 5.0 or later, as the definition
#   of Complex_Context was corrected to include all characters requiring
#   dictionary break.

$Control        = [\p{Grapheme_Cluster_Break = Control}];
$HangulSyllable = [\uac00-\ud7a3];
$ComplexContext = [:LineBreak = Complex_Context:];
$KanaKanji      = [$Han $Hiragana $Katakana];
$dictionaryCJK  = [$KanaKanji $HangulSyllable];
$dictionary     = [$ComplexContext $dictionaryCJK];

# leave CJK scripts out of ALetterPlus
$ALetterPlus    = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];


#
#  Rule 4    Ignore Format and Extend characters,
#            except when they appear at the beginning of a region of text.
#
#  Each "...Ex" variable is the base class followed by any run of Extend or
#  Format characters, so the rules can treat the whole run as one unit.
#
# TODO: check if handling of katakana in dictionary makes rules incorrect/void
$KatakanaEx           = $Katakana           ($Extend | $Format)*;
$Hebrew_LetterEx      = $Hebrew_Letter      ($Extend | $Format)*;
$ALetterEx            = $ALetterPlus        ($Extend | $Format)*;
$Single_QuoteEx       = $Single_Quote       ($Extend | $Format)*;
$Double_QuoteEx       = $Double_Quote       ($Extend | $Format)*;
$MidNumLetEx          = $MidNumLet          ($Extend | $Format)*;
$MidLetterEx          = $MidLetter          ($Extend | $Format)*;
$MidNumEx             = $MidNum             ($Extend | $Format)*;
$NumericEx            = $Numeric            ($Extend | $Format)*;
$ExtendNumLetEx       = $ExtendNumLet       ($Extend | $Format)*;
$Regional_IndicatorEx = $Regional_Indicator ($Extend | $Format)*;

$Ideographic          = [\p{Ideographic}];
$HiraganaEx           = $Hiragana           ($Extend | $Format)*;
$IdeographicEx        = $Ideographic        ($Extend | $Format)*;

## -------------------------------------------------

!!forward;

# Forward rules.  The {nnn} tags are rule status values reported to callers
# (ICU UWordBreak: 100 = number, 200 = letter, 400 = ideographic).


# Rule 3 - CR x LF
#
$CR $LF;

# Rule 4 - ignore Format and Extend characters, except when they appear at the beginning
#    of a region of Text.   The rule here comes into play when the start of text
#    begins with a group of Format chars, or with a "word" consisting of a single
#    char that is not in any of the listed word break categories followed by
#    format char(s), or is not a CJK dictionary character.
[^$CR $LF $Newline]? ($Extend | $Format)+;

$NumericEx       {100};
$ALetterEx       {200};
$HangulSyllable  {200};
$Hebrew_LetterEx {200};
$KatakanaEx      {400};       # note:  these status values override those from rule 5
$HiraganaEx      {400};       #        by virtue of being numerically larger.
$IdeographicEx   {400};       #

#
# rule 5
#    Do not break between most letters.
#
($ALetterEx | $Hebrew_LetterEx) ($ALetterEx | $Hebrew_LetterEx) {200};

# rule 6 and 7
($ALetterEx | $Hebrew_LetterEx) ($MidLetterEx | $MidNumLetEx | $Single_QuoteEx) ($ALetterEx | $Hebrew_LetterEx) {200};

# rule 7a
$Hebrew_LetterEx $Single_QuoteEx {200};

# rule 7b and 7c
$Hebrew_LetterEx $Double_QuoteEx $Hebrew_LetterEx {200};

# rule 8

$NumericEx $NumericEx {100};

# rule 9

($ALetterEx | $Hebrew_LetterEx) $NumericEx {200};

# rule 10

$NumericEx ($ALetterEx | $Hebrew_LetterEx) {200};

# rule 11 and 12

$NumericEx ($MidNumEx | $MidNumLetEx | $Single_QuoteEx) $NumericEx {100};

# rule 13
# to be consistent with $KanaKanji $KanaKanji, changed
# from 300 to 400.
# See also TestRuleStatus in intltest/rbbiapts.cpp
$KatakanaEx $KatakanaEx {400};

# rule 13a/b

$ALetterEx       $ExtendNumLetEx {200};    #  (13a)
$Hebrew_LetterEx $ExtendNumLetEx {200};    #  (13a)
$NumericEx       $ExtendNumLetEx {100};    #  (13a)
$KatakanaEx      $ExtendNumLetEx {400};    #  (13a)
$ExtendNumLetEx  $ExtendNumLetEx {200};    #  (13a)

# 13b: every right-hand side uses the "...Ex" form so that trailing
# Extend/Format characters are absorbed the same way for all four classes.
$ExtendNumLetEx  $ALetterEx       {200};   #  (13b)
$ExtendNumLetEx  $Hebrew_LetterEx {200};   #  (13b)
$ExtendNumLetEx  $NumericEx       {100};   #  (13b)
$ExtendNumLetEx  $KatakanaEx      {400};   #  (13b)

# rule 13c

$Regional_IndicatorEx $Regional_IndicatorEx;

# special handling for CJK characters: chain for later dictionary segmentation
$HangulSyllable $HangulSyllable {200};
$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found


## -------------------------------------------------

!!reverse;

# Reverse rules mirror the forward rules with each sequence written
# back-to-front; the "Back...Ex" variables therefore put the Format/Extend
# run before the base character.

$BackHebrew_LetterEx      = ($Format | $Extend)* $Hebrew_Letter;
$BackALetterEx            = ($Format | $Extend)* $ALetterPlus;
$BackSingle_QuoteEx       = ($Format | $Extend)* $Single_Quote;
$BackDouble_QuoteEx       = ($Format | $Extend)* $Double_Quote;
$BackMidNumLetEx          = ($Format | $Extend)* $MidNumLet;
$BackNumericEx            = ($Format | $Extend)* $Numeric;
$BackMidNumEx             = ($Format | $Extend)* $MidNum;
$BackMidLetterEx          = ($Format | $Extend)* $MidLetter;
$BackKatakanaEx           = ($Format | $Extend)* $Katakana;
$BackHiraganaEx           = ($Format | $Extend)* $Hiragana;
$BackExtendNumLetEx       = ($Format | $Extend)* $ExtendNumLet;
$BackRegional_IndicatorEx = ($Format | $Extend)* $Regional_Indicator;

# rule 3
$LF $CR;

# rule 4
($Format | $Extend)* [^$CR $LF $Newline]?;

# rule 5

($BackALetterEx | $BackHebrew_LetterEx) ($BackALetterEx | $BackHebrew_LetterEx);

# rule 6 and 7

($BackALetterEx | $BackHebrew_LetterEx) ($BackMidLetterEx | $BackMidNumLetEx | $BackSingle_QuoteEx) ($BackALetterEx | $BackHebrew_LetterEx);

# rule 7a
$BackSingle_QuoteEx $BackHebrew_LetterEx;

# Rule 7b and 7c
$BackHebrew_LetterEx $BackDouble_QuoteEx $BackHebrew_LetterEx;

# rule 8

$BackNumericEx $BackNumericEx;

# rule 9

$BackNumericEx ($BackALetterEx | $BackHebrew_LetterEx);

# rule 10

($BackALetterEx | $BackHebrew_LetterEx) $BackNumericEx;

# rule 11 and 12

$BackNumericEx ($BackMidNumEx | $BackMidNumLetEx | $BackSingle_QuoteEx) $BackNumericEx;

# rule 13

$BackKatakanaEx $BackKatakanaEx;

# rules 13 a/b
#
$BackExtendNumLetEx ($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx);
($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx;

# rule 13c

$BackRegional_IndicatorEx $BackRegional_IndicatorEx;

# special handling for CJK characters: chain for later dictionary segmentation
$HangulSyllable $HangulSyllable;
$KanaKanji $KanaKanji; # different rule status if both kanji and kana found

## -------------------------------------------------

!!safe_reverse;

# rule 4
($Extend | $Format)+ .?;

# rule 6
($MidLetter | $MidNumLet | $Single_Quote) ($BackALetterEx | $BackHebrew_LetterEx);

# rule 7b
$Double_Quote $BackHebrew_LetterEx;


# rule 11
($MidNum | $MidNumLet | $Single_Quote) $BackNumericEx;

# For dictionary-based break
$dictionary $dictionary;

## -------------------------------------------------

!!safe_forward;

# rule 4
($Extend | $Format)+ .?;

# rule 6
($MidLetterEx | $MidNumLetEx | $Single_QuoteEx) ($ALetterEx | $Hebrew_LetterEx);

# rule 7b
$Double_QuoteEx $Hebrew_LetterEx;

# rule 11
($MidNumEx | $MidNumLetEx | $Single_QuoteEx) $NumericEx;

# For dictionary-based break
$dictionary $dictionary;