1/*
2 ******************************************************************************
3 * Copyright (C) 2005-2007, International Business Machines Corporation and   *
4 * others. All Rights Reserved.                                               *
5 ******************************************************************************
6 */
7
8#include <stdio.h>
9#include <string.h>
10#include <stdlib.h>
11#include <time.h>
12
13#include "wbnf.h"
14
15// Most of this code is meant to test the test code. It's a self test.
16// Normally this isn't run.
17#define TEST_WBNF_TEST 0
18
19///////////////////////////////////////////////////////////
20//
21// Constants and the most basic helper classes
22//
23
24static const char DIGIT_CHAR[] = "0123456789";
25static const char WHITE_SPACE[] = {'\t', ' ', '\r', '\n', 0};
26static const char ALPHABET[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
27static const char SPECIAL[] = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
28
29static inline UBool isInList(const char c /*in*/, const char list[] /*in*/){
30    const char * p = list;
31    for (;*p != 0 && *p != c; p++);
32    return *p?TRUE:FALSE;
33}
34static inline UBool isDigit(char c) {return isInList(c, DIGIT_CHAR);}
35static inline UBool isWhiteSpace(char c) {return isInList(c, WHITE_SPACE);}
36static inline UBool isAlphabet(char c) {return isInList(c, ALPHABET);}
37static inline UBool isSpecialAsciiChar(char c) {return isInList(c,SPECIAL);}
38
39
40
41///////////////////////////////////////////////////////////
42//
43// Helper classes
44//
45
46class Buffer_byte{
47// Utility class, can be treated as an auto expanded array. no boundary check.
48
49    typedef char byte;
50    byte * start;
51    byte * current;
52    int buffer_size; // size unit is byte
53public:
54    inline int content_size(){return current - start;} // size unit is byte
55
56private:
57    inline void expand(int add_size = 100){ // size unit is byte
58        int new_size = buffer_size + add_size;
59
60        int cs_snap = content_size();
61        start = (byte *) realloc(start, new_size);   // may change the value of start
62        current = start + cs_snap;
63
64        memset(current, 0, add_size);
65        buffer_size = new_size;
66    }
67
68    inline void expand_to(int size){
69        int r = size - buffer_size;
70        if (r > 0) {
71            expand(r);  // simply expand, no block alignment
72        }
73    }
74    Buffer_byte(const Buffer_byte &);
75    Buffer_byte & operator = (const Buffer_byte &);
76public:
77    Buffer_byte():start(NULL),current(start),buffer_size(0){
78        expand();
79    }
80    ~Buffer_byte(){
81        free(start);
82    }
83
84    inline void reset(){
85        start != NULL ? memset(start, 0, buffer_size) : 0;
86        current = start;
87    }
88
89    // Using memory copy method to append a C array to buffer,
90    inline void append(const void * c, int size){ // size unit is byte
91        expand_to(content_size() + size) ;
92        memcpy(current, c, size);
93        current = current + size;
94    }
95
96    byte * buffer(){
97        return start;
98    }
99};
100
101/*
102  The class(es) try to work as bulid-in array, so it overloads these two operators
103    operator type *();
104    type & operator[];
105  The first is used to auto type convert, the latter is used to select member.
106
107  A small trick is the class does not overload the address-of operator. This
108  behavior is different from bulid-in array, but it give us the opportunity
109  to get the address of the class itself.
110*/
111//template<typename type>
112//    class BUFFER{
113//       typedef BUFFER name;
114#define BUFFER(type, name)\
115    class name {\
116    private:\
117       Buffer_byte buf;\
118    public:\
119        name & reset() {buf.reset(); return *this;}\
120        name & append(type c) {buf.append(&c, sizeof(type)); return *this;}\
121        name & append_array(const type * p, int size) {buf.append(p, sizeof(type)*size); return *this;}\
122        type & operator [] (int i) { return ((type *) buf.buffer())[i];}\
123        operator type *(){return (type *) buf.buffer();} \
124        int content_size(){return buf.content_size() / sizeof(type);}\
125    }
126
127
128class Pick{
129/* The Pick is the basic language generator element*/
130public:
131    // generate a string accroding the syntax
132    // Return a null-terminated c-string. The buffer is owned by callee.
133    virtual const char* next() = 0;
134    virtual ~Pick(){};
135};
136
137//typedef BUFFER<char> Buffer_char;
138//typedef BUFFER<int> Buffer_int;
139//typedef BUFFER<Pick *> Buffer_pPick;
140BUFFER(char, Buffer_char);
141BUFFER(int, Buffer_int);
142BUFFER(Pick *, Buffer_pPick);
143
144class SymbolTable{
145/* Helper class.
146* It's a mapping table between 'variable name' and its 'active Pick object'
147*/
148private:
149    Buffer_char  name_buffer;   // var names storage space
150
151    Buffer_int   names;         // points to name (offset in name_buffer)
152    Buffer_pPick refs;          // points to Pick
153
154    int get_index(const char *const var_name){
155        int len = names.content_size();
156        for (int i=0; i< len; i++){
157            if (strcmp(var_name, name_buffer + names[i]) == 0){
158                return i;
159            }
160        }
161        return -1;
162    }
163
164public:
165    enum RESULT {EMPTY, NO_VAR, NO_REF, HAS_REF};
166
167    RESULT find(const char *const var_name /*[in] c-string*/, Pick * * ref = NULL /*[out] Pick* */){
168        if (!var_name) return EMPTY; // NULL name
169
170        int i = get_index(var_name);
171        if (i == -1){
172            return NO_VAR;   // new name
173        }
174        if (!refs[i]){  // exist name, no ref
175            return NO_REF;
176        } else {
177            if (ref) {
178                *ref = refs[i];
179            }
180            return HAS_REF;   // exist name, has ref
181        }
182    }
183
184    void put(const char *const var_name, Pick *const var_ref = NULL){
185        int i = get_index(var_name);
186        switch(find(var_name)){
187            case EMPTY:    // NULL name
188                break;
189            case NO_VAR:    // new name
190                int offset;
191                offset = name_buffer.content_size();
192                name_buffer.append_array(var_name, strlen(var_name) + 1);
193                names.append(offset);
194                refs.append(var_ref);
195                break;
196            case NO_REF:    // exist name, no ref
197                refs[i] = var_ref;    // link definition with variable
198                break;
199            case HAS_REF:    // exist name, has ref
200                if (var_ref){
201                    refs[i] = var_ref;
202                }
203                break;
204            default:
205                ; // ASSERT(FALSE);
206        }
207        return;
208    }
209
210    UBool is_complete(){
211        int n = names.content_size();
212        for (int i=0; i<n; ++i){
213            if (refs[i] == NULL){
214                return FALSE;
215            }
216        }
217        return TRUE;
218    }
219
220    void reset(){
221        names.reset();
222        name_buffer.reset();
223
224        // release memory here
225        int s = refs.content_size();
226        for (int i=0; i < s; i++){
227            delete refs[i]; // TOFIX: point alias/recursion problem
228        }
229        refs.reset();
230    }
231
232    ~SymbolTable(){
233        reset();
234    }
235};
236
237
238/*
239// Document of class Escaper
240//
241// ATTENTION:
242// From http://icu-project.org/userguide/Collate_Customization.html.
243// We get the precedence of escape/quote operations
244//
245//     (highest) 1. backslash               \
246//               2. two single quotes       ''
247//               3. quoting                 ' '
248//
249// ICU Collation should accept following as the same string.
250//
251// 1)  'ab'c        _
252// 2)  a\bc          \
253// 3)  a'b'\c        |- They are equal.
254// 4)  abc          _/
255//
256// From "two single quotes", we have following deductions
257//    D1. empty quoting is illgal. (obviously)
258//    D2. no contact operation between two quotings
259//              '.''.'      is not ..   it is .'.
260//    D3. "two single quotes" cannot contact two quoting simultaneously
261//              '..''''.'   is not ..'. it is ..''.
262//       NOTICE:
263//        "two single quotes" can contact before one quoting
264//              '''.'       is '.
265//        "two single quotes" can literally contact after one quoting
266//        But, from syntax, it's one quoting including a "two single quotes"
267//              '.'''       is .'
268//    D4. "two single quotes" cannot solely be included in quoting
269//              ''''        is not '    it is ''
270//       NOTICE:  These are legal
271//              '.''.'      is .'.
272//              '.'''       is .'
273//
274//                 dicision
275//                    /\
276//                   /__\
277//      output buffer    input buffer
278//
279// To make our dicision (within an atom operation) without caring input and output buffer,
280// following calling pattern (within an atom operation) shall be avoided
281//
282//    P1 open_quoting()  then close_quoting()    (direct violation)   D1
283//    P2 close_quoting() then open_quoting()     (direct violation)   D2
284//    P3 empty open_quoting()                    (indirect violation) D1, D4
285//    P4 empty close_quoting()                   (indirect violation) D2, D3
286//    P5 open_quoting()  then two single quotes  (indirect violation) D4
287//    P6 close_quoting() then two single quotes  (indirect violation) D3
288//
289// two single quotes escaping will not open_ or close_ quoting()
290// The choice will not lose some quoing forms.
291//
292// For open_quoting(),
293// we may get this form quoting     '''         P5
294// It may raise a bug               ''''x
295// If we expect
296//      '''.'       let the next char open the quoting
297//      '.''.'      the quoting is already opened by preceding char
298//
299// For close_quoting()
300// we will get this form quoting    '.'''       P6
301// It may raise a bug               '.''''.'
302// If we expect
303//      '.'''\.     let the next char close the quoting
304//      '.''''.'    the expectation is wrong!  using  '.'\''.' instead
305//
306// It's a hard work to re-adjust generation opportunity for various escaping form.
307// We just simply ignore it.
308*/
309class Escaper{
310public:
311    enum CHOICE {YES, NO, RAND};
312    enum ESCAPE_FORM {BSLASH_ONLY, QUOTE_ONLY, QUOTE_AND_BSLAH, RAND_ESC};
313private:
314    class Bool{ // A wrapper class for CHOICE, to auto adapter UBool class
315        private:
316            const CHOICE tag;
317        public:
318            Bool(CHOICE flag=RAND):tag(flag){}
319            operator UBool() {   // conversion operator
320                return tag == RAND ? rand()%2 : tag == YES;
321                //if (tag == RAND){
322                //    return rand()%2 == 1;
323                //} else {
324                //    return tag == YES ? TRUE : FALSE;
325                //}
326            }
327    };
328public:
329    Escaper(CHOICE escapeLiteral = RAND,
330        CHOICE twoQuotesEscape = RAND,
331        ESCAPE_FORM escapeForm = RAND_ESC):
332        escape_form(escapeForm),
333        escape_literal(escapeLiteral),
334        two_quotes_escape(twoQuotesEscape),
335        is_quoting(FALSE){}
336private:
337    Buffer_char str;
338    ESCAPE_FORM escape_form;
339    Bool escape_literal;
340    Bool two_quotes_escape;
341    UBool quote_escape;
342    UBool bslash_escape;
343    UBool is_quoting;
344
345    void set_options(){
346        ESCAPE_FORM t = escape_form == RAND_ESC ? (ESCAPE_FORM) (rand()%3) : escape_form;
347        switch (t){
348                case BSLASH_ONLY :
349                    bslash_escape = TRUE; quote_escape = FALSE; break;
350                case QUOTE_ONLY:
351                    bslash_escape = FALSE;quote_escape = TRUE;  break;
352                case QUOTE_AND_BSLAH:
353                    bslash_escape = TRUE; quote_escape = TRUE;  break;
354                default:
355                    ;// error
356        }
357    }
358
359    void reset(){
360        str.reset();
361        is_quoting = FALSE;
362    }
363
364    inline void open_quoting(){
365        if(is_quoting){
366            // do nothing
367        } else {
368            str.append('\'');
369            is_quoting = TRUE;
370        }
371    }
372    inline void close_quoting(){
373        if(is_quoting){
374            str.append('\'');
375            is_quoting = FALSE;
376        } else {
377            // do nothing
378        }
379    }
380
381    // str  [in]    null-terminated c-string
382    void append(const char * strToAppend){
383        for(;*strToAppend != 0; strToAppend++){
384            append(*strToAppend);
385        }
386    }
387
388    inline void append(const char c){
389        set_options();
390
391        if (c == '\\'){
392            quote_escape ? open_quoting() : close_quoting();
393            //bslash_escape always true here
394            str.append('\\');
395            str.append('\\');
396        } else if (c == '\''){
397            if (two_quotes_escape){     // quoted using two single quotes
398                // See documents in anonymous.design
399                str.append('\'');
400                str.append('\'');
401            } else{
402                quote_escape ? open_quoting() : close_quoting();
403                //bslash_escape always true here
404                str.append('\\');
405                str.append('\'');
406            }
407        } else if (isSpecialAsciiChar(c) || isWhiteSpace(c)){
408            quote_escape  ? open_quoting()   : close_quoting();
409            if (bslash_escape) str.append('\\');
410            str.append(c);
411        } else { //if (isAlphabet(c) || isDigit(c) || TRUE){ // treat others as literal
412            if (escape_literal){
413                quote_escape  ? open_quoting()   : close_quoting();
414                if (bslash_escape)  str.append('\\');
415                str.append(c);
416            } else {
417                close_quoting();
418                str.append(c);
419            }
420        }
421    }
422
423public:
424    // Return a null-terminate c-string. The buffer is owned by callee.
425    char * operator()(const char * literal /*c-string*/){
426        str.reset();
427        for(;*literal != 0; literal++){
428            append(*literal);
429        }
430        close_quoting();    // P4 exception, to close whole quoting
431        return str;
432    }
433};
434
435class WeightedRand{
436// Return a random number in [0, size)
437// Every number has different chance (aka weight) to be selected.
438private:
439    Buffer_int weights;
440    double total;
441    WeightedRand(const WeightedRand &);
442    WeightedRand & operator = (const WeightedRand &);
443public:
444    WeightedRand(Buffer_int * weight_list = NULL, int size = 0){
445        if ( weight_list == NULL){
446            for (int i=0; i<size; ++i) weights.append(DEFAULT_WEIGHT);
447        } else {
448            int s = weight_list->content_size();
449            if (s < size){
450                weights.append_array( (*weight_list),s);
451                for (int i=s; i<size; ++i) weights.append(DEFAULT_WEIGHT);
452            } else { // s >= size
453                weights.append_array( (*weight_list),size);
454            }
455        }
456        total = 0;
457        int c = weights.content_size();
458        for (int i=0; i<c; ++i){
459            total += weights[i];
460        }
461    }
462
463    void append(int weight){
464        weights.append(weight);
465        total += weight;
466    }
467
468    // Give a random number with the consideration of weight.
469    // Every random number is associated with a weight.
470    // It identifies the chance to be selected,
471    // larger weight has more chance to be selected.
472    //
473    //
474    //  ______________________   every slot has equal chance
475    //
476    //  [____][_][___][______]   each item has different slots, hence different chance
477    //
478    //
479    //  The algorithms to generate the number is illustrated by preceding figure.
480    //  First, a slot is selected by rand(). Then we translate the slot to corresponding item.
481    //
482    int next(){
483        // get a random in [0,1]
484        double reference_mark = (double)rand() / (double)RAND_MAX;
485
486        // get the slot's index, 0 <= mark <= total;
487        double mark = total * reference_mark;
488
489        // translate the slot to corresponding item
490        int i=0;
491        for (;;){
492            mark -= weights[i];  // 0 <= mark <= total
493            if (mark <= 0)
494                break;
495            i++;
496        }
497        return i;
498    }
499};
500
501///////////////////////////////////////////////////////////
502//
503// The parser result nodes
504//
505
506class Literal : public Pick {
507public:
508    virtual const char* next(){
509        return str;
510    }
511    Literal(const char * s /*c-string*/){
512        str.append_array(s, strlen(s) + 1);
513    }
514private:
515    Buffer_char str; //null-terminated c-string
516};
517
518class Variable : public Pick {
519public:
520    Variable(SymbolTable * symbols, const char * varName, Pick * varRef = NULL){
521        this->var_name.append_array(varName, strlen(varName) + 1);
522        if ((symbol_table = symbols)){
523            symbol_table->put(varName, varRef);
524        }
525    }
526
527    operator const char *(){
528        return var_name;
529    }
530
531    virtual const char* next(){
532        if (symbol_table){
533            Pick * var_ref = NULL;
534            symbol_table->find(var_name, &var_ref);
535            if (var_ref) {
536                return var_ref->next();
537            }
538        }
539        return "";  // dumb string
540    }
541private:
542    Buffer_char var_name;
543    SymbolTable * symbol_table;
544};
545
546class Quote : public Pick{
547public:
548    Quote(Pick & base):item(base),e(Escaper::NO, Escaper::NO, Escaper::BSLASH_ONLY){
549    }
550    virtual const char* next(){
551        return e(item.next());
552    }
553private:
554    Pick & item;
555    Buffer_char str;
556    Escaper e;
557};
558
559
560class Morph : public Pick{
561/*
562The difference between morph and an arbitrary random string is that
563a morph changes slowly. When we build collation rules, for example,
564it is a much better test if the strings we use are all in the same
565'neighborhood'; they share many common characters.
566*/
567public:
568    Morph(Pick & base):item(base){}
569
570    virtual const char* next(){
571        current.reset();
572        const char * s = item.next();
573        current.append_array(s, strlen(s) + 1);
574        if  (last.content_size() == 0) {
575            str.reset();
576            last.reset();
577            str.append_array(current, current.content_size());
578            last.append_array(current, current.content_size());
579        } else {
580            morph();
581        }
582        return str;
583    }
584private:
585    Pick & item;
586    Buffer_char str;
587    Buffer_char last;
588    Buffer_char current;
589
590    char * p_last;
591    char * p_curr;
592
593    void copy_curr(){
594        if (*p_curr) {
595            str.append(*p_curr);
596            p_curr++;
597        }
598    }
599
600    void copy_last(){
601        if (*p_last) {
602            str.append(*p_last);
603            p_last++;
604        }
605    }
606
607    // copy 0, 1, or 2 character(s) to str
608    void copy(){
609        static WeightedRand wr(& Buffer_int().append(DEFAULT_WEIGHT * 10), 5);
610
611        switch (wr.next()){
612            case 0: // copy last  -- has 10 times chance than others
613                copy_last();
614                break;
615            case 1: // copy both
616                copy_curr();
617                copy_last();
618                break;
619            case 2: // copy both
620                copy_last();
621                copy_curr();
622                break;
623            case 3:
624                copy_curr();
625                break;
626            case 4:  // copy nothing
627                break;
628            default:
629                // ASSERT(FALSE);
630                ;
631        }
632    }
633
634    void morph(void){
635        int min = strlen(last);
636        int max = strlen(current);
637        if (min > max){
638            int temp  = min;
639            min = max;
640            max = temp;
641        }
642
643        int len = min + rand()%(max - min + 1); // min + [0, diff]
644        p_curr = current;
645        p_last = last;
646        str.reset();
647
648        for (; str.content_size()<len && *p_curr && *p_last;){
649            copy(); // copy 0, 1, or 2 character(s) to str
650        }
651
652        if (str.content_size() == len) {
653            str.append(0);
654            final();
655            return;
656        }
657
658        if (str.content_size() > len) { // if the last copy copied two characters
659            str[len]=0;
660            final();
661            return;
662        }
663
664        // str.content_size() < len
665        if (*p_last) {
666            for (; str.content_size() < len; copy_last());
667        } else if (*p_curr){
668            for (; str.content_size() < len; copy_curr());
669        }
670
671        int last_len = last.content_size();
672        for (;str.content_size() < len;){
673            str.append(last[rand()%last_len]);
674        }
675        str.append(0);
676        final();
677    }
678
679    void final(){
680        last.reset();
681        last.append_array(current, current.content_size());
682    }
683};
684
685class Sequence : public Pick {
686public:
687    virtual const char* next(){
688        str.reset();
689        int s = items.content_size();
690        for(int i=0; i < s; i++){
691            const char * t = items[i]->next();
692            str.append_array(t, strlen(t));
693        }
694        str.append(0); // terminal null
695        return str;
696    }
697
698    void append (Pick * node){
699        items.append(node);
700    }
701
702    virtual ~Sequence(){
703        int s = items.content_size();
704        for(int i=0; i < s; i++){
705            //How can assure the item is got from heap?
706            //Let's assume it.
707            delete items[i]; // TOFIX: point alias/recursion problem
708            items[i] = NULL;
709        }
710    }
711private:
712    Buffer_pPick items;
713    Buffer_char  str; //null-terminated c-string
714};
715
716class Repeat : public Pick {
717private:
718    Pick * item;
719    Buffer_char str;
720    WeightedRand wr;
721    int min;
722    int max;
723    int select_a_count(){
724        return min + wr.next();
725    }
726public:
727    virtual const char* next(){
728        str.reset();
729        int c = select_a_count();
730        for(int i=0; i< c; i++){
731            const char * t = item->next();
732            str.append_array(t, strlen(t));
733        }
734        str.append(0);
735        return str;
736    }
737
738    Repeat(Pick * base, int minCount =0, int maxCount = 1, Buffer_int * weights = NULL):
739        wr(weights, maxCount-minCount +1) {
740        this->item = base;
741        this->min = minCount;
742        this->max = maxCount;
743    }
744    virtual ~Repeat(){
745        delete item;  // TOFIX: point alias/recursion problem
746        item = NULL;
747    }
748};
749
750
751class Alternation : public Pick {
752public:
753    virtual const char* next(){
754        str.reset();
755        int i = wr.next();
756        const char * t = items[i]->next();
757        str.append_array(t, strlen(t) + 1);
758        return str;
759    }
760    virtual ~Alternation(){
761        int s = items.content_size();
762        for(int i=0; i < s; i++){
763            delete items[i];  // TOFIX: point alias/recursion problem
764            items[i] = NULL;
765        }
766    }
767
768    Alternation & append (Pick * node, int weight = DEFAULT_WEIGHT){
769        items.append(node);
770        wr.append(weight);
771        return *this;
772    }
773private:
774    Buffer_pPick items;
775    Buffer_char str; // null-terminated c-string
776    WeightedRand wr;
777};
778
779///////////////////////////////////////////////////////////
780//
781// The parser
782//
783
784enum TokenType {STRING, VAR, NUMBER, STREAM_END, ERROR, QUESTION, STAR, PLUS, LBRACE, RBRACE, LPAR, RPAR, SEMI, EQ, COMMA, BAR, AT, WAVE, PERCENT};
785
786class Scanner{
787friend int DumpScanner(Scanner & s, UBool dumb);
788private:
789    const char * source;
790    const char * working;
791    const char * history; // for debug
792    enum StateType {START, IN_NUM, IN_VAR_FIRST, IN_VAR, IN_QUOTE, IN_QUOTE_BSLASH, IN_BSLASH, IN_STRING, DONE};
793    StateType state;
794    void terminated(TokenType t){
795        working--;       // return the peeked character
796        tokenType = t;
797        token.append(0); // close buffer
798        state = DONE;
799    }
800public:
801    // the buffer of "source" is owned by caller
802    Scanner(const char *src/*[in] c-string*/ = NULL):source(src){
803        working = src;
804        history = working;
805        state = DONE;
806        tokenType = ERROR;
807    }
808
809    //void setSource(const char *const src /*[in] c-string*/){
810    //    *(&const_cast<const char *>(source)) = src;
811    //}
812
813    Buffer_char token;
814    TokenType tokenType;
815
816    TokenType getNextToken(){
817        token.reset();
818        state = START;
819        history = working; // for debug
820        while (state != DONE){
821            char c = *working++;
822            if (c == 0 && state != START){//avoid buffer overflow. for IN_QUOE, IN_ESCAPE
823                terminated(ERROR);
824                break; // while
825            }
826            switch(state){
827                case START:
828                    tokenType = ERROR;
829                    switch(c){
830                        case '?'  : tokenType = QUESTION; break;
831                        case '*'  : tokenType = STAR; break;
832                        case '+'  : tokenType = PLUS; break;
833                        case '{'  : tokenType = LBRACE; break;
834                        case '}'  : tokenType = RBRACE; break;
835                        case '('  : tokenType = LPAR; break;
836                        case ')'  : tokenType = RPAR; break;
837                        case ';'  : tokenType = SEMI; break;
838                        case '='  : tokenType = EQ; break;
839                        case ','  : tokenType = COMMA; break;
840                        case '|'  : tokenType = BAR; break;
841                        case '@'  : tokenType = AT; break;
842                        case '~'  : tokenType = WAVE; break;
843                        case '%'  : tokenType = PERCENT; break;
844                        case 0    : tokenType = STREAM_END; working-- /*avoid buffer overflow*/; break;
845                    }
846                    if (tokenType != ERROR){
847                        token.append(c);
848                        token.append(0);
849                        state = DONE;
850                        break; // START
851                    }
852                    switch(c){
853                        case '$'  : state = IN_VAR_FIRST; token.append(c); break;
854                        case '\'' : state = IN_QUOTE;     break;
855                        case '\\' : state = IN_BSLASH;    break;
856                        default:
857                            if (isWhiteSpace(c)){    // state = START;   //do nothing
858                            } else if (isDigit(c)){     state = IN_NUM;    token.append(c);
859                            } else if (isAlphabet(c)){  state = IN_STRING; token.append(c);
860                            } else {terminated(ERROR);}
861                    }
862                    break;//START
863                case IN_NUM:
864                    if (isDigit(c)){
865                        token.append(c);
866                    } else {
867                        terminated(NUMBER);
868                    }
869                    break;//IN_NUM
870                case IN_VAR_FIRST:
871                    if (isAlphabet(c)){
872                        token.append(c);
873                        state = IN_VAR;
874                    } else {
875                        terminated(ERROR);
876                    }
877                    break; // IN_VAR_FISRT
878                case IN_VAR:
879                    if (isAlphabet(c) || isDigit(c)){
880                        token.append(c);
881                    } else {
882                        terminated(VAR);
883                    }
884                    break;//IN_VAR
885                case IN_STRING:
886                    // About the scanner's behavior for STRING, AT, and ESCAPE:
887                    // All of them can be contacted with each other.
888                    // This means the scanner will eat up as much as possible strings
889                    //   (STRING, AT, and ESCAPE) at one time, with no regard of their
890                    //   combining sequence.
891                    //
892                    if (c == '\''){
893                        state = IN_QUOTE; // the first time we see single quote
894                    } else if (c =='\\'){ // back slash character
895                        state = IN_BSLASH;
896                    } else if (isAlphabet(c) || isDigit(c)){
897                        token.append(c);
898                    } else{
899                        terminated(STRING);
900                    }
901                    break;//IN_STRING
902                case IN_QUOTE:
903                    if (c == '\''){ // the second time we see single quote
904                        state = IN_STRING; // see document in IN_STRING
905                    } else if ( c== '\\') { // backslah escape in quote
906                        state = IN_QUOTE_BSLASH;
907                    } else {
908                        token.append(c);  // eat up everything, includes back slash
909                    }
910                    break;//IN_QUOTE
911                case IN_QUOTE_BSLASH:
912                case IN_BSLASH:
913                    switch (c){
914                        case 'n'  : token.append('\n'); break;
915                        case 'r'  : token.append('\r'); break;
916                        case 't'  : token.append('\t'); break;
917                        case '\'' : token.append('\''); break;
918                        case '\\' : token.append('\\'); break;
919                        default: token.append(c); // unknown escaping, treat it as literal
920                    }
921                    if (state == IN_BSLASH){
922                        state = IN_STRING; // see document in IN_STRING
923                    } else { // state == IN_QUOTE_BSLASH
924                        state = IN_QUOTE;
925                    }
926                    break;//IN_BSLASH
927                case DONE:  /* should never happen */
928                default:
929                    working--;
930                    tokenType = ERROR;
931                    state = DONE;
932                    break;
933            }//switch(state)
934        }//while (state != DONE)
935
936        return tokenType;
937    }
938};//class Scanner
939
940class Parser{
941friend UBool TestParser();
942friend class TestParserT;
943friend class LanguageGenerator_impl;
944private:
945    Scanner s;
946    TokenType & token;
947    int min_max;   // for the evil infinite
948
949    UBool match(TokenType expected){
950        if (token == expected) {
951            token = s.getNextToken();
952            return TRUE;
953        } else {
954            //s.dumpCurrentPoint();
955            return FALSE;
956        }
957    }
958
959    UBool weight(int & value){
960        if (token == NUMBER){
961            int temp = atoi(s.token);
962            match(NUMBER);
963            if (match(PERCENT)){
964                value = temp;
965                return TRUE;
966            }
967        }
968        return FALSE;
969    }
970
971    UBool repeat (Pick* &node /*in,out*/){
972        if (node == NULL) return FALSE;
973
974        int count = -2;
975        int min = -2;
976        int max = -2;
977        UBool question = FALSE;
978        switch (token){
979            case QUESTION:
980                match(QUESTION);
981                min = 0;
982                max = 1;
983                count = 2;
984                question = TRUE;
985                break;
986            case STAR:
987                match(STAR);
988                min = 0;
989                max = -1;
990                count = -1;
991                break;
992            case PLUS:
993                match(PLUS);
994                min = 1;
995                max = -1;
996                count = -1;
997                break;
998            case LBRACE:
999                match(LBRACE);
1000                if (token != NUMBER){
1001                    return FALSE;
1002                }else {
1003                    min = atoi(s.token);
1004                    match(NUMBER);
1005                    if (token == RBRACE){
1006                        match(RBRACE);
1007                        max = min;
1008                        count = 1;
1009                    } else if (token == COMMA) {
1010                        match(COMMA);
1011                        if (token == RBRACE){
1012                            match(RBRACE);
1013                            max = -1;
1014                            count = -1;
1015                        } else if (token == NUMBER) {
1016                            max = atoi(s.token);
1017                            match(NUMBER);
1018                            count = max - min + 1;
1019                            if (!match(RBRACE)) {
1020                                return FALSE;
1021                            }
1022                        } else {
1023                            return FALSE;
1024                        }
1025                    } else {
1026                        return FALSE;
1027                    }
1028                }
1029                break;
1030            default:
1031                return FALSE;
1032        }
1033
1034        if (count == -2 || min == -2 || max == -2){
1035            //ASSERT(FALSE);
1036            return FALSE;
1037        }
1038
1039        // eat up following weights
1040        Buffer_int weights;
1041        int w;
1042        while (weight(w)){
1043            weights.append(w);
1044        }
1045
1046        // for the evil infinite
1047        min_max = min_max > min ? min_max : min;
1048        min_max = min_max > max ? min_max : max;
1049        if (min_max > PSEUDO_INFINIT){
1050            return FALSE; // PSEUDO_INFINIT is less than the real maximum
1051        }
1052        if (max == -1){ // the evil infinite
1053            max = PSEUDO_INFINIT;
1054        }
1055        // for the strange question mark
1056        if (question && weights.content_size() > 0){
1057            Buffer_int w2;
1058            w2.append(DEFAULT_WEIGHT - weights[0]).append(weights[0]);
1059            node = new Repeat(node,min,max,&w2);
1060            return TRUE;
1061        }
1062        node = new Repeat(node,min,max,&weights);
1063        return TRUE;
1064    }
1065
1066    UBool core(Pick* &node /*out*/){
1067        if (node != NULL) return FALSE; //assert node == NULL
1068
1069        switch(token){
1070            case LPAR:
1071                match(LPAR);
1072                if(defination(node) && match(RPAR)){
1073                    return TRUE;
1074                }
1075                return FALSE;
1076            case VAR:
1077                node = new Variable(&symbols, s.token);
1078                match(VAR);
1079                return TRUE;
1080            case STRING:
1081                node = new Literal(s.token);
1082                match(STRING);
1083                return TRUE;
1084            default:
1085                return FALSE;
1086        }
1087    }
1088    UBool modified(Pick* &node /*out*/){
1089        if (node != NULL) return FALSE; //assert node == NULL
1090
1091        if (!core(node)) {
1092            return FALSE;
1093        }
1094
1095        for (;;){
1096            switch(token){
1097                case WAVE:
1098                    match(WAVE);
1099                    node = new Morph(*node);
1100                    break;
1101                case AT:
1102                    match(AT);
1103                    node = new Quote(*node);
1104                    break;
1105                case QUESTION:
1106                case STAR:
1107                case PLUS:
1108                case LBRACE:
1109                    if (!repeat(node)) return FALSE;
1110                    break;
1111                case SEMI:      // rule definiation closed
1112                case RPAR:      // within parenthesis (core closed)
1113                case BAR:       // in alternation
1114                case NUMBER:    // in alternation, with weight
1115                case LPAR:      // in sequence
1116                case VAR:       // in sequence
1117                case STRING:    // in sequence
1118                    return TRUE;
1119                default:
1120                    return FALSE;
1121            }
1122        }
1123    }
1124
1125
1126    UBool sequence_list(Pick* &node /*in,out*/){
1127        if (node == NULL) return FALSE; // assert node != NULL
1128
1129        Sequence* seq = new Sequence();
1130        Pick * n = node;
1131
1132        while (token == VAR || token == STRING || token == LPAR){
1133            seq->append(n);
1134            n = NULL;
1135            if (modified(n)){
1136                // go on
1137            } else {
1138                goto FAIL;
1139            }
1140        }
1141
1142        if (token == SEMI || token == RPAR || token == BAR){
1143            seq->append(n);
1144            node = seq;
1145            return TRUE;
1146        }
1147FAIL:
1148        delete seq;
1149        return FALSE;
1150
1151    }
1152
1153    UBool sequence(Pick* &node /*out*/){
1154        if (node != NULL) return FALSE; //assert node == NULL
1155
1156        if (!modified(node)) {
1157            return FALSE;
1158        }
1159
1160        if (token == VAR || token == STRING || token == LPAR){
1161            return sequence_list(node);
1162        } else {
1163            return TRUE; // just a modified
1164        }
1165    }
1166
1167    UBool alternation_list(Pick* &node /*in,out*/){
1168        if (node == NULL) return FALSE; // assert node != NULL
1169
1170        Alternation * alt = new Alternation();
1171        Pick * n = node;
1172        int w = DEFAULT_WEIGHT;
1173
1174        while (token == NUMBER || token == BAR){
1175            if(token == NUMBER) {
1176                if (weight(w)){
1177                    if (token == BAR){
1178                        // the middle item, go on
1179                    } else {
1180                        // the last item or encounter error
1181                        break; //while
1182                    }
1183                } else {
1184                    goto FAIL;
1185                }
1186            } // else token == BAR
1187            match(BAR);
1188            alt->append(n,w);
1189
1190            n = NULL;
1191            w = DEFAULT_WEIGHT;
1192            if (sequence(n)){
1193                // go on
1194            } else {
1195                goto FAIL;
1196            }
1197        }
1198
1199        if (token == SEMI || token == RPAR) {
1200            alt->append(n,w);
1201            node = alt;
1202            return TRUE;
1203        }
1204FAIL:
1205        delete alt;
1206        return FALSE;
1207    }
1208
1209    UBool alternation(Pick* &node /*out*/){
1210        if (node != NULL) return FALSE; //assert node == NULL
1211
1212        // 'sequence' has higher precedence than 'alternation'
1213        if (!sequence(node)){
1214            return FALSE;
1215        }
1216
1217        if (token == BAR || token == NUMBER){ // find a real alternation1, create it.
1218            return alternation_list(node);
1219        } else {
1220            return TRUE;    // just a sequence_old
1221        }
1222    }
1223
1224
1225    UBool defination(Pick* &node /*out*/){
1226        if (node != NULL) return FALSE; //assert node == NULL
1227        return alternation(node);
1228    }
1229
1230    UBool rule(){
1231        if (token == VAR){
1232            Buffer_char name;
1233            name.append_array(s.token, strlen(s.token) + 1);
1234            match(VAR);
1235
1236            if (match(EQ)){
1237                Pick * t = NULL;
1238                if(defination(t)){
1239                    symbols.put(name, t);
1240                    return match(SEMI);
1241                }
1242            }
1243        }
1244        return FALSE;
1245    }
1246public:
1247    UBool rules(){
1248        symbols.reset();
1249        token = s.getNextToken();
1250        while (rule()){
1251        }
1252        if (token == STREAM_END){
1253            return TRUE;
1254        } else {
1255            //s.dumpCurrentPoint();
1256            return FALSE;
1257        }
1258    }
1259
1260public:
1261    SymbolTable symbols;
1262
1263    Parser(const char *const source):s(source), token(s.tokenType){
1264        min_max = -2;
1265    }
1266    UBool parse(){
1267        return rules();
1268    }
1269
1270}; // class Parser
1271
1272
1273///////////////////////////////////////////////////////////
1274//
1275//
1276//
1277
1278int DumpScanner(Scanner & s, UBool dump = TRUE){
1279    int len = strlen(s.source);
1280    int error_start_offset = s.history - s.source;
1281    if (dump){
1282        printf("\n=================== DumpScanner ================\n");
1283        fwrite(s.source, len, 1, stdout);
1284        printf("\n-----parsed-------------------------------------\n");
1285        fwrite(s.source, s.history - s.source, 1, stdout);
1286        printf("\n-----current------------------------------------\n");
1287        fwrite(s.history, s.working - s.history, 1, stdout);
1288        printf("\n-----unparsed-----------------------------------\n");
1289        fwrite(s.working, (s.source + len - s.working), 1, stdout);
1290        printf("\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n");
1291    }
1292    return error_start_offset;
1293}
1294
1295class LanguageGenerator_impl{
1296public:
1297    LanguageGenerator_impl(const char *const bnf_definition, const char *const top_node)
1298        :par(bnf_definition), top_node_name(top_node){
1299        srand((unsigned)time( NULL ));
1300    }
1301
1302    LanguageGenerator::PARSE_RESULT parseBNF(UBool debug = TRUE){
1303        if (par.parse()){
1304            if (par.symbols.find(top_node_name, &top_node_ref) == SymbolTable::HAS_REF) {
1305                if (par.symbols.is_complete()) {
1306                    return LanguageGenerator::OK;
1307                } else {
1308                    if (debug) printf("The bnf definition is incomplete.\n");
1309                    return LanguageGenerator::INCOMPLETE;
1310                }
1311            } else {
1312                if (debug) printf("No top node is found.\n");
1313                return LanguageGenerator::NO_TOP_NODE;
1314            }
1315        } else {
1316            if(debug) {
1317                printf("The bnf definition is wrong\n");
1318                DumpScanner(par.s, TRUE);
1319            }
1320            return LanguageGenerator::BNF_DEF_WRONG;
1321        }
1322    }
1323    const char * next(){
1324        return top_node_ref->next();
1325    }
1326
1327private:
1328    Parser par;
1329    const char *const top_node_name;
1330    Pick * top_node_ref;
1331};
1332
1333LanguageGenerator::LanguageGenerator():lang_gen(NULL){
1334}
1335
1336LanguageGenerator::~LanguageGenerator(){
1337    delete lang_gen;
1338}
1339
1340LanguageGenerator::PARSE_RESULT LanguageGenerator::parseBNF(const char *const bnf_definition /*in*/, const char *const top_node/*in*/, UBool debug){
1341    if (lang_gen){
1342        delete lang_gen;
1343    }
1344    lang_gen = new LanguageGenerator_impl(bnf_definition, top_node);
1345    PARSE_RESULT r = lang_gen->parseBNF(debug);
1346    if (r != OK){
1347        delete lang_gen;
1348        lang_gen = NULL;
1349        return r;
1350    } else {
1351        return r;
1352    }
1353}
1354const char *LanguageGenerator::next(){ // Return a null-terminated c-string. The buffer is owned by callee.
1355    if (lang_gen){
1356        return lang_gen->next();
1357    }else {
1358        return "";
1359    }
1360}
1361
1362///////////////////////////////////////////////////////////
1363//
1364// The test code for WBNF
1365//
1366
1367#define CALL(fun) \
1368    if (fun()){ \
1369        printf("Pass: " #fun "\n");\
1370    } else { \
1371        printf("FAILED: !!! " #fun " !!!\n"); \
1372    }
1373
1374#define DUMP_R(fun, var, times) \
1375    {printf("\n========= " #fun " =============\n"); \
1376    for (int i=0; i<times; i++) { \
1377        const char * t = var.next();\
1378        fwrite(t,strlen(t),1,stdout); \
1379        printf("\n");   \
1380    }   \
1381    printf("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n");}
1382
1383
1384
1385#if TEST_WBNF_TEST
1386static UBool TestQuote(){
1387    const char *const str = "This ' A !,z| qq [] .new\tline";
1388    //const char *const str_r = "This \\' A '!,'z'|' qq '[]' '.'new\tline";
1389    ////
1390    //// :(  we must quote our string to following C syntax
1391    ////     cannot type the literal here, it makes our code rather human unreadable
1392    ////     very very unconformable!
1393    ////
1394    ///*
1395    //*/
1396
1397    //const char *const s1    =   "ab'c";
1398    //const char (* s1_r1) [] = { "ab''c",    // ab''c
1399    //                            "ab\\'c",   // ab\'c
1400    //                           };//
1401    ///*
1402    // .      '.'     \.
1403    // ..     \.\.    '.'\.   '.'\.   '..'    // '.''.'  wrong
1404    //*/
1405
1406    //const char *const s2    =   "a..'.b";       // a..'.b
1407    //const char (*s2_r) []   = { "a'..''.'b"     // a'..''.'b
1408    //                           ,"a'..\\'.'b"    // a'..\'.'b
1409    //                           ,"a'..'\\''.'b"  // a'..'\''.'b
1410    //                          };//
1411
1412    //const char *const s3    =   "a..\\.b";      // a..\.b
1413    //const char (*s3_r) []   = { "a'..\\\\.'b"   // a'..\\.'b
1414    //                           ,"a'..'\\\\'.'b" // a'..'\\'.'b
1415    //                          };//
1416
1417    //                            // no catact operation, no choice, must be compact
1418
1419    srand((unsigned)time( NULL ));
1420
1421    //Escaper l(Escaper::NO, Escaper::NO, Escaper::RAND_ESC);
1422    Pick *p = new Literal(str);
1423    Quote q(*p);
1424
1425    DUMP_R(TestQuote, (*p), 1);
1426    DUMP_R(TestQuote, q, 20);
1427    return FALSE;
1428}
1429static UBool TestLiteral(){
1430    const char * s = "test string99.";
1431    Literal n(s);
1432    const char * r = n.next();
1433    return strcmp(s,r) == 0;
1434}
1435
1436static UBool TestSequence(){
1437    Sequence seq;
1438    seq.append(new Literal("abc "));
1439    seq.append(new Literal(", s"));
1440
1441    return strcmp(seq.next(), "abc , s") == 0;
1442}
1443static UBool TestAlternation(){
1444    srand((unsigned)time( NULL ));
1445    Alternation alt;
1446    alt.append(new Literal("aaa_10%"),10);
1447    alt.append(new Literal("bbb_0%"),0);
1448    alt.append(new Literal("ccc_10%"),10);
1449    alt.append(new Literal("ddddddd_50%"),50);
1450
1451    DUMP_R(TestAlternation, alt, 50);
1452
1453    return FALSE;
1454}
1455
1456static UBool TestBuffer(){
1457    Buffer_int t;
1458    t.append(1).append(0).append(5);
1459    int s = t.content_size();
1460    for (int i=0; i<s; ++i){
1461        printf("%d\n", t[i]);
1462    }
1463    return FALSE;
1464}
1465
1466static UBool TestWeightedRand(){
1467    srand((unsigned)time( NULL ));
1468    Buffer_int t;
1469    t.append(1).append(0).append(5);
1470    WeightedRand wr(&Buffer_int().append(10).append(0).append(50),4);
1471//    WeightedRand wr(&t,3);
1472    for (int i=0; i< 50; ++i){
1473        printf("%d\n", wr.next());
1474    }
1475    return FALSE;
1476}
1477
1478static UBool TestRepeat(){
1479    srand((unsigned)time( NULL ));
1480    Repeat rep(new Literal("aaa1-5 "), 1, 5);
1481    DUMP_R(TestRepeat, rep, 50);
1482
1483    Repeat r2(new Literal("b{1,3}1%0%5% "), 1, 3, &Buffer_int().append(1).append(0).append(5));
1484    DUMP_R(TestRepeat, r2, 50);
1485
1486    Repeat r3(new Literal("aaa5-5 "), 5, 5);
1487    DUMP_R(TestRepeat, r3, 50);
1488
1489    return FALSE;
1490}
1491
1492static UBool TestVariable(){
1493    SymbolTable tab;
1494    Pick * value = new Literal("string1");
1495    Variable var1(&tab, "x", value);
1496
1497    Variable var2(&tab, "y");
1498//    tab.put(var2, value); // TOFIX: point alias/recursion problem
1499    Pick * value2 = new Literal("string2");
1500    tab.put(var2, value2);
1501
1502    Pick * value3 = new Literal("string3");
1503    Variable var3(&tab, "z");
1504    tab.put("z", value3);
1505
1506    UBool pass;
1507    pass = strcmp(var1.next(), value->next()) == 0;
1508    pass = pass && strcmp(var2.next(), value2->next()) == 0;
1509    pass = pass && strcmp(var3.next(), value3->next()) == 0;
1510    return pass;
1511}
1512
1513static UBool TestSymbolTable(){
1514    Literal * n1 = new Literal("string1");
1515    Literal * n2 = new Literal("string2");
1516    SymbolTable t;
1517    t.put("abc", n1);
1518    t.put("$aaa", n2);
1519//    t.put("alias", n1);  // TOFIX: point alias/recursion problem
1520    t.put("bbb");
1521
1522    UBool pass;
1523    pass = t.find(NULL) == SymbolTable::EMPTY;
1524    pass = pass && t.find("ccc") == SymbolTable::NO_VAR;
1525    pass = pass && t.find("bbb") == SymbolTable::NO_REF;
1526    pass = pass && t.find("abc") == SymbolTable::HAS_REF;
1527    pass = pass && t.find("$aaa") == SymbolTable::HAS_REF;
1528
1529    t.reset();
1530    pass = pass && t.find("abc") == SymbolTable::NO_VAR;
1531    return pass;
1532}
1533
1534
1535static UBool TestScanner(void){
1536    //const char str1[] = "$root = $command{0,5} $reset $mostRules{1,20};";
1537    //const char str1_r[][20] = {"$root", "=", "$command", "{", "0", ",", "5", "}",
1538    //    "$reset", "$mostRules", "{", "1", ",", "20", "}", ";"};
1539
1540    const char str2[] = "$p2 =(\\\\ $s $string $s)? 25%;";
1541    const char str2_r[][20] = {"$p2", "=", "(", "\\", "$s", "$string", "$s", ")", "?", "25", "%", ";"};
1542
1543    const char *str = str2;
1544    const char (*str_r)[20] = str2_r;
1545    int tokenNum = sizeof(str2_r)/sizeof(char[20]);
1546
1547    Scanner t(str);
1548    UBool pass = TRUE;
1549    t.getNextToken();
1550    int i = 0;
1551    while (pass){
1552        if (t.tokenType == STREAM_END){
1553            pass = pass? i == tokenNum : FALSE;
1554            break;//while
1555        } else if (t.tokenType == ERROR){
1556            pass = FALSE;
1557            break;//while
1558        } else {
1559            pass = strcmp( &(t.token[0]), str_r[i++]) == 0;
1560            t.getNextToken();
1561        }
1562    }
1563
1564    //const char ts[] = "$commandList = '['"
1565    //" ( alternate ' ' $alternateOptions"
1566    //" | backwards ' 2'"
1567    //" | normalization ' ' $onoff "
1568    //" | caseLevel ' ' $onoff "
1569    //" | hiraganaQ ' ' $onoff"
1570    //" | caseFirst ' ' $caseFirstOptions"
1571    //" | strength ' ' $strengthOptions"
1572    //" ) ']';" ;
1573
1574    //Scanner t2(ts);
1575    //pass = TRUE;
1576    //do {
1577    //    t2.getNextToken();
1578    //    if (t2.tokenType == ERROR){
1579    //        DumpScanner(t2);
1580    //        return FALSE;
1581    //    }
1582    //}while (t.tokenType != STREAM_END);
1583
1584    return pass;
1585}
1586
1587class TestParserT {
1588public:
1589UBool operator () (const char *const str, const int exp_error_offset = -1, const UBool dump = TRUE){
1590    Parser par(str);
1591    if (par.rules()){
1592        if ( exp_error_offset == -1){
1593            return TRUE;
1594        }else {
1595            DumpScanner(par.s,dump);
1596            return FALSE;
1597        }
1598    }else {
1599        return DumpScanner(par.s, dump) == exp_error_offset;
1600    }
1601}
1602};
1603
1604UBool TestParser(){
1605    TestParserT test;
1606
1607    UBool pass = TRUE;
1608    pass = pass && test ("$s = ' ' ? 50%;");
1609    pass = pass && test("$x = ($var {1,2}) 3%;");         // legal
1610    pass = pass && test("$x = $var {1,2} 3% | b 4%;");    // legal
1611    pass = pass && test("$x = $var {1,2} 3%;");           // legal
1612    pass = pass && test("$m = $c ? 2% 4% | $r 5% | $n 25%;"); // legal
1613    pass = pass && test("$a = b ? 2% | c 5%;");               // legal
1614    pass = pass && test("$x = A B 5% C 10% | D;", 8, FALSE);  // illegal 5%
1615    pass = pass && test("$x = aa 45% | bb 5% cc;", 19, FALSE);// illegal cc
1616    pass = pass && test("$x = (b 5%) (c 6%);");               // legal
1617    pass = pass && test("$x = (b 5%) c 6%;", 13, FALSE);      // illegal 6%
1618    pass = pass && test("$x = b 5% (c 6%);", 9, FALSE);       // illegal (c 6%)
1619    pass = pass && test("$x = b 5% c 6%;", 9, FALSE);         // illegal c 6%
1620    pass = pass && test("$x = b 5%;");                        // legal
1621    pass = pass && test("$x = aa 45% | bb 5% cc;", 19, FALSE);// illegal cc
1622    pass = pass && test("$x = a | b  | c 4% | d 5%;");        // legal
1623    pass = pass && test("$s = ' ' ? 50% abc;");               // legal
1624    pass = pass && test("$s =  a | c d | e f;");              // legal
1625    pass = pass && test( "$z = q 0% | p 1% | r 100%;");         // legal How to check parsed tree??
1626
1627    pass = pass && test("$s = ' ' ? 50%;");
1628    pass = pass && test("$relationList = '<' | '<<' |  ';' | '<<<' | ',' | '=';");
1629    pass = pass && test("$p1 = ($string $s '|' $s)? 25%;");
1630    pass = pass && test("$p2 = (\\\\ $s $string $s)? 25%;");
1631    pass = pass && test("$rel2 = $p1 $string $s $p2;");
1632    pass = pass && test("$relation = $relationList $s ($rel1 | $rel2) $crlf;");
1633    pass = pass && test("$command = $commandList $crlf;");
1634    pass = pass && test("$reset = '&' $s ($beforeList $s)? 10% ($positionList 100% | $string 10%) $crlf;");
1635    pass = pass && test("$mostRules = $command 1% | $reset 5% | $relation 25%;");
1636    pass = pass && test("$root = $command{0,5} $reset $mostRules{1,20};");
1637
1638    const char collationBNF[] =
1639    "$s = ' '? 50%;"
1640    "$crlf = '\r\n';"
1641
1642    "$alternateOptions = non'-'ignorable | shifted;"
1643    "$onoff = on | off;"
1644    "$caseFirstOptions = off | upper | lower;"
1645    "$strengthOptions = '1' | '2' | '3' | '4' | 'I';"
1646    "$commandList = '['"
1647    " ( alternate ' ' $alternateOptions"
1648    " | backwards ' 2'"
1649    " | normalization ' ' $onoff "
1650    " | caseLevel ' ' $onoff "
1651    " | hiraganaQ ' ' $onoff"
1652    " | caseFirst ' ' $caseFirstOptions"
1653    " | strength ' ' $strengthOptions"
1654    " ) ']';"
1655    "$command = $commandList $crlf;"
1656
1657    "$ignorableTypes = (tertiary | secondary | primary) ' ' ignorable;"
1658    "$allTypes = variable | regular | implicit | trailing | $ignorableTypes;"
1659    "$positionList = '[' (first | last) ' ' $allTypes ']';"
1660
1661    "$beforeList = '[before ' ('1' | '2' | '3') ']';"
1662
1663    "$relationList = ("
1664    "   '<'"
1665    " | '<<'"
1666    " | ';'"
1667    " | '<<<'"
1668    " | ','"
1669    " | '='"
1670    ");"
1671    "$string = $magic;"
1672    "$rel1 = '[variable top]' $s;"
1673    "$p1 = ($string $s '|' $s)? 25%;"
1674    "$p2 = (\\\\ $s $string $s)? 25%;"
1675    "$rel2 = $p1 $string $s $p2;"
1676    "$relation = $relationList $s ($rel1 | $rel2) $crlf;"
1677
1678    "$reset = '&' $s ($beforeList $s)? 10% ($positionList 1% | $string 10%) $crlf;"
1679    "$mostRules = $command 1% | $reset 5% | $relation 25%;"
1680    "$root = $command{0,5} $reset $mostRules{1,20};"
1681    ;
1682
1683    pass = pass && test(collationBNF);
1684
1685
1686    return pass;
1687}
1688
1689static UBool TestMorph(){
1690    srand((unsigned)time( NULL ));
1691
1692    Alternation * alt = new Alternation();
1693
1694    (*alt)
1695    .append(new Literal("a")).append(new Literal("b")).append(new Literal("c"))
1696    .append(new Literal("d")).append(new Literal("e")).append(new Literal("f"))
1697    .append(new Literal("g")).append(new Literal("h")).append(new Literal("i"))
1698    .append(new Literal("j")).append(new Literal("k")).append(new Literal("l"))
1699    .append(new Literal("m")).append(new Literal("n")).append(new Literal("o"))
1700    ;
1701
1702    Repeat * rep = new Repeat( alt ,5,5 );
1703    Morph m( *rep);
1704
1705//    DUMP_R(TestMorph,(*rep),20);
1706    DUMP_R(TestMorph,m,100);
1707
1708    return FALSE;
1709}
1710
1711#endif
1712
1713static UBool TestLanguageGenerator(){
1714    //LanguageGenerator g;
1715    //const char *const s = "$s = p 0% | q 1%;";
1716    //g.parseBNF(s, "$s");
1717    UBool pass;
1718    //= strcmp("q", g.next()) == 0;
1719
1720    const char *const def =
1721        //"$a = $b;"
1722        //"$b = $c;"
1723        //"$c = $t;"
1724        //"$t = abc $z{1,2};"
1725        //"$k = a | b | c | d | e | f | g ;"
1726        //"$z = q 0% | p 1% | r 1%;"
1727        "$x = a ? 0%;"
1728        ; // end of string
1729//    const char * s = "abczz";
1730//
1731//
1732    LanguageGenerator g;
1733    pass = g.parseBNF(def, "$x",TRUE);
1734////    LanguageGenerator g(collationBNF, "$root", "$magic", new MagicNode());
1735//
1736    if (pass != LanguageGenerator::OK) return FALSE;
1737
1738    DUMP_R(TestLanguageGenerator, g, 20);
1739    return pass;
1740
1741    ////UBool pass = strcmp(s,r) == 0;
1742
1743    //if (pass){
1744    //    printf("TestRandomLanguageGenerator passed.\n");
1745    //} else {
1746    //    printf("TestRandomLanguageGenerator FAILED!!!\n");
1747    //}
1748    //return pass;
1749}
1750
1751void TestWbnf(void){
1752    srand((unsigned)time( NULL ));
1753
1754    //CALL(TestLiteral);
1755    //CALL(TestSequence);
1756    //CALL(TestSymbolTable);
1757    //CALL(TestVariable);
1758
1759    //TestRepeat();
1760    //TestAlternation();
1761    //TestMorph();
1762
1763    //TestQuote();
1764    //TestBuffer();
1765    //TestWeightedRand();
1766
1767    //CALL(TestScanner);
1768    //CALL(TestParser);
1769    CALL(TestLanguageGenerator);
1770}
1771
1772