/*
*******************************************************************************
*
*   Copyright (C) 2016 and later: Unicode, Inc. and others.
*   License & terms of use: http://www.unicode.org/copyright.html#License
*
*******************************************************************************
*******************************************************************************
*
*   Copyright (C) 2002-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*/
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h>
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/brkiter.h>
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdlib.h>
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC int c_main(void);
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printUnicodeString(const UnicodeString &s) {
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char charBuf[1000];
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    s.extract(0, s.length(), charBuf, sizeof(charBuf)-1, 0);
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    charBuf[sizeof(charBuf)-1] = 0;
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("%s", charBuf);
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printTextRange( BreakIterator& iterator,
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    int32_t start, int32_t end )
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    CharacterIterator *strIter = iterator.getText().clone();
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString  s;
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    strIter->getText(s);
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf(" %ld %ld\t", (long)start, (long)end);
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString(UnicodeString(s, 0, start));
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("|");
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString(UnicodeString(s, start, end-start));
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("|");
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString(UnicodeString(s, end));
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    puts("");
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    delete strIter;
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Print each element in order: */
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printEachForward( BreakIterator& boundary)
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t start = boundary.first();
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for (int32_t end = boundary.next();
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         end != BreakIterator::DONE;
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         start = end, end = boundary.next())
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printTextRange( boundary, start, end );
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Print each element in reverse order: */
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printEachBackward( BreakIterator& boundary)
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t end = boundary.last();
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for (int32_t start = boundary.previous();
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         start != BreakIterator::DONE;
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         end = start, start = boundary.previous())
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printTextRange( boundary, start, end );
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Print the first element */
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printFirst(BreakIterator& boundary)
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t start = boundary.first();
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t end = boundary.next();
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printTextRange( boundary, start, end );
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Print the last element */
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printLast(BreakIterator& boundary)
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t end = boundary.last();
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t start = boundary.previous();
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printTextRange( boundary, start, end );
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Print the element at a specified position */
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printAt(BreakIterator &boundary, int32_t pos )
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t end = boundary.following(pos);
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t start = boundary.previous();
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printTextRange( boundary, start, end );
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Creating and using text boundaries */
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint main( void )
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    puts("ICU Break Iterator Sample Program\n");
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    puts("C++ Break Iteration\n");
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    BreakIterator* boundary;
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString stringToExamine("Aaa bbb ccc. Ddd eee fff.");
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("Examining: ");
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printUnicodeString(stringToExamine);
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    puts("");
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //print each sentence in forward and reverse order
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    boundary = BreakIterator::createSentenceInstance(
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        Locale::getUS(), status );
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(status)) {
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("failed to create sentence break iterator.  status = %s",
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            u_errorName(status));
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        exit(1);
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    boundary->setText(stringToExamine);
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    puts("\n Sentence Boundaries... ");
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    puts("----- forward: -----------");
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printEachForward(*boundary);
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    puts("----- backward: ----------");
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printEachBackward(*boundary);
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    delete boundary;
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //print each word in order
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("\n Word Boundaries... \n");
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    boundary = BreakIterator::createWordInstance(
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        Locale::getUS(), status);
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    boundary->setText(stringToExamine);
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    puts("----- forward: -----------");
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printEachForward(*boundary);
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //print first element
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    puts("----- first: -------------");
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printFirst(*boundary);
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //print last element
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    puts("----- last: --------------");
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printLast(*boundary);
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //print word at charpos 10
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    puts("----- at pos 10: ---------");
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printAt(*boundary, 10 );
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    delete boundary;
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    puts("\nEnd C++ Break Iteration");
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Call the C version
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return c_main();
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
150