1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Copyright (C) 2002, International Business Machines
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h>
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdlib.h>
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ustring.h>
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ubrk.h>
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC int c_main(void);
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printTextRange(UChar* str, int32_t start, int32_t end)
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  char    charBuf[1000];
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  UChar   savedEndChar;
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  savedEndChar = str[end];
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  str[end] = 0;
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  u_austrncpy(charBuf, str+start, sizeof(charBuf)-1);
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  charBuf[sizeof(charBuf)-1]=0;
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printf("string[%2d..%2d] \"%s\"\n", start, end-1, charBuf);
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  str[end] = savedEndChar;
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Print each element in order: */
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printEachForward( UBreakIterator* boundary, UChar* str) {
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t end;
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t start = ubrk_first(boundary);
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  for (end = ubrk_next(boundary); end != UBRK_DONE; start = end, end =
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru	 ubrk_next(boundary)) {
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printTextRange(str, start, end );
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Print each element in reverse order: */
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printEachBackward( UBreakIterator* boundary, UChar* str) {
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t start;
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t end = ubrk_last(boundary);
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  for (start = ubrk_previous(boundary); start != UBRK_DONE;  end = start,
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru	 start =ubrk_previous(boundary)) {
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printTextRange( str, start, end );
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Print first element */
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printFirst(UBreakIterator* boundary, UChar* str) {
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t end;
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t start = ubrk_first(boundary);
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  end = ubrk_next(boundary);
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printTextRange( str, start, end );
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Print last element */
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printLast(UBreakIterator* boundary, UChar* str) {
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t start;
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t end = ubrk_last(boundary);
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  start = ubrk_previous(boundary);
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printTextRange(str, start, end );
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Print the element at a specified position */
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printAt(UBreakIterator* boundary, int32_t pos , UChar* str) {
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t start;
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t end = ubrk_following(boundary, pos);
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  start = ubrk_previous(boundary);
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printTextRange(str, start, end );
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Creating and using text boundaries*/
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint c_main( void ) {
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  UBreakIterator *boundary;
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  char           cStringToExamine[] = "Aaa bbb ccc. Ddd eee fff.";
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  UChar          stringToExamine[sizeof(cStringToExamine)+1];
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  UErrorCode     status = U_ZERO_ERROR;
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printf("\n\n"
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru	 "C Boundary Analysis\n"
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru	 "-------------------\n\n");
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printf("Examining: %s\n", cStringToExamine);
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  u_uastrcpy(stringToExamine, cStringToExamine);
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  /*print each sentence in forward and reverse order*/
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  boundary = ubrk_open(UBRK_SENTENCE, "en_us", stringToExamine,
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru		       -1, &status);
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  if (U_FAILURE(status)) {
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    printf("ubrk_open error: %s\n", u_errorName(status));
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    exit(1);
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printf("\n----- Sentence Boundaries, forward: -----------\n");
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printEachForward(boundary, stringToExamine);
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printf("\n----- Sentence Boundaries, backward: ----------\n");
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printEachBackward(boundary, stringToExamine);
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  ubrk_close(boundary);
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  /*print each word in order*/
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  boundary = ubrk_open(UBRK_WORD, "en_us", stringToExamine,
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru		       u_strlen(stringToExamine), &status);
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printf("\n----- Word Boundaries, forward: -----------\n");
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printEachForward(boundary, stringToExamine);
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printf("\n----- Word Boundaries, backward: ----------\n");
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printEachBackward(boundary, stringToExamine);
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  /*print first element*/
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printf("\n----- first: -------------\n");
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printFirst(boundary, stringToExamine);
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  /*print last element*/
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printf("\n----- last: --------------\n");
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printLast(boundary, stringToExamine);
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  /*print word at charpos 10 */
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printf("\n----- at pos 10: ---------\n");
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printAt(boundary, 10 , stringToExamine);
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  ubrk_close(boundary);
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  printf("\nEnd of C boundary analysis\n");
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  return 0;
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
129