1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Copyright (C) 2002, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h> 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdlib.h> 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ustring.h> 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <unicode/ubrk.h> 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC int c_main(void); 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printTextRange(UChar* str, int32_t start, int32_t end) 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char charBuf[1000]; 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar savedEndChar; 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru savedEndChar = str[end]; 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru str[end] = 0; 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_austrncpy(charBuf, str+start, sizeof(charBuf)-1); 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru charBuf[sizeof(charBuf)-1]=0; 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("string[%2d..%2d] \"%s\"\n", start, end-1, charBuf); 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru str[end] = savedEndChar; 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Print each element in order: */ 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printEachForward( UBreakIterator* boundary, UChar* str) { 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t end; 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start = ubrk_first(boundary); 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (end = ubrk_next(boundary); end != UBRK_DONE; start = end, end = 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ubrk_next(boundary)) { 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printTextRange(str, start, end ); 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Print each element in reverse order: */ 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printEachBackward( UBreakIterator* boundary, UChar* str) { 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start; 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t end = ubrk_last(boundary); 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (start = ubrk_previous(boundary); start != UBRK_DONE; end = start, 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start =ubrk_previous(boundary)) { 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printTextRange( str, start, end ); 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Print first element */ 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printFirst(UBreakIterator* boundary, UChar* str) { 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t end; 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start = ubrk_first(boundary); 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru end = ubrk_next(boundary); 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printTextRange( str, start, end ); 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Print last element */ 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printLast(UBreakIterator* boundary, UChar* str) { 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start; 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t end = ubrk_last(boundary); 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start = ubrk_previous(boundary); 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printTextRange(str, start, end ); 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Print the element at a specified position */ 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid printAt(UBreakIterator* boundary, int32_t pos , UChar* str) { 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start; 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t end = ubrk_following(boundary, pos); 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start = ubrk_previous(boundary); 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printTextRange(str, start, end ); 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Creating and using text boundaries*/ 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint c_main( void ) { 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBreakIterator *boundary; 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char cStringToExamine[] = "Aaa bbb ccc. Ddd eee fff."; 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar stringToExamine[sizeof(cStringToExamine)+1]; 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\n\n" 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "C Boundary Analysis\n" 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "-------------------\n\n"); 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("Examining: %s\n", cStringToExamine); 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_uastrcpy(stringToExamine, cStringToExamine); 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*print each sentence in forward and reverse order*/ 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru boundary = ubrk_open(UBRK_SENTENCE, "en_us", stringToExamine, 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru -1, &status); 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("ubrk_open error: %s\n", u_errorName(status)); 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(1); 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\n----- Sentence Boundaries, forward: -----------\n"); 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printEachForward(boundary, stringToExamine); 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\n----- Sentence Boundaries, backward: ----------\n"); 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printEachBackward(boundary, stringToExamine); 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ubrk_close(boundary); 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*print each word in order*/ 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru boundary = ubrk_open(UBRK_WORD, "en_us", stringToExamine, 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_strlen(stringToExamine), &status); 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\n----- Word Boundaries, forward: -----------\n"); 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printEachForward(boundary, stringToExamine); 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\n----- Word Boundaries, backward: ----------\n"); 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printEachBackward(boundary, stringToExamine); 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*print first element*/ 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\n----- first: -------------\n"); 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printFirst(boundary, stringToExamine); 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*print last element*/ 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\n----- last: --------------\n"); 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printLast(boundary, stringToExamine); 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*print word at charpos 10 */ 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\n----- at pos 10: ---------\n"); 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printAt(boundary, 10 , stringToExamine); 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ubrk_close(boundary); 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\nEnd of C boundary analysis\n"); 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 129