/*
*******************************************************************************
*
*   Copyright (C) 2000-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  uparse.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2000apr18
*   created by: Markus W. Scherer
*
*   This file provides a parser for files that are delimited by one single
*   character like ';' or TAB. Example: the Unicode Character Properties files
*   like UnicodeData.txt are semicolon-delimited.
*/
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef __UPARSE_H__
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define __UPARSE_H__
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Is c an invariant-character whitespace?
 * True for exactly four characters: space ' ', TAB '\t', CR '\r' and LF '\n'.
 *
 * This is a function-like macro that expands its argument up to four times,
 * so do not pass an expression with side effects (for example *p++).
 *
 * @param c invariant character
 * @return Non-zero if c is one of the four characters listed above, otherwise 0.
 */
#define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
3127f654740f2a26ad62a5c155af9199af9e69b889claireho
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Skip space ' ' and TAB '\t' characters.
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s Pointer to characters.
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return Pointer to first character at or after s that is not a space or TAB.
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI const char * U_EXPORT2
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_skipWhitespace(const char *s);
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Trim whitespace (including line endings) from the end of the string.
4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s Pointer to the string.
4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return Pointer to the new end of the string.
4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
4950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI char * U_EXPORT2
5050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehou_rtrim(char *s);
5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** Function type for u_parseDelimitedFile(). */
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef void U_CALLCONV
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUParseLineFn(void *context,
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              char *fields[][2],
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t fieldCount,
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UErrorCode *pErrorCode);
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parser for files that are similar to UnicodeData.txt:
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This function opens the file and reads it line by line. It skips empty lines
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and comment lines that start with a '#'.
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * All other lines are separated into fields with one delimiter character
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (semicolon for Unicode Properties files) between two fields. The last field in
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a line does not need to be terminated with a delimiter.
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For each line, after segmenting it, a line function is called.
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It gets passed the array of field start and limit pointers that is
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * passed into this parser and filled by it for each line.
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For each field i of the line, the start pointer in fields[i][0]
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * points to the beginning of the field, while the limit pointer in fields[i][1]
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * points behind the field, i.e., to the delimiter or the line end.
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The context parameter of the line function is
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the same as the one for the parse function.
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The line function may modify the contents of the fields including the
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * limit characters.
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If the file cannot be opened, or there is a parsing error or a field function
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code.
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_parseDelimitedFile(const char *filename, char delimiter,
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     char *fields[][2], int32_t fieldCount,
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     UParseLineFn *lineFn, void *context,
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     UErrorCode *pErrorCode);
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a string of code points like 0061 0308 0300.
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * s must end with either ';' or NUL.
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return Number of code points.
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_parseCodePoints(const char *s,
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  uint32_t *dest, int32_t destCapacity,
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  UErrorCode *pErrorCode);
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a list of code points like 0061 0308 0300
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * into a UChar * string.
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * s must end with either ';' or NUL.
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Set the first code point in *pFirst.
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s Input char * string.
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dest Output string buffer.
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param destCapacity Capacity of dest in numbers of UChars.
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pFirst If pFirst!=NULL the *pFirst will be set to the first
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *               code point in the string.
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pErrorCode ICU error code.
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return The length of the string in numbers of UChars.
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_parseString(const char *s,
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UChar *dest, int32_t destCapacity,
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              uint32_t *pFirst,
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UErrorCode *pErrorCode);
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a code point range like
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0085 or
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 4E00..9FA5.
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * s must contain such a range and end with either ';' or NUL.
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return Length of code point range, end-start+1
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_parseCodePointRange(const char *s,
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      uint32_t *pStart, uint32_t *pEnd,
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UErrorCode *pErrorCode);
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
13550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Same as u_parseCodePointRange() but the range may be terminated by
13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * any character. The position of the terminating character is returned via
13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the *terminator output parameter.
13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
14050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI int32_t U_EXPORT2
14150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehou_parseCodePointRangeAnyTerminator(const char *s,
14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                   uint32_t *pStart, uint32_t *pEnd,
14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                   const char **terminator,
14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                   UErrorCode *pErrorCode);
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status);
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
152