1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)******************************************************************************* 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Copyright (C) 2000-2010, International Business Machines 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Corporation and others. All Rights Reserved. 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)******************************************************************************* 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* file name: uparse.c 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* encoding: US-ASCII 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* tab size: 8 (not used) 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* indentation:4 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* created on: 2000apr18 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* created by: Markus W. Scherer 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* This file provides a parser for files that are delimited by one single 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* character like ';' or TAB. Example: the Unicode Character Properties files 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* like UnicodeData.txt are semicolon-delimited. 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cstring.h" 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "filestrm.h" 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uparse.h" 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchar.h" 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ustring.h" 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ustr_imp.h" 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <stdio.h> 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI const char * U_EXPORT2 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)u_skipWhitespace(const char *s) { 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(U_IS_INV_WHITESPACE(*s)) { 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++s; 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return s; 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI char * U_EXPORT2 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)u_rtrim(char *s) { 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char *end=uprv_strchr(s, 0); 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(s<end && U_IS_INV_WHITESPACE(*(end-1))) { 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *--end = 0; 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return end; 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * If the string starts with # @missing: then return the pointer to the 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * following non-whitespace character. 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Otherwise return the original pointer. 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Unicode 5.0 adds such lines in some data files to document 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * default property values. 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Poor man's regex for variable amounts of white space. 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const char * 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)getMissingLimit(const char *s) { 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *s0=s; 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(s=u_skipWhitespace(s))=='#' && 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(s=u_skipWhitespace(s+1))=='@' && 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0==strncmp((s=u_skipWhitespace(s+1)), "missing", 7) && 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(s=u_skipWhitespace(s+7))==':' 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return u_skipWhitespace(s+1); 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return s0; 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)u_parseDelimitedFile(const char *filename, char delimiter, 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char *fields[][2], int32_t fieldCount, 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UParseLineFn *lineFn, void *context, 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) FileStream *file; 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char line[300]; 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char *start, *limit; 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i, length; 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(fields==NULL || lineFn==NULL || fieldCount<=0) { 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(filename==NULL || *filename==0 || (*filename=='-' && filename[1]==0)) { 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) filename=NULL; 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) file=T_FileStream_stdin(); 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) file=T_FileStream_open(filename, "r"); 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(file==NULL) { 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_FILE_ACCESS_ERROR; 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(T_FileStream_readLine(file, line, sizeof(line))!=NULL) { 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* remove trailing newline characters */ 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length=(int32_t)(u_rtrim(line)-line); 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * detect a line with # @missing: 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * start parsing after that, or else from the beginning of the line 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * set the default warning for @missing lines 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start=(char *)getMissingLimit(line); 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(start==line) { 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ZERO_ERROR; 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_USING_DEFAULT_WARNING; 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* skip this line if it is empty or a comment */ 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(*start==0 || *start=='#') { 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* remove in-line comments */ 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=uprv_strchr(start, '#'); 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(limit!=NULL) { 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* get white space before the pound sign */ 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(limit>start && U_IS_INV_WHITESPACE(*(limit-1))) { 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --limit; 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* truncate the line */ 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *limit=0; 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* skip lines with only whitespace */ 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(u_skipWhitespace(start)[0]==0) { 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* for each field, call the corresponding field function */ 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0; i<fieldCount; ++i) { 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set the limit pointer of this field */ 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=start; 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(*limit!=delimiter && *limit!=0) { 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++limit; 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set the field start and limit in the fields array */ 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fields[i][0]=start; 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fields[i][1]=limit; 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set start to the beginning of the next field, if any */ 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start=limit; 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(*start!=0) { 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++start; 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(i+1<fieldCount) { 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_PARSE_ERROR; 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=line+length; 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i=fieldCount; 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* error in a field function? */ 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* call the field function */ 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lineFn(context, fields, fieldCount, pErrorCode); 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(filename!=NULL) { 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T_FileStream_close(file); 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * parse a list of code points 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * store them as a UTF-32 string in dest[destCapacity] 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * return the number of code points 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)u_parseCodePoints(const char *s, 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *dest, int32_t destCapacity, 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char *end; 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value; 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t count; 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) { 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) count=0; 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(;;) { 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s=u_skipWhitespace(s); 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(*s==';' || *s==0) { 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return count; 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* read one code point */ 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=(uint32_t)uprv_strtoul(s, &end, 16); 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) { 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_PARSE_ERROR; 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* append it to the destination array */ 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(count<destCapacity) { 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dest[count++]=value; 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* go to the following characters */ 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s=end; 224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * parse a list of code points 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * store them as a string in dest[destCapacity] 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * set the first code point in *pFirst 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The length of the string in numbers of UChars. 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)u_parseString(const char *s, 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *dest, int32_t destCapacity, 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *pFirst, 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char *end; 239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value; 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t destLength; 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) { 246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(pFirst!=NULL) { 250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pFirst=0xffffffff; 251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) destLength=0; 254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(;;) { 255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s=u_skipWhitespace(s); 256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(*s==';' || *s==0) { 257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(destLength<destCapacity) { 258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dest[destLength]=0; 259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(destLength==destCapacity) { 260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; 261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return destLength; 265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* read one code point */ 268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=(uint32_t)uprv_strtoul(s, &end, 16); 269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) { 270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_PARSE_ERROR; 271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* store the first code point */ 275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(pFirst!=NULL) { 276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pFirst=value; 277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pFirst=NULL; 278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* append it to the destination array */ 281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if((destLength+U16_LENGTH(value))<=destCapacity) { 282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U16_APPEND_UNSAFE(dest, destLength, value); 283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) destLength+=U16_LENGTH(value); 285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* go to the following characters */ 288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s=end; 289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* read a range like start or start..end */ 293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2 294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)u_parseCodePointRangeAnyTerminator(const char *s, 295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *pStart, uint32_t *pEnd, 296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char **terminator, 297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char *end; 299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value; 300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(s==NULL || pStart==NULL || pEnd==NULL) { 305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* read the start code point */ 310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s=u_skipWhitespace(s); 311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=(uint32_t)uprv_strtoul(s, &end, 16); 312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(end<=s || value>=0x110000) { 313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_PARSE_ERROR; 314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pStart=*pEnd=value; 317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* is there a "..end"? */ 319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s=u_skipWhitespace(end); 320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(*s!='.' || s[1]!='.') { 321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *terminator=end; 322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 1; 323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s=u_skipWhitespace(s+2); 325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* read the end code point */ 327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=(uint32_t)uprv_strtoul(s, &end, 16); 328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(end<=s || value>=0x110000) { 329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_PARSE_ERROR; 330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pEnd=value; 333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* is this a valid range? */ 335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(value<*pStart) { 336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_PARSE_ERROR; 337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *terminator=end; 341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return value-*pStart+1; 342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2 345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)u_parseCodePointRange(const char *s, 346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *pStart, uint32_t *pEnd, 347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *terminator; 349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t rangeLength= 350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_parseCodePointRangeAnyTerminator(s, pStart, pEnd, &terminator, pErrorCode); 351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_SUCCESS(*pErrorCode)) { 352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) terminator=u_skipWhitespace(terminator); 353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(*terminator!=';' && *terminator!=0) { 354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_PARSE_ERROR; 355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return rangeLength; 359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2 362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status) { 363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *read = source; 364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i = 0; 365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsigned int value = 0; 366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(sLen == -1) { 367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sLen = (int32_t)strlen(source); 368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(read < source+sLen) { 371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sscanf(read, "%2x", &value); 372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i < destCapacity) { 373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dest[i] = (char)value; 374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i++; 376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) read += 2; 377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return u_terminateChars(dest, destCapacity, i, status); 379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 380