1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)********************************************************************** 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Copyright (c) 2001-2006, International Business Machines 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Corporation and others. All Rights Reserved. 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)********************************************************************** 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Date Name Description 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 11/19/2001 aliu Creation. 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)********************************************************************** 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "util.h" 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchar.h" 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_BEGIN 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Parse an integer at pos, either of the form \d+ or of the form 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex, 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * or octal format. 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param pos INPUT-OUTPUT parameter. On input, the first 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * character to parse. On output, the character after the last 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * parsed character. 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t ICU_Utility::parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit) { 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t count = 0; 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t value = 0; 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t p = pos; 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int8_t radix = 10; 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p < limit && rule.charAt(p) == 48 /*0*/) { 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p+1 < limit && (rule.charAt(p+1) == 0x78 /*x*/ || rule.charAt(p+1) == 0x58 /*X*/)) { 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p += 2; 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) radix = 16; 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p++; 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) count = 1; 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) radix = 8; 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (p < limit) { 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t d = u_digit(rule.charAt(p++), radix); 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (d < 0) { 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --p; 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++count; 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t v = (value * radix) + d; 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (v <= value) { 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If there are too many input digits, at some point 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the value will go negative, e.g., if we have seen 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // "0x8000000" already and there is another '0', when 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // we parse the next 0 the value will go negative. 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value = v; 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (count > 0) { 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = p; 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return value; 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Parse a pattern string starting at offset pos. Keywords are 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * matched case-insensitively. Spaces may be skipped and may be 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * optional or required. Integer values may be parsed, and if 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * they are, they will be returned in the given array. If 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * successful, the offset of the next non-space character is 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * returned. On failure, -1 is returned. 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param pattern must only contain lowercase characters, which 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * will match their uppercase equivalents as well. A space 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * character matches one or more required spaces. A '~' character 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * matches zero or more optional spaces. A '#' character matches 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * an integer and stores it in parsedInts, which the caller must 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * ensure has enough capacity. 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param parsedInts array to receive parsed integers. Caller 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * must ensure that parsedInts.length is >= the number of '#' 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * signs in 'pattern'. 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return the position after the last character parsed, or -1 if 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the parse failed 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit, 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString& pattern, int32_t* parsedInts) { 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO Update this to handle surrogates 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t p; 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t intCount = 0; // number of integers parsed 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (int32_t i=0; i<pattern.length(); ++i) { 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar cpat = pattern.charAt(i); 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar c; 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) switch (cpat) { 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 32 /*' '*/: 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pos >= limit) { 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = rule.charAt(pos++); 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (!uprv_isRuleWhiteSpace(c)) { 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // FALL THROUGH to skipWhitespace 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 126 /*'~'*/: 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = skipWhitespace(rule, pos); 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 35 /*'#'*/: 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p = pos; 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) parsedInts[intCount++] = parseInteger(rule, p, limit); 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p == pos) { 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Syntax error; failed to parse integer 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = p; 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) default: 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pos >= limit) { 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = (UChar) u_tolower(rule.charAt(pos++)); 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c != cpat) { 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return pos; 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Parse a Unicode identifier from the given string at the given 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * position. Return the identifier, or an empty string if there 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * is no identifier. 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param str the string to parse 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param pos INPUT-OUPUT parameter. On INPUT, pos is the 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * first character to examine. It must be less than str.length(), 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and it must not point to a whitespace character. That is, must 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * have pos < str.length() and 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * !uprv_isRuleWhiteSpace(str.char32At(pos)). On 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * OUTPUT, the position after the last parsed character. 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return the Unicode identifier, or an empty string if there is 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * no valid identifier at pos. 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) { 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // assert(pos < str.length()); 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // assert(!uprv_isRuleWhiteSpace(str.char32At(pos))); 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString buf; 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int p = pos; 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (p < str.length()) { 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 ch = str.char32At(p); 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (buf.length() == 0) { 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (u_isIDStart(ch)) { 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buf.append(ch); 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buf.truncate(0); 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return buf; 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (u_isIDPart(ch)) { 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buf.append(ch); 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p += UTF_CHAR_LENGTH(ch); 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = p; 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return buf; 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Parse an unsigned 31-bit integer at the given offset. Use 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * UCharacter.digit() to parse individual characters into digits. 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param text the text to be parsed 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param pos INPUT-OUTPUT parameter. On entry, pos[0] is the 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * offset within text at which to start parsing; it should point 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * to a valid digit. On exit, pos[0] is the offset after the last 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * parsed character. If the parse failed, it will be unchanged on 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * exit. Must be >= 0 on entry. 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param radix the radix in which to parse; must be >= 2 and <= 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 36. 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return a non-negative parsed number, or -1 upon parse failure. 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Parse fails if there are no digits, that is, if pos[0] does not 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * point to a valid digit on entry, or if the number to be parsed 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * does not fit into a 31-bit unsigned integer. 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t ICU_Utility::parseNumber(const UnicodeString& text, 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t& pos, int8_t radix) { 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // assert(pos[0] >= 0); 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // assert(radix >= 2); 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // assert(radix <= 36); 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t n = 0; 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t p = pos; 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (p < text.length()) { 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 ch = text.char32At(p); 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t d = u_digit(ch, radix); 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (d < 0) { 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) n = radix*n + d; 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // ASSUME that when a 32-bit integer overflows it becomes 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // negative. E.g., 214748364 * 10 + 8 => negative value. 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (n < 0) { 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++p; 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p == pos) { 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = p; 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return n; 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_END 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 215